btrfs-progs: check: repair dir inode isize in lowmem mode
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Work phase shown by the progress indicator.  print_status_check() uses
 * every value except TASK_NOTHING as an index into its
 * task_position_string[] table, so the order here must match that table.
 */
47 enum task_position {
48         TASK_EXTENTS,
49         TASK_FREE_SPACE,
50         TASK_FS_ROOTS,
51         TASK_NOTHING, /* must stay last: it has no entry in task_position_string[] */
52 };
53
/* Context passed as the opaque pointer to print_status_check(). */
54 struct task_ctx {
55         int progress_enabled; /* NOTE(review): readers of this flag are outside this excerpt */
56         enum task_position tp; /* phase whose label is printed */
57
58         struct task_info *info; /* handed to task_period_start()/task_period_wait() */
59 };
60
/*
 * File-scope state of the checker.  Everything starts out zeroed/empty;
 * the code that updates these variables is outside this excerpt.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
/* Progress context; print_status_check() receives a struct task_ctx pointer */
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation selector.  parse_check_mode() maps "lowmem" to
 * CHECK_MODE_LOWMEM, "orig"/"original" to CHECK_MODE_ORIGINAL and any
 * other string to CHECK_MODE_UNKNOWN.
 */
78 enum btrfs_check_mode {
79         CHECK_MODE_ORIGINAL,
80         CHECK_MODE_LOWMEM,
81         CHECK_MODE_UNKNOWN,
82         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL /* alias, not a separate mode */
83 };
84
/* Currently selected mode; starts as the default (original) mode. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a back reference.  It is embedded as the member `node`
 * in struct data_backref and struct tree_backref and linked into an
 * rb-tree through its own rb_node.
 */
87 struct extent_backref {
88         struct rb_node node;
89         unsigned int is_data:1; /* selects data vs. tree comparison in compare_extent_backref() */
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1; /* second sort key in compare_extent_backref() */
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Back reference of a data extent.  The embedded extent_backref lets
 * to_data_backref() recover this structure with container_of().
 * compare_data_backref() orders entries by parent/root, then owner and
 * offset, and finally by disk_bytenr and bytes when both sides have a
 * non-zero found_ref.
 */
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref; /* a counter, distinct from the one-bit node.found_ref */
114 };
115
/*
 * Error bits, one per condition described on each line.  Bit 0 is not
 * assigned in this list.
 * NOTE(review): the code that sets and tests these bits is outside this
 * excerpt -- confirm which check mode uses them.
 */
116 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
130 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
133
134 static inline struct data_backref* to_data_backref(struct extent_backref *back)
135 {
136         return container_of(back, struct data_backref, node);
137 }
138
139 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
140 {
141         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
142         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
143         struct data_backref *back1 = to_data_backref(ext1);
144         struct data_backref *back2 = to_data_backref(ext2);
145
146         WARN_ON(!ext1->is_data);
147         WARN_ON(!ext2->is_data);
148
149         /* parent and root are a union, so this covers both */
150         if (back1->parent > back2->parent)
151                 return 1;
152         if (back1->parent < back2->parent)
153                 return -1;
154
155         /* This is a full backref and the parents match. */
156         if (back1->node.full_backref)
157                 return 0;
158
159         if (back1->owner > back2->owner)
160                 return 1;
161         if (back1->owner < back2->owner)
162                 return -1;
163
164         if (back1->offset > back2->offset)
165                 return 1;
166         if (back1->offset < back2->offset)
167                 return -1;
168
169         if (back1->found_ref && back2->found_ref) {
170                 if (back1->disk_bytenr > back2->disk_bytenr)
171                         return 1;
172                 if (back1->disk_bytenr < back2->disk_bytenr)
173                         return -1;
174
175                 if (back1->bytes > back2->bytes)
176                         return 1;
177                 if (back1->bytes < back2->bytes)
178                         return -1;
179         }
180
181         return 0;
182 }
183
184 /*
185  * Much like data_backref, but with the undetermined members removed
186  * and linked through a list_head instead of an rb-tree node.
187  * During extent scan, it is stored in root->orphan_data_extent.
188  * During fs tree scan, it is then moved to the inode record's list of
 * orphan extents (the field is named orphan_extents in struct inode_record).
 * NOTE(review): the root-side list is not visible here -- confirm its name.
189  */
190 struct orphan_data_extent {
191         struct list_head list;
192         u64 root;
193         u64 objectid; /* printed as "inode" by print_orphan_data_extents() */
194         u64 offset;
195         u64 disk_bytenr;
196         u64 disk_len;
197 };
198
/*
 * Back reference of a tree block.  compare_tree_backref() orders entries
 * solely by the parent/root value and warns if is_data is set on them.
 */
199 struct tree_backref {
200         struct extent_backref node;
201         union {
202                 u64 parent;
203                 u64 root;
204         };
205 };
206
207 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
208 {
209         return container_of(back, struct tree_backref, node);
210 }
211
212 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
213 {
214         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
215         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
216         struct tree_backref *back1 = to_tree_backref(ext1);
217         struct tree_backref *back2 = to_tree_backref(ext2);
218
219         WARN_ON(ext1->is_data);
220         WARN_ON(ext2->is_data);
221
222         /* parent and root are a union, so this covers both */
223         if (back1->parent > back2->parent)
224                 return 1;
225         if (back1->parent < back2->parent)
226                 return -1;
227
228         return 0;
229 }
230
231 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
232 {
233         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
234         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
235
236         if (ext1->is_data > ext2->is_data)
237                 return 1;
238
239         if (ext1->is_data < ext2->is_data)
240                 return -1;
241
242         if (ext1->full_backref > ext2->full_backref)
243                 return 1;
244         if (ext1->full_backref < ext2->full_backref)
245                 return -1;
246
247         if (ext1->is_data)
248                 return compare_data_backref(node1, node2);
249         else
250                 return compare_tree_backref(node1, node2);
251 }
252
253 /* Explicit initialization for extent_record::flag_block_full_backref */
254 enum { FLAG_UNSET = 2 };
255
/*
 * Bookkeeping record for one extent.  to_extent_record() recovers it from
 * the `list` member.
 * NOTE(review): the code filling in the remaining fields is outside this
 * excerpt.
 */
256 struct extent_record {
257         struct list_head backrefs;
258         struct list_head dups;
259         struct rb_root backref_tree;
260         struct list_head list;
261         struct cache_extent cache;
262         struct btrfs_disk_key parent_key;
263         u64 start;
264         u64 max_size;
265         u64 nr;
266         u64 refs;
267         u64 extent_item_refs;
268         u64 generation;
269         u64 parent_generation;
270         u64 info_objectid;
271         u32 num_duplicates;
272         u8 info_level;
273         unsigned int flag_block_full_backref:2; /* two bits wide so it can hold FLAG_UNSET (2) */
274         unsigned int found_rec:1;
275         unsigned int content_checked:1;
276         unsigned int owner_ref_checked:1;
277         unsigned int is_root:1;
278         unsigned int metadata:1;
279         unsigned int bad_full_backref:1;
280         unsigned int crossing_stripes:1;
281         unsigned int wrong_chunk_type:1;
282 };
283
284 static inline struct extent_record* to_extent_record(struct list_head *entry)
285 {
286         return container_of(entry, struct extent_record, list);
287 }
288
/*
 * One name referring to an inode, linked into inode_record::backrefs.
 * The name is stored inline after the structure: clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
289 struct inode_backref {
290         struct list_head list;
291         unsigned int found_dir_item:1;
292         unsigned int found_dir_index:1;
293         unsigned int found_inode_ref:1;
294         u8 filetype;
295         u8 ref_type;
296         int errors; /* presumably REF_ERR_* bits -- TODO confirm against the setters */
297         u64 dir;
298         u64 index;
299         u16 namelen;
300         char name[0]; /* namelen bytes plus one extra byte (presumably a NUL terminator) */
301 };
302
303 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
304 {
305         return list_entry(entry, struct inode_backref, list);
306 }
307
/*
 * List-linked record of a root item.
 * NOTE(review): the producers and consumers of this record are outside
 * this excerpt; the field names suggest a copy of selected root item
 * values -- confirm.
 */
308 struct root_item_record {
309         struct list_head list;
310         u64 objectid;
311         u64 bytenr;
312         u64 last_snapshot;
313         u8 level;
314         u8 drop_level;
315         struct btrfs_key drop_key;
316 };
317
/*
 * Reference error bits.  print_ref_error() tests these bits in its `errors`
 * argument and prints one message per set bit.
 */
318 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
319 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
320 #define REF_ERR_NO_INODE_REF            (1 << 2)
321 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
322 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
323 #define REF_ERR_DUP_INODE_REF           (1 << 5)
324 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
325 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
326 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
327 #define REF_ERR_NO_ROOT_REF             (1 << 9)
328 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
329 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
330 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
331
/*
 * A hole in a file's extent coverage, kept in an rb-tree ordered by start
 * (see compare_hole()).  It covers the byte range [start, start + len),
 * which is how compare_hole_range() tests membership.
 */
332 struct file_extent_hole {
333         struct rb_node node;
334         u64 start;
335         u64 len;
336 };
337
/*
 * Everything collected about one inode.  clone_inode_rec() deep-copies the
 * backrefs list, the orphan_extents list and the holes tree and starts the
 * copy with refs == 1; print_inode_error() reports `errors`.
 */
338 struct inode_record {
339         struct list_head backrefs; /* struct inode_backref entries */
340         unsigned int checked:1;
341         unsigned int merging:1;
342         unsigned int found_inode_item:1;
343         unsigned int found_dir_item:1;
344         unsigned int found_file_extent:1;
345         unsigned int found_csum_item:1;
346         unsigned int some_csum_missing:1;
347         unsigned int nodatasum:1;
348         int errors; /* I_ERR_* bits */
349
350         u64 ino;
351         u32 nlink;
352         u32 imode;
353         u64 isize;
354         u64 nbytes;
355
356         u32 found_link;
357         u64 found_size;
358         u64 extent_start;
359         u64 extent_end;
360         struct rb_root holes; /* struct file_extent_hole entries */
361         struct list_head orphan_extents; /* struct orphan_data_extent entries */
362
363         u32 refs;
364 };
365
/*
 * Inode error bits stored in inode_record::errors.  print_inode_error()
 * prints one message per set bit.
 */
366 #define I_ERR_NO_INODE_ITEM             (1 << 0)
367 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
368 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
369 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
370 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
371 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
372 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
373 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
374 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
375 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
376 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
377 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
378 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
379 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
380 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
381
/*
 * One reference to a root, linked through `list`; the name is stored
 * inline after the structure.
 * NOTE(review): allocation and the meaning of the found_* bits are not
 * visible in this excerpt.
 */
382 struct root_backref {
383         struct list_head list;
384         unsigned int found_dir_item:1;
385         unsigned int found_dir_index:1;
386         unsigned int found_back_ref:1;
387         unsigned int found_forward_ref:1;
388         unsigned int reachable:1;
389         int errors; /* presumably REF_ERR_* bits -- TODO confirm */
390         u64 ref_root;
391         u64 dir;
392         u64 index;
393         u16 namelen;
394         char name[0];
395 };
396
397 static inline struct root_backref* to_root_backref(struct list_head *entry)
398 {
399         return list_entry(entry, struct root_backref, list);
400 }
401
/*
 * Per-root record; `backrefs` is a list head and `cache` is the hook for a
 * cache tree.
 * NOTE(review): presumably `backrefs` holds struct root_backref entries --
 * confirm against the code that populates it.
 */
402 struct root_record {
403         struct list_head backrefs;
404         struct cache_extent cache;
405         unsigned int found_root_item:1;
406         u64 objectid;
407         u32 found_ref;
408 };
409
/* Cache-tree entry that carries an opaque pointer payload. */
410 struct ptr_node {
411         struct cache_extent cache;
412         void *data; /* NOTE(review): the pointee type depends on the tree -- not visible here */
413 };
414
/*
 * Cache-tree entry bundling a root cache, an inode cache and a pointer to
 * one inode record.  struct walk_control keeps an array of pointers to
 * these, sized BTRFS_MAX_LEVEL.
 * NOTE(review): exact sharing semantics are defined by code outside this
 * excerpt.
 */
415 struct shared_node {
416         struct cache_extent cache;
417         struct cache_tree root_cache;
418         struct cache_tree inode_cache;
419         struct inode_record *current;
420         u32 refs;
421 };
422
/* Start and size of a block (users are outside this excerpt). */
423 struct block_info {
424         u64 start;
425         u32 size;
426 };
427
/*
 * Tree-walk state: a cache tree named `shared` plus one shared_node
 * pointer slot per possible tree level.
 * NOTE(review): active_node and root_level look like indexes into nodes[]
 * -- confirm against the walker.
 */
428 struct walk_control {
429         struct cache_tree shared;
430         struct shared_node *nodes[BTRFS_MAX_LEVEL];
431         int active_node;
432         int root_level;
433 };
434
/* List-linked record of an item key together with the id of its root. */
435 struct bad_item {
436         struct btrfs_key key;
437         u64 root_id;
438         struct list_head list;
439 };
440
/*
 * List-linked (bytenr, bytes) pair with two counters.
 * NOTE(review): what `count` and `broken` tally is decided by code outside
 * this excerpt.
 */
441 struct extent_entry {
442         u64 bytenr;
443         u64 bytes;
444         int count;
445         int broken;
446         struct list_head list;
447 };
448
/* Per-root information kept in a cache tree through `cache_extent`. */
449 struct root_item_info {
450         /* level of the root */
451         u8 level;
452         /* number of nodes at this level, must be 1 for a root */
453         int node_count;
454         u64 bytenr;
455         u64 gen;
456         struct cache_extent cache_extent;
457 };
458
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet; they are only used internally
 * for error classification.  Every flag owns its own bit so that the classes
 * stay distinguishable after several of them have been OR-ed together.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * Used to share (1 << 4) with REFERENCER_MISMATCH; moved to the first free
 * bit so the two conditions no longer alias each other.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
475
476 static void *print_status_check(void *p)
477 {
478         struct task_ctx *priv = p;
479         const char work_indicator[] = { '.', 'o', 'O', 'o' };
480         uint32_t count = 0;
481         static char *task_position_string[] = {
482                 "checking extents",
483                 "checking free space cache",
484                 "checking fs roots",
485         };
486
487         task_period_start(priv->info, 1000 /* 1s */);
488
489         if (priv->tp == TASK_NOTHING)
490                 return NULL;
491
492         while (1) {
493                 printf("%s [%c]\r", task_position_string[priv->tp],
494                                 work_indicator[count % 4]);
495                 count++;
496                 fflush(stdout);
497                 task_period_wait(priv->info);
498         }
499         return NULL;
500 }
501
/*
 * Progress callback counterpart of print_status_check(): emits a newline,
 * flushes stdout and reports success.  The argument is not used.
 */
static int print_status_return(void *p)
{
	(void)p;

	printf("\n");
	fflush(stdout);
	return 0;
}
509
510 static enum btrfs_check_mode parse_check_mode(const char *str)
511 {
512         if (strcmp(str, "lowmem") == 0)
513                 return CHECK_MODE_LOWMEM;
514         if (strcmp(str, "orig") == 0)
515                 return CHECK_MODE_ORIGINAL;
516         if (strcmp(str, "original") == 0)
517                 return CHECK_MODE_ORIGINAL;
518
519         return CHECK_MODE_UNKNOWN;
520 }
521
522 /* Compatible function to allow reuse of old codes */
523 static u64 first_extent_gap(struct rb_root *holes)
524 {
525         struct file_extent_hole *hole;
526
527         if (RB_EMPTY_ROOT(holes))
528                 return (u64)-1;
529
530         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
531         return hole->start;
532 }
533
534 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
535 {
536         struct file_extent_hole *hole1;
537         struct file_extent_hole *hole2;
538
539         hole1 = rb_entry(node1, struct file_extent_hole, node);
540         hole2 = rb_entry(node2, struct file_extent_hole, node);
541
542         if (hole1->start > hole2->start)
543                 return -1;
544         if (hole1->start < hole2->start)
545                 return 1;
546         /* Now hole1->start == hole2->start */
547         if (hole1->len >= hole2->len)
548                 /*
549                  * Hole 1 will be merge center
550                  * Same hole will be merged later
551                  */
552                 return -1;
553         /* Hole 2 will be merge center */
554         return 1;
555 }
556
557 /*
558  * Add a hole to the record
559  *
560  * This will do hole merge for copy_file_extent_holes(),
561  * which will ensure there won't be continuous holes.
562  */
563 static int add_file_extent_hole(struct rb_root *holes,
564                                 u64 start, u64 len)
565 {
566         struct file_extent_hole *hole;
567         struct file_extent_hole *prev = NULL;
568         struct file_extent_hole *next = NULL;
569
570         hole = malloc(sizeof(*hole));
571         if (!hole)
572                 return -ENOMEM;
573         hole->start = start;
574         hole->len = len;
575         /* Since compare will not return 0, no -EEXIST will happen */
576         rb_insert(holes, &hole->node, compare_hole);
577
578         /* simple merge with previous hole */
579         if (rb_prev(&hole->node))
580                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
581                                 node);
582         if (prev && prev->start + prev->len >= hole->start) {
583                 hole->len = hole->start + hole->len - prev->start;
584                 hole->start = prev->start;
585                 rb_erase(&prev->node, holes);
586                 free(prev);
587                 prev = NULL;
588         }
589
590         /* iterate merge with next holes */
591         while (1) {
592                 if (!rb_next(&hole->node))
593                         break;
594                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
595                                         node);
596                 if (hole->start + hole->len >= next->start) {
597                         if (hole->start + hole->len <= next->start + next->len)
598                                 hole->len = next->start + next->len -
599                                             hole->start;
600                         rb_erase(&next->node, holes);
601                         free(next);
602                         next = NULL;
603                 } else
604                         break;
605         }
606         return 0;
607 }
608
609 static int compare_hole_range(struct rb_node *node, void *data)
610 {
611         struct file_extent_hole *hole;
612         u64 start;
613
614         hole = (struct file_extent_hole *)data;
615         start = hole->start;
616
617         hole = rb_entry(node, struct file_extent_hole, node);
618         if (start < hole->start)
619                 return -1;
620         if (start >= hole->start && start < hole->start + hole->len)
621                 return 0;
622         return 1;
623 }
624
625 /*
626  * Delete a hole in the record
627  *
628  * This will do the hole split and is much restrict than add.
629  */
630 static int del_file_extent_hole(struct rb_root *holes,
631                                 u64 start, u64 len)
632 {
633         struct file_extent_hole *hole;
634         struct file_extent_hole tmp;
635         u64 prev_start = 0;
636         u64 prev_len = 0;
637         u64 next_start = 0;
638         u64 next_len = 0;
639         struct rb_node *node;
640         int have_prev = 0;
641         int have_next = 0;
642         int ret = 0;
643
644         tmp.start = start;
645         tmp.len = len;
646         node = rb_search(holes, &tmp, compare_hole_range, NULL);
647         if (!node)
648                 return -EEXIST;
649         hole = rb_entry(node, struct file_extent_hole, node);
650         if (start + len > hole->start + hole->len)
651                 return -EEXIST;
652
653         /*
654          * Now there will be no overlap, delete the hole and re-add the
655          * split(s) if they exists.
656          */
657         if (start > hole->start) {
658                 prev_start = hole->start;
659                 prev_len = start - hole->start;
660                 have_prev = 1;
661         }
662         if (hole->start + hole->len > start + len) {
663                 next_start = start + len;
664                 next_len = hole->start + hole->len - start - len;
665                 have_next = 1;
666         }
667         rb_erase(node, holes);
668         free(hole);
669         if (have_prev) {
670                 ret = add_file_extent_hole(holes, prev_start, prev_len);
671                 if (ret < 0)
672                         return ret;
673         }
674         if (have_next) {
675                 ret = add_file_extent_hole(holes, next_start, next_len);
676                 if (ret < 0)
677                         return ret;
678         }
679         return 0;
680 }
681
682 static int copy_file_extent_holes(struct rb_root *dst,
683                                   struct rb_root *src)
684 {
685         struct file_extent_hole *hole;
686         struct rb_node *node;
687         int ret = 0;
688
689         node = rb_first(src);
690         while (node) {
691                 hole = rb_entry(node, struct file_extent_hole, node);
692                 ret = add_file_extent_hole(dst, hole->start, hole->len);
693                 if (ret)
694                         break;
695                 node = rb_next(node);
696         }
697         return ret;
698 }
699
700 static void free_file_extent_holes(struct rb_root *holes)
701 {
702         struct rb_node *node;
703         struct file_extent_hole *hole;
704
705         node = rb_first(holes);
706         while (node) {
707                 hole = rb_entry(node, struct file_extent_hole, node);
708                 rb_erase(node, holes);
709                 free(hole);
710                 node = rb_first(holes);
711         }
712 }
713
/* Forward declaration; the definition is outside this part of the file. */
714 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
715
716 static void record_root_in_trans(struct btrfs_trans_handle *trans,
717                                  struct btrfs_root *root)
718 {
719         if (root->last_trans != trans->transid) {
720                 root->track_dirty = 1;
721                 root->last_trans = trans->transid;
722                 root->commit_root = root->node;
723                 extent_buffer_get(root->node);
724         }
725 }
726
727 static u8 imode_to_type(u32 imode)
728 {
729 #define S_SHIFT 12
730         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
731                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
732                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
733                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
734                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
735                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
736                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
737                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
738         };
739
740         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
741 #undef S_SHIFT
742 }
743
744 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
745 {
746         struct device_record *rec1;
747         struct device_record *rec2;
748
749         rec1 = rb_entry(node1, struct device_record, node);
750         rec2 = rb_entry(node2, struct device_record, node);
751         if (rec1->devid > rec2->devid)
752                 return -1;
753         else if (rec1->devid < rec2->devid)
754                 return 1;
755         else
756                 return 0;
757 }
758
759 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
760 {
761         struct inode_record *rec;
762         struct inode_backref *backref;
763         struct inode_backref *orig;
764         struct inode_backref *tmp;
765         struct orphan_data_extent *src_orphan;
766         struct orphan_data_extent *dst_orphan;
767         struct rb_node *rb;
768         size_t size;
769         int ret;
770
771         rec = malloc(sizeof(*rec));
772         if (!rec)
773                 return ERR_PTR(-ENOMEM);
774         memcpy(rec, orig_rec, sizeof(*rec));
775         rec->refs = 1;
776         INIT_LIST_HEAD(&rec->backrefs);
777         INIT_LIST_HEAD(&rec->orphan_extents);
778         rec->holes = RB_ROOT;
779
780         list_for_each_entry(orig, &orig_rec->backrefs, list) {
781                 size = sizeof(*orig) + orig->namelen + 1;
782                 backref = malloc(size);
783                 if (!backref) {
784                         ret = -ENOMEM;
785                         goto cleanup;
786                 }
787                 memcpy(backref, orig, size);
788                 list_add_tail(&backref->list, &rec->backrefs);
789         }
790         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
791                 dst_orphan = malloc(sizeof(*dst_orphan));
792                 if (!dst_orphan) {
793                         ret = -ENOMEM;
794                         goto cleanup;
795                 }
796                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
797                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
798         }
799         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
800         if (ret < 0)
801                 goto cleanup_rb;
802
803         return rec;
804
805 cleanup_rb:
806         rb = rb_first(&rec->holes);
807         while (rb) {
808                 struct file_extent_hole *hole;
809
810                 hole = rb_entry(rb, struct file_extent_hole, node);
811                 rb = rb_next(rb);
812                 free(hole);
813         }
814
815 cleanup:
816         if (!list_empty(&rec->backrefs))
817                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
818                         list_del(&orig->list);
819                         free(orig);
820                 }
821
822         if (!list_empty(&rec->orphan_extents))
823                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
824                         list_del(&orig->list);
825                         free(orig);
826                 }
827
828         free(rec);
829
830         return ERR_PTR(ret);
831 }
832
833 static void print_orphan_data_extents(struct list_head *orphan_extents,
834                                       u64 objectid)
835 {
836         struct orphan_data_extent *orphan;
837
838         if (list_empty(orphan_extents))
839                 return;
840         printf("The following data extent is lost in tree %llu:\n",
841                objectid);
842         list_for_each_entry(orphan, orphan_extents, list) {
843                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
844                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
845                        orphan->disk_len);
846         }
847 }
848
849 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
850 {
851         u64 root_objectid = root->root_key.objectid;
852         int errors = rec->errors;
853
854         if (!errors)
855                 return;
856         /* reloc root errors, we print its corresponding fs root objectid*/
857         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
858                 root_objectid = root->root_key.offset;
859                 fprintf(stderr, "reloc");
860         }
861         fprintf(stderr, "root %llu inode %llu errors %x",
862                 (unsigned long long) root_objectid,
863                 (unsigned long long) rec->ino, rec->errors);
864
865         if (errors & I_ERR_NO_INODE_ITEM)
866                 fprintf(stderr, ", no inode item");
867         if (errors & I_ERR_NO_ORPHAN_ITEM)
868                 fprintf(stderr, ", no orphan item");
869         if (errors & I_ERR_DUP_INODE_ITEM)
870                 fprintf(stderr, ", dup inode item");
871         if (errors & I_ERR_DUP_DIR_INDEX)
872                 fprintf(stderr, ", dup dir index");
873         if (errors & I_ERR_ODD_DIR_ITEM)
874                 fprintf(stderr, ", odd dir item");
875         if (errors & I_ERR_ODD_FILE_EXTENT)
876                 fprintf(stderr, ", odd file extent");
877         if (errors & I_ERR_BAD_FILE_EXTENT)
878                 fprintf(stderr, ", bad file extent");
879         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
880                 fprintf(stderr, ", file extent overlap");
881         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
882                 fprintf(stderr, ", file extent discount");
883         if (errors & I_ERR_DIR_ISIZE_WRONG)
884                 fprintf(stderr, ", dir isize wrong");
885         if (errors & I_ERR_FILE_NBYTES_WRONG)
886                 fprintf(stderr, ", nbytes wrong");
887         if (errors & I_ERR_ODD_CSUM_ITEM)
888                 fprintf(stderr, ", odd csum item");
889         if (errors & I_ERR_SOME_CSUM_MISSING)
890                 fprintf(stderr, ", some csum missing");
891         if (errors & I_ERR_LINK_COUNT_WRONG)
892                 fprintf(stderr, ", link count wrong");
893         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
894                 fprintf(stderr, ", orphan file extent");
895         fprintf(stderr, "\n");
896         /* Print the orphan extents if needed */
897         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
899
900         /* Print the holes if needed */
901         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
902                 struct file_extent_hole *hole;
903                 struct rb_node *node;
904                 int found = 0;
905
906                 node = rb_first(&rec->holes);
907                 fprintf(stderr, "Found file extent holes:\n");
908                 while (node) {
909                         found = 1;
910                         hole = rb_entry(node, struct file_extent_hole, node);
911                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
912                                 hole->start, hole->len);
913                         node = rb_next(node);
914                 }
915                 if (!found)
916                         fprintf(stderr, "\tstart: 0, len: %llu\n",
917                                 round_up(rec->isize,
918                                          root->fs_info->sectorsize));
919         }
920 }
921
922 static void print_ref_error(int errors)
923 {
924         if (errors & REF_ERR_NO_DIR_ITEM)
925                 fprintf(stderr, ", no dir item");
926         if (errors & REF_ERR_NO_DIR_INDEX)
927                 fprintf(stderr, ", no dir index");
928         if (errors & REF_ERR_NO_INODE_REF)
929                 fprintf(stderr, ", no inode ref");
930         if (errors & REF_ERR_DUP_DIR_ITEM)
931                 fprintf(stderr, ", dup dir item");
932         if (errors & REF_ERR_DUP_DIR_INDEX)
933                 fprintf(stderr, ", dup dir index");
934         if (errors & REF_ERR_DUP_INODE_REF)
935                 fprintf(stderr, ", dup inode ref");
936         if (errors & REF_ERR_INDEX_UNMATCH)
937                 fprintf(stderr, ", index mismatch");
938         if (errors & REF_ERR_FILETYPE_UNMATCH)
939                 fprintf(stderr, ", filetype mismatch");
940         if (errors & REF_ERR_NAME_TOO_LONG)
941                 fprintf(stderr, ", name too long");
942         if (errors & REF_ERR_NO_ROOT_REF)
943                 fprintf(stderr, ", no root ref");
944         if (errors & REF_ERR_NO_ROOT_BACKREF)
945                 fprintf(stderr, ", no root backref");
946         if (errors & REF_ERR_DUP_ROOT_REF)
947                 fprintf(stderr, ", dup root ref");
948         if (errors & REF_ERR_DUP_ROOT_BACKREF)
949                 fprintf(stderr, ", dup root backref");
950         fprintf(stderr, "\n");
951 }
952
953 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
954                                           u64 ino, int mod)
955 {
956         struct ptr_node *node;
957         struct cache_extent *cache;
958         struct inode_record *rec = NULL;
959         int ret;
960
961         cache = lookup_cache_extent(inode_cache, ino, 1);
962         if (cache) {
963                 node = container_of(cache, struct ptr_node, cache);
964                 rec = node->data;
965                 if (mod && rec->refs > 1) {
966                         node->data = clone_inode_rec(rec);
967                         if (IS_ERR(node->data))
968                                 return node->data;
969                         rec->refs--;
970                         rec = node->data;
971                 }
972         } else if (mod) {
973                 rec = calloc(1, sizeof(*rec));
974                 if (!rec)
975                         return ERR_PTR(-ENOMEM);
976                 rec->ino = ino;
977                 rec->extent_start = (u64)-1;
978                 rec->refs = 1;
979                 INIT_LIST_HEAD(&rec->backrefs);
980                 INIT_LIST_HEAD(&rec->orphan_extents);
981                 rec->holes = RB_ROOT;
982
983                 node = malloc(sizeof(*node));
984                 if (!node) {
985                         free(rec);
986                         return ERR_PTR(-ENOMEM);
987                 }
988                 node->cache.start = ino;
989                 node->cache.size = 1;
990                 node->data = rec;
991
992                 if (ino == BTRFS_FREE_INO_OBJECTID)
993                         rec->found_link = 1;
994
995                 ret = insert_cache_extent(inode_cache, &node->cache);
996                 if (ret)
997                         return ERR_PTR(-EEXIST);
998         }
999         return rec;
1000 }
1001
1002 static void free_orphan_data_extents(struct list_head *orphan_extents)
1003 {
1004         struct orphan_data_extent *orphan;
1005
1006         while (!list_empty(orphan_extents)) {
1007                 orphan = list_entry(orphan_extents->next,
1008                                     struct orphan_data_extent, list);
1009                 list_del(&orphan->list);
1010                 free(orphan);
1011         }
1012 }
1013
1014 static void free_inode_rec(struct inode_record *rec)
1015 {
1016         struct inode_backref *backref;
1017
1018         if (--rec->refs > 0)
1019                 return;
1020
1021         while (!list_empty(&rec->backrefs)) {
1022                 backref = to_inode_backref(rec->backrefs.next);
1023                 list_del(&backref->list);
1024                 free(backref);
1025         }
1026         free_orphan_data_extents(&rec->orphan_extents);
1027         free_file_extent_holes(&rec->holes);
1028         free(rec);
1029 }
1030
1031 static int can_free_inode_rec(struct inode_record *rec)
1032 {
1033         if (!rec->errors && rec->checked && rec->found_inode_item &&
1034             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1035                 return 1;
1036         return 0;
1037 }
1038
1039 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1040                                  struct inode_record *rec)
1041 {
1042         struct cache_extent *cache;
1043         struct inode_backref *tmp, *backref;
1044         struct ptr_node *node;
1045         u8 filetype;
1046
1047         if (!rec->found_inode_item)
1048                 return;
1049
1050         filetype = imode_to_type(rec->imode);
1051         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1052                 if (backref->found_dir_item && backref->found_dir_index) {
1053                         if (backref->filetype != filetype)
1054                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1055                         if (!backref->errors && backref->found_inode_ref &&
1056                             rec->nlink == rec->found_link) {
1057                                 list_del(&backref->list);
1058                                 free(backref);
1059                         }
1060                 }
1061         }
1062
1063         if (!rec->checked || rec->merging)
1064                 return;
1065
1066         if (S_ISDIR(rec->imode)) {
1067                 if (rec->found_size != rec->isize)
1068                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1069                 if (rec->found_file_extent)
1070                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1071         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1072                 if (rec->found_dir_item)
1073                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1074                 if (rec->found_size != rec->nbytes)
1075                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1076                 if (rec->nlink > 0 && !no_holes &&
1077                     (rec->extent_end < rec->isize ||
1078                      first_extent_gap(&rec->holes) < rec->isize))
1079                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1080         }
1081
1082         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1083                 if (rec->found_csum_item && rec->nodatasum)
1084                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1085                 if (rec->some_csum_missing && !rec->nodatasum)
1086                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1087         }
1088
1089         BUG_ON(rec->refs != 1);
1090         if (can_free_inode_rec(rec)) {
1091                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1092                 node = container_of(cache, struct ptr_node, cache);
1093                 BUG_ON(node->data != rec);
1094                 remove_cache_extent(inode_cache, &node->cache);
1095                 free(node);
1096                 free_inode_rec(rec);
1097         }
1098 }
1099
1100 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1101 {
1102         struct btrfs_path path;
1103         struct btrfs_key key;
1104         int ret;
1105
1106         key.objectid = BTRFS_ORPHAN_OBJECTID;
1107         key.type = BTRFS_ORPHAN_ITEM_KEY;
1108         key.offset = ino;
1109
1110         btrfs_init_path(&path);
1111         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1112         btrfs_release_path(&path);
1113         if (ret > 0)
1114                 ret = -ENOENT;
1115         return ret;
1116 }
1117
1118 static int process_inode_item(struct extent_buffer *eb,
1119                               int slot, struct btrfs_key *key,
1120                               struct shared_node *active_node)
1121 {
1122         struct inode_record *rec;
1123         struct btrfs_inode_item *item;
1124
1125         rec = active_node->current;
1126         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1127         if (rec->found_inode_item) {
1128                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1129                 return 1;
1130         }
1131         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1132         rec->nlink = btrfs_inode_nlink(eb, item);
1133         rec->isize = btrfs_inode_size(eb, item);
1134         rec->nbytes = btrfs_inode_nbytes(eb, item);
1135         rec->imode = btrfs_inode_mode(eb, item);
1136         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1137                 rec->nodatasum = 1;
1138         rec->found_inode_item = 1;
1139         if (rec->nlink == 0)
1140                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1141         maybe_free_inode_rec(&active_node->inode_cache, rec);
1142         return 0;
1143 }
1144
1145 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1146                                                 const char *name,
1147                                                 int namelen, u64 dir)
1148 {
1149         struct inode_backref *backref;
1150
1151         list_for_each_entry(backref, &rec->backrefs, list) {
1152                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1153                         break;
1154                 if (backref->dir != dir || backref->namelen != namelen)
1155                         continue;
1156                 if (memcmp(name, backref->name, namelen))
1157                         continue;
1158                 return backref;
1159         }
1160
1161         backref = malloc(sizeof(*backref) + namelen + 1);
1162         if (!backref)
1163                 return NULL;
1164         memset(backref, 0, sizeof(*backref));
1165         backref->dir = dir;
1166         backref->namelen = namelen;
1167         memcpy(backref->name, name, namelen);
1168         backref->name[namelen] = '\0';
1169         list_add_tail(&backref->list, &rec->backrefs);
1170         return backref;
1171 }
1172
1173 static int add_inode_backref(struct cache_tree *inode_cache,
1174                              u64 ino, u64 dir, u64 index,
1175                              const char *name, int namelen,
1176                              u8 filetype, u8 itemtype, int errors)
1177 {
1178         struct inode_record *rec;
1179         struct inode_backref *backref;
1180
1181         rec = get_inode_rec(inode_cache, ino, 1);
1182         BUG_ON(IS_ERR(rec));
1183         backref = get_inode_backref(rec, name, namelen, dir);
1184         BUG_ON(!backref);
1185         if (errors)
1186                 backref->errors |= errors;
1187         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1188                 if (backref->found_dir_index)
1189                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1190                 if (backref->found_inode_ref && backref->index != index)
1191                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1192                 if (backref->found_dir_item && backref->filetype != filetype)
1193                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1194
1195                 backref->index = index;
1196                 backref->filetype = filetype;
1197                 backref->found_dir_index = 1;
1198         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1199                 rec->found_link++;
1200                 if (backref->found_dir_item)
1201                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1202                 if (backref->found_dir_index && backref->filetype != filetype)
1203                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1204
1205                 backref->filetype = filetype;
1206                 backref->found_dir_item = 1;
1207         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1208                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1209                 if (backref->found_inode_ref)
1210                         backref->errors |= REF_ERR_DUP_INODE_REF;
1211                 if (backref->found_dir_index && backref->index != index)
1212                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1213                 else
1214                         backref->index = index;
1215
1216                 backref->ref_type = itemtype;
1217                 backref->found_inode_ref = 1;
1218         } else {
1219                 BUG_ON(1);
1220         }
1221
1222         maybe_free_inode_rec(inode_cache, rec);
1223         return 0;
1224 }
1225
1226 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1227                             struct cache_tree *dst_cache)
1228 {
1229         struct inode_backref *backref;
1230         u32 dir_count = 0;
1231         int ret = 0;
1232
1233         dst->merging = 1;
1234         list_for_each_entry(backref, &src->backrefs, list) {
1235                 if (backref->found_dir_index) {
1236                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1237                                         backref->index, backref->name,
1238                                         backref->namelen, backref->filetype,
1239                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1240                 }
1241                 if (backref->found_dir_item) {
1242                         dir_count++;
1243                         add_inode_backref(dst_cache, dst->ino,
1244                                         backref->dir, 0, backref->name,
1245                                         backref->namelen, backref->filetype,
1246                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1247                 }
1248                 if (backref->found_inode_ref) {
1249                         add_inode_backref(dst_cache, dst->ino,
1250                                         backref->dir, backref->index,
1251                                         backref->name, backref->namelen, 0,
1252                                         backref->ref_type, backref->errors);
1253                 }
1254         }
1255
1256         if (src->found_dir_item)
1257                 dst->found_dir_item = 1;
1258         if (src->found_file_extent)
1259                 dst->found_file_extent = 1;
1260         if (src->found_csum_item)
1261                 dst->found_csum_item = 1;
1262         if (src->some_csum_missing)
1263                 dst->some_csum_missing = 1;
1264         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1265                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1266                 if (ret < 0)
1267                         return ret;
1268         }
1269
1270         BUG_ON(src->found_link < dir_count);
1271         dst->found_link += src->found_link - dir_count;
1272         dst->found_size += src->found_size;
1273         if (src->extent_start != (u64)-1) {
1274                 if (dst->extent_start == (u64)-1) {
1275                         dst->extent_start = src->extent_start;
1276                         dst->extent_end = src->extent_end;
1277                 } else {
1278                         if (dst->extent_end > src->extent_start)
1279                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1280                         else if (dst->extent_end < src->extent_start) {
1281                                 ret = add_file_extent_hole(&dst->holes,
1282                                         dst->extent_end,
1283                                         src->extent_start - dst->extent_end);
1284                         }
1285                         if (dst->extent_end < src->extent_end)
1286                                 dst->extent_end = src->extent_end;
1287                 }
1288         }
1289
1290         dst->errors |= src->errors;
1291         if (src->found_inode_item) {
1292                 if (!dst->found_inode_item) {
1293                         dst->nlink = src->nlink;
1294                         dst->isize = src->isize;
1295                         dst->nbytes = src->nbytes;
1296                         dst->imode = src->imode;
1297                         dst->nodatasum = src->nodatasum;
1298                         dst->found_inode_item = 1;
1299                 } else {
1300                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1301                 }
1302         }
1303         dst->merging = 0;
1304
1305         return 0;
1306 }
1307
1308 static int splice_shared_node(struct shared_node *src_node,
1309                               struct shared_node *dst_node)
1310 {
1311         struct cache_extent *cache;
1312         struct ptr_node *node, *ins;
1313         struct cache_tree *src, *dst;
1314         struct inode_record *rec, *conflict;
1315         u64 current_ino = 0;
1316         int splice = 0;
1317         int ret;
1318
1319         if (--src_node->refs == 0)
1320                 splice = 1;
1321         if (src_node->current)
1322                 current_ino = src_node->current->ino;
1323
1324         src = &src_node->root_cache;
1325         dst = &dst_node->root_cache;
1326 again:
1327         cache = search_cache_extent(src, 0);
1328         while (cache) {
1329                 node = container_of(cache, struct ptr_node, cache);
1330                 rec = node->data;
1331                 cache = next_cache_extent(cache);
1332
1333                 if (splice) {
1334                         remove_cache_extent(src, &node->cache);
1335                         ins = node;
1336                 } else {
1337                         ins = malloc(sizeof(*ins));
1338                         BUG_ON(!ins);
1339                         ins->cache.start = node->cache.start;
1340                         ins->cache.size = node->cache.size;
1341                         ins->data = rec;
1342                         rec->refs++;
1343                 }
1344                 ret = insert_cache_extent(dst, &ins->cache);
1345                 if (ret == -EEXIST) {
1346                         conflict = get_inode_rec(dst, rec->ino, 1);
1347                         BUG_ON(IS_ERR(conflict));
1348                         merge_inode_recs(rec, conflict, dst);
1349                         if (rec->checked) {
1350                                 conflict->checked = 1;
1351                                 if (dst_node->current == conflict)
1352                                         dst_node->current = NULL;
1353                         }
1354                         maybe_free_inode_rec(dst, conflict);
1355                         free_inode_rec(rec);
1356                         free(ins);
1357                 } else {
1358                         BUG_ON(ret);
1359                 }
1360         }
1361
1362         if (src == &src_node->root_cache) {
1363                 src = &src_node->inode_cache;
1364                 dst = &dst_node->inode_cache;
1365                 goto again;
1366         }
1367
1368         if (current_ino > 0 && (!dst_node->current ||
1369             current_ino > dst_node->current->ino)) {
1370                 if (dst_node->current) {
1371                         dst_node->current->checked = 1;
1372                         maybe_free_inode_rec(dst, dst_node->current);
1373                 }
1374                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1375                 BUG_ON(IS_ERR(dst_node->current));
1376         }
1377         return 0;
1378 }
1379
1380 static void free_inode_ptr(struct cache_extent *cache)
1381 {
1382         struct ptr_node *node;
1383         struct inode_record *rec;
1384
1385         node = container_of(cache, struct ptr_node, cache);
1386         rec = node->data;
1387         free_inode_rec(rec);
1388         free(node);
1389 }
1390
1391 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1392
1393 static struct shared_node *find_shared_node(struct cache_tree *shared,
1394                                             u64 bytenr)
1395 {
1396         struct cache_extent *cache;
1397         struct shared_node *node;
1398
1399         cache = lookup_cache_extent(shared, bytenr, 1);
1400         if (cache) {
1401                 node = container_of(cache, struct shared_node, cache);
1402                 return node;
1403         }
1404         return NULL;
1405 }
1406
1407 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1408 {
1409         int ret;
1410         struct shared_node *node;
1411
1412         node = calloc(1, sizeof(*node));
1413         if (!node)
1414                 return -ENOMEM;
1415         node->cache.start = bytenr;
1416         node->cache.size = 1;
1417         cache_tree_init(&node->root_cache);
1418         cache_tree_init(&node->inode_cache);
1419         node->refs = refs;
1420
1421         ret = insert_cache_extent(shared, &node->cache);
1422
1423         return ret;
1424 }
1425
1426 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1427                              struct walk_control *wc, int level)
1428 {
1429         struct shared_node *node;
1430         struct shared_node *dest;
1431         int ret;
1432
1433         if (level == wc->active_node)
1434                 return 0;
1435
1436         BUG_ON(wc->active_node <= level);
1437         node = find_shared_node(&wc->shared, bytenr);
1438         if (!node) {
1439                 ret = add_shared_node(&wc->shared, bytenr, refs);
1440                 BUG_ON(ret);
1441                 node = find_shared_node(&wc->shared, bytenr);
1442                 wc->nodes[level] = node;
1443                 wc->active_node = level;
1444                 return 0;
1445         }
1446
1447         if (wc->root_level == wc->active_node &&
1448             btrfs_root_refs(&root->root_item) == 0) {
1449                 if (--node->refs == 0) {
1450                         free_inode_recs_tree(&node->root_cache);
1451                         free_inode_recs_tree(&node->inode_cache);
1452                         remove_cache_extent(&wc->shared, &node->cache);
1453                         free(node);
1454                 }
1455                 return 1;
1456         }
1457
1458         dest = wc->nodes[wc->active_node];
1459         splice_shared_node(node, dest);
1460         if (node->refs == 0) {
1461                 remove_cache_extent(&wc->shared, &node->cache);
1462                 free(node);
1463         }
1464         return 1;
1465 }
1466
1467 static int leave_shared_node(struct btrfs_root *root,
1468                              struct walk_control *wc, int level)
1469 {
1470         struct shared_node *node;
1471         struct shared_node *dest;
1472         int i;
1473
1474         if (level == wc->root_level)
1475                 return 0;
1476
1477         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1478                 if (wc->nodes[i])
1479                         break;
1480         }
1481         BUG_ON(i >= BTRFS_MAX_LEVEL);
1482
1483         node = wc->nodes[wc->active_node];
1484         wc->nodes[wc->active_node] = NULL;
1485         wc->active_node = i;
1486
1487         dest = wc->nodes[wc->active_node];
1488         if (wc->active_node < wc->root_level ||
1489             btrfs_root_refs(&root->root_item) > 0) {
1490                 BUG_ON(node->refs <= 1);
1491                 splice_shared_node(node, dest);
1492         } else {
1493                 BUG_ON(node->refs < 2);
1494                 node->refs--;
1495         }
1496         return 0;
1497 }
1498
1499 /*
1500  * Returns:
1501  * < 0 - on error
1502  * 1   - if the root with id child_root_id is a child of root parent_root_id
1503  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1504  *       has other root(s) as parent(s)
1505  * 2   - if the root child_root_id doesn't have any parent roots
1506  */
1507 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1508                          u64 child_root_id)
1509 {
1510         struct btrfs_path path;
1511         struct btrfs_key key;
1512         struct extent_buffer *leaf;
1513         int has_parent = 0;
1514         int ret;
1515
1516         btrfs_init_path(&path);
1517
1518         key.objectid = parent_root_id;
1519         key.type = BTRFS_ROOT_REF_KEY;
1520         key.offset = child_root_id;
1521         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1522                                 0, 0);
1523         if (ret < 0)
1524                 return ret;
1525         btrfs_release_path(&path);
1526         if (!ret)
1527                 return 1;
1528
1529         key.objectid = child_root_id;
1530         key.type = BTRFS_ROOT_BACKREF_KEY;
1531         key.offset = 0;
1532         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1533                                 0, 0);
1534         if (ret < 0)
1535                 goto out;
1536
1537         while (1) {
1538                 leaf = path.nodes[0];
1539                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1540                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1541                         if (ret)
1542                                 break;
1543                         leaf = path.nodes[0];
1544                 }
1545
1546                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1547                 if (key.objectid != child_root_id ||
1548                     key.type != BTRFS_ROOT_BACKREF_KEY)
1549                         break;
1550
1551                 has_parent = 1;
1552
1553                 if (key.offset == parent_root_id) {
1554                         btrfs_release_path(&path);
1555                         return 1;
1556                 }
1557
1558                 path.slots[0]++;
1559         }
1560 out:
1561         btrfs_release_path(&path);
1562         if (ret < 0)
1563                 return ret;
1564         return has_parent ? 0 : 2;
1565 }
1566
1567 static int process_dir_item(struct extent_buffer *eb,
1568                             int slot, struct btrfs_key *key,
1569                             struct shared_node *active_node)
1570 {
1571         u32 total;
1572         u32 cur = 0;
1573         u32 len;
1574         u32 name_len;
1575         u32 data_len;
1576         int error;
1577         int nritems = 0;
1578         u8 filetype;
1579         struct btrfs_dir_item *di;
1580         struct inode_record *rec;
1581         struct cache_tree *root_cache;
1582         struct cache_tree *inode_cache;
1583         struct btrfs_key location;
1584         char namebuf[BTRFS_NAME_LEN];
1585
1586         root_cache = &active_node->root_cache;
1587         inode_cache = &active_node->inode_cache;
1588         rec = active_node->current;
1589         rec->found_dir_item = 1;
1590
1591         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1592         total = btrfs_item_size_nr(eb, slot);
1593         while (cur < total) {
1594                 nritems++;
1595                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1596                 name_len = btrfs_dir_name_len(eb, di);
1597                 data_len = btrfs_dir_data_len(eb, di);
1598                 filetype = btrfs_dir_type(eb, di);
1599
1600                 rec->found_size += name_len;
1601                 if (cur + sizeof(*di) + name_len > total ||
1602                     name_len > BTRFS_NAME_LEN) {
1603                         error = REF_ERR_NAME_TOO_LONG;
1604
1605                         if (cur + sizeof(*di) > total)
1606                                 break;
1607                         len = min_t(u32, total - cur - sizeof(*di),
1608                                     BTRFS_NAME_LEN);
1609                 } else {
1610                         len = name_len;
1611                         error = 0;
1612                 }
1613
1614                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1615
1616                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1617                     key->offset != btrfs_name_hash(namebuf, len)) {
1618                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1619                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1620                         key->objectid, key->offset, namebuf, len, filetype,
1621                         key->offset, btrfs_name_hash(namebuf, len));
1622                 }
1623
1624                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1625                         add_inode_backref(inode_cache, location.objectid,
1626                                           key->objectid, key->offset, namebuf,
1627                                           len, filetype, key->type, error);
1628                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1629                         add_inode_backref(root_cache, location.objectid,
1630                                           key->objectid, key->offset,
1631                                           namebuf, len, filetype,
1632                                           key->type, error);
1633                 } else {
1634                         fprintf(stderr, "invalid location in dir item %u\n",
1635                                 location.type);
1636                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1637                                           key->objectid, key->offset, namebuf,
1638                                           len, filetype, key->type, error);
1639                 }
1640
1641                 len = sizeof(*di) + name_len + data_len;
1642                 di = (struct btrfs_dir_item *)((char *)di + len);
1643                 cur += len;
1644         }
1645         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1646                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1647
1648         return 0;
1649 }
1650
1651 static int process_inode_ref(struct extent_buffer *eb,
1652                              int slot, struct btrfs_key *key,
1653                              struct shared_node *active_node)
1654 {
1655         u32 total;
1656         u32 cur = 0;
1657         u32 len;
1658         u32 name_len;
1659         u64 index;
1660         int error;
1661         struct cache_tree *inode_cache;
1662         struct btrfs_inode_ref *ref;
1663         char namebuf[BTRFS_NAME_LEN];
1664
1665         inode_cache = &active_node->inode_cache;
1666
1667         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1668         total = btrfs_item_size_nr(eb, slot);
1669         while (cur < total) {
1670                 name_len = btrfs_inode_ref_name_len(eb, ref);
1671                 index = btrfs_inode_ref_index(eb, ref);
1672
1673                 /* inode_ref + namelen should not cross item boundary */
1674                 if (cur + sizeof(*ref) + name_len > total ||
1675                     name_len > BTRFS_NAME_LEN) {
1676                         if (total < cur + sizeof(*ref))
1677                                 break;
1678
1679                         /* Still try to read out the remaining part */
1680                         len = min_t(u32, total - cur - sizeof(*ref),
1681                                     BTRFS_NAME_LEN);
1682                         error = REF_ERR_NAME_TOO_LONG;
1683                 } else {
1684                         len = name_len;
1685                         error = 0;
1686                 }
1687
1688                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1689                 add_inode_backref(inode_cache, key->objectid, key->offset,
1690                                   index, namebuf, len, 0, key->type, error);
1691
1692                 len = sizeof(*ref) + name_len;
1693                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1694                 cur += len;
1695         }
1696         return 0;
1697 }
1698
1699 static int process_inode_extref(struct extent_buffer *eb,
1700                                 int slot, struct btrfs_key *key,
1701                                 struct shared_node *active_node)
1702 {
1703         u32 total;
1704         u32 cur = 0;
1705         u32 len;
1706         u32 name_len;
1707         u64 index;
1708         u64 parent;
1709         int error;
1710         struct cache_tree *inode_cache;
1711         struct btrfs_inode_extref *extref;
1712         char namebuf[BTRFS_NAME_LEN];
1713
1714         inode_cache = &active_node->inode_cache;
1715
1716         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1717         total = btrfs_item_size_nr(eb, slot);
1718         while (cur < total) {
1719                 name_len = btrfs_inode_extref_name_len(eb, extref);
1720                 index = btrfs_inode_extref_index(eb, extref);
1721                 parent = btrfs_inode_extref_parent(eb, extref);
1722                 if (name_len <= BTRFS_NAME_LEN) {
1723                         len = name_len;
1724                         error = 0;
1725                 } else {
1726                         len = BTRFS_NAME_LEN;
1727                         error = REF_ERR_NAME_TOO_LONG;
1728                 }
1729                 read_extent_buffer(eb, namebuf,
1730                                    (unsigned long)(extref + 1), len);
1731                 add_inode_backref(inode_cache, key->objectid, parent,
1732                                   index, namebuf, len, 0, key->type, error);
1733
1734                 len = sizeof(*extref) + name_len;
1735                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1736                 cur += len;
1737         }
1738         return 0;
1739
1740 }
1741
1742 static int count_csum_range(struct btrfs_root *root, u64 start,
1743                             u64 len, u64 *found)
1744 {
1745         struct btrfs_key key;
1746         struct btrfs_path path;
1747         struct extent_buffer *leaf;
1748         int ret;
1749         size_t size;
1750         *found = 0;
1751         u64 csum_end;
1752         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1753
1754         btrfs_init_path(&path);
1755
1756         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1757         key.offset = start;
1758         key.type = BTRFS_EXTENT_CSUM_KEY;
1759
1760         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1761                                 &key, &path, 0, 0);
1762         if (ret < 0)
1763                 goto out;
1764         if (ret > 0 && path.slots[0] > 0) {
1765                 leaf = path.nodes[0];
1766                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1767                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1768                     key.type == BTRFS_EXTENT_CSUM_KEY)
1769                         path.slots[0]--;
1770         }
1771
1772         while (len > 0) {
1773                 leaf = path.nodes[0];
1774                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1775                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1776                         if (ret > 0)
1777                                 break;
1778                         else if (ret < 0)
1779                                 goto out;
1780                         leaf = path.nodes[0];
1781                 }
1782
1783                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1784                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1785                     key.type != BTRFS_EXTENT_CSUM_KEY)
1786                         break;
1787
1788                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1789                 if (key.offset >= start + len)
1790                         break;
1791
1792                 if (key.offset > start)
1793                         start = key.offset;
1794
1795                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1796                 csum_end = key.offset + (size / csum_size) *
1797                            root->fs_info->sectorsize;
1798                 if (csum_end > start) {
1799                         size = min(csum_end - start, len);
1800                         len -= size;
1801                         start += size;
1802                         *found += size;
1803                 }
1804
1805                 path.slots[0]++;
1806         }
1807 out:
1808         btrfs_release_path(&path);
1809         if (ret < 0)
1810                 return ret;
1811         return 0;
1812 }
1813
1814 static int process_file_extent(struct btrfs_root *root,
1815                                 struct extent_buffer *eb,
1816                                 int slot, struct btrfs_key *key,
1817                                 struct shared_node *active_node)
1818 {
1819         struct inode_record *rec;
1820         struct btrfs_file_extent_item *fi;
1821         u64 num_bytes = 0;
1822         u64 disk_bytenr = 0;
1823         u64 extent_offset = 0;
1824         u64 mask = root->fs_info->sectorsize - 1;
1825         int extent_type;
1826         int ret;
1827
1828         rec = active_node->current;
1829         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1830         rec->found_file_extent = 1;
1831
1832         if (rec->extent_start == (u64)-1) {
1833                 rec->extent_start = key->offset;
1834                 rec->extent_end = key->offset;
1835         }
1836
1837         if (rec->extent_end > key->offset)
1838                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1839         else if (rec->extent_end < key->offset) {
1840                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1841                                            key->offset - rec->extent_end);
1842                 if (ret < 0)
1843                         return ret;
1844         }
1845
1846         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1847         extent_type = btrfs_file_extent_type(eb, fi);
1848
1849         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1850                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1851                 if (num_bytes == 0)
1852                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1853                 rec->found_size += num_bytes;
1854                 num_bytes = (num_bytes + mask) & ~mask;
1855         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1856                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1857                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1858                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1859                 extent_offset = btrfs_file_extent_offset(eb, fi);
1860                 if (num_bytes == 0 || (num_bytes & mask))
1861                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1862                 if (num_bytes + extent_offset >
1863                     btrfs_file_extent_ram_bytes(eb, fi))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1866                     (btrfs_file_extent_compression(eb, fi) ||
1867                      btrfs_file_extent_encryption(eb, fi) ||
1868                      btrfs_file_extent_other_encoding(eb, fi)))
1869                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1870                 if (disk_bytenr > 0)
1871                         rec->found_size += num_bytes;
1872         } else {
1873                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1874         }
1875         rec->extent_end = key->offset + num_bytes;
1876
1877         /*
1878          * The data reloc tree will copy full extents into its inode and then
1879          * copy the corresponding csums.  Because the extent it copied could be
1880          * a preallocated extent that hasn't been written to yet there may be no
1881          * csums to copy, ergo we won't have csums for our file extent.  This is
1882          * ok so just don't bother checking csums if the inode belongs to the
1883          * data reloc tree.
1884          */
1885         if (disk_bytenr > 0 &&
1886             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1887                 u64 found;
1888                 if (btrfs_file_extent_compression(eb, fi))
1889                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1890                 else
1891                         disk_bytenr += extent_offset;
1892
1893                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1894                 if (ret < 0)
1895                         return ret;
1896                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1897                         if (found > 0)
1898                                 rec->found_csum_item = 1;
1899                         if (found < num_bytes)
1900                                 rec->some_csum_missing = 1;
1901                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1902                         if (found > 0)
1903                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1904                 }
1905         }
1906         return 0;
1907 }
1908
1909 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1910                             struct walk_control *wc)
1911 {
1912         struct btrfs_key key;
1913         u32 nritems;
1914         int i;
1915         int ret = 0;
1916         struct cache_tree *inode_cache;
1917         struct shared_node *active_node;
1918
1919         if (wc->root_level == wc->active_node &&
1920             btrfs_root_refs(&root->root_item) == 0)
1921                 return 0;
1922
1923         active_node = wc->nodes[wc->active_node];
1924         inode_cache = &active_node->inode_cache;
1925         nritems = btrfs_header_nritems(eb);
1926         for (i = 0; i < nritems; i++) {
1927                 btrfs_item_key_to_cpu(eb, &key, i);
1928
1929                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1930                         continue;
1931                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1932                         continue;
1933
1934                 if (active_node->current == NULL ||
1935                     active_node->current->ino < key.objectid) {
1936                         if (active_node->current) {
1937                                 active_node->current->checked = 1;
1938                                 maybe_free_inode_rec(inode_cache,
1939                                                      active_node->current);
1940                         }
1941                         active_node->current = get_inode_rec(inode_cache,
1942                                                              key.objectid, 1);
1943                         BUG_ON(IS_ERR(active_node->current));
1944                 }
1945                 switch (key.type) {
1946                 case BTRFS_DIR_ITEM_KEY:
1947                 case BTRFS_DIR_INDEX_KEY:
1948                         ret = process_dir_item(eb, i, &key, active_node);
1949                         break;
1950                 case BTRFS_INODE_REF_KEY:
1951                         ret = process_inode_ref(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_EXTREF_KEY:
1954                         ret = process_inode_extref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_ITEM_KEY:
1957                         ret = process_inode_item(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_EXTENT_DATA_KEY:
1960                         ret = process_file_extent(root, eb, i, &key,
1961                                                   active_node);
1962                         break;
1963                 default:
1964                         break;
1965                 };
1966         }
1967         return ret;
1968 }
1969
1970 struct node_refs {
1971         u64 bytenr[BTRFS_MAX_LEVEL];
1972         u64 refs[BTRFS_MAX_LEVEL];
1973         int need_check[BTRFS_MAX_LEVEL];
1974 };
1975
1976 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1977                              struct node_refs *nrefs, u64 level);
1978 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1979                             unsigned int ext_ref);
1980
1981 /*
1982  * Returns >0  Found error, not fatal, should continue
1983  * Returns <0  Fatal error, must exit the whole check
1984  * Returns 0   No errors found
1985  */
1986 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1987                                struct node_refs *nrefs, int *level, int ext_ref)
1988 {
1989         struct extent_buffer *cur = path->nodes[0];
1990         struct btrfs_key key;
1991         u64 cur_bytenr;
1992         u32 nritems;
1993         u64 first_ino = 0;
1994         int root_level = btrfs_header_level(root->node);
1995         int i;
1996         int ret = 0; /* Final return value */
1997         int err = 0; /* Positive error bitmap */
1998
1999         cur_bytenr = cur->start;
2000
2001         /* skip to first inode item or the first inode number change */
2002         nritems = btrfs_header_nritems(cur);
2003         for (i = 0; i < nritems; i++) {
2004                 btrfs_item_key_to_cpu(cur, &key, i);
2005                 if (i == 0)
2006                         first_ino = key.objectid;
2007                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2008                     (first_ino && first_ino != key.objectid))
2009                         break;
2010         }
2011         if (i == nritems) {
2012                 path->slots[0] = nritems;
2013                 return 0;
2014         }
2015         path->slots[0] = i;
2016
2017 again:
2018         err |= check_inode_item(root, path, ext_ref);
2019
2020         /* modify cur since check_inode_item may change path */
2021         cur = path->nodes[0];
2022
2023         if (err & LAST_ITEM)
2024                 goto out;
2025
2026         /* still have inode items in thie leaf */
2027         if (cur->start == cur_bytenr)
2028                 goto again;
2029
2030         /*
2031          * we have switched to another leaf, above nodes may
2032          * have changed, here walk down the path, if a node
2033          * or leaf is shared, check whether we can skip this
2034          * node or leaf.
2035          */
2036         for (i = root_level; i >= 0; i--) {
2037                 if (path->nodes[i]->start == nrefs->bytenr[i])
2038                         continue;
2039
2040                 ret = update_nodes_refs(root,
2041                                 path->nodes[i]->start,
2042                                 nrefs, i);
2043                 if (ret)
2044                         goto out;
2045
2046                 if (!nrefs->need_check[i]) {
2047                         *level += 1;
2048                         break;
2049                 }
2050         }
2051
2052         for (i = 0; i < *level; i++) {
2053                 free_extent_buffer(path->nodes[i]);
2054                 path->nodes[i] = NULL;
2055         }
2056 out:
2057         err &= ~LAST_ITEM;
2058         if (err && !ret)
2059                 ret = err;
2060         return ret;
2061 }
2062
2063 static void reada_walk_down(struct btrfs_root *root,
2064                             struct extent_buffer *node, int slot)
2065 {
2066         struct btrfs_fs_info *fs_info = root->fs_info;
2067         u64 bytenr;
2068         u64 ptr_gen;
2069         u32 nritems;
2070         int i;
2071         int level;
2072
2073         level = btrfs_header_level(node);
2074         if (level != 1)
2075                 return;
2076
2077         nritems = btrfs_header_nritems(node);
2078         for (i = slot; i < nritems; i++) {
2079                 bytenr = btrfs_node_blockptr(node, i);
2080                 ptr_gen = btrfs_node_ptr_generation(node, i);
2081                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2082         }
2083 }
2084
2085 /*
2086  * Check the child node/leaf by the following condition:
2087  * 1. the first item key of the node/leaf should be the same with the one
2088  *    in parent.
2089  * 2. block in parent node should match the child node/leaf.
2090  * 3. generation of parent node and child's header should be consistent.
2091  *
2092  * Or the child node/leaf pointed by the key in parent is not valid.
2093  *
2094  * We hope to check leaf owner too, but since subvol may share leaves,
2095  * which makes leaf owner check not so strong, key check should be
2096  * sufficient enough for that case.
2097  */
2098 static int check_child_node(struct extent_buffer *parent, int slot,
2099                             struct extent_buffer *child)
2100 {
2101         struct btrfs_key parent_key;
2102         struct btrfs_key child_key;
2103         int ret = 0;
2104
2105         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2106         if (btrfs_header_level(child) == 0)
2107                 btrfs_item_key_to_cpu(child, &child_key, 0);
2108         else
2109                 btrfs_node_key_to_cpu(child, &child_key, 0);
2110
2111         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2112                 ret = -EINVAL;
2113                 fprintf(stderr,
2114                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2115                         parent_key.objectid, parent_key.type, parent_key.offset,
2116                         child_key.objectid, child_key.type, child_key.offset);
2117         }
2118         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2119                 ret = -EINVAL;
2120                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2121                         btrfs_node_blockptr(parent, slot),
2122                         btrfs_header_bytenr(child));
2123         }
2124         if (btrfs_node_ptr_generation(parent, slot) !=
2125             btrfs_header_generation(child)) {
2126                 ret = -EINVAL;
2127                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2128                         btrfs_header_generation(child),
2129                         btrfs_node_ptr_generation(parent, slot));
2130         }
2131         return ret;
2132 }
2133
2134 /*
2135  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2136  * in every fs or file tree check. Here we find its all root ids, and only check
2137  * it in the fs or file tree which has the smallest root id.
2138  */
2139 static int need_check(struct btrfs_root *root, struct ulist *roots)
2140 {
2141         struct rb_node *node;
2142         struct ulist_node *u;
2143
2144         if (roots->nnodes == 1)
2145                 return 1;
2146
2147         node = rb_first(&roots->root);
2148         u = rb_entry(node, struct ulist_node, rb_node);
2149         /*
2150          * current root id is not smallest, we skip it and let it be checked
2151          * in the fs or file tree who hash the smallest root id.
2152          */
2153         if (root->objectid != u->val)
2154                 return 0;
2155
2156         return 1;
2157 }
2158
2159 /*
2160  * for a tree node or leaf, we record its reference count, so later if we still
2161  * process this node or leaf, don't need to compute its reference count again.
2162  */
2163 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2164                              struct node_refs *nrefs, u64 level)
2165 {
2166         int check, ret;
2167         u64 refs;
2168         struct ulist *roots;
2169
2170         if (nrefs->bytenr[level] != bytenr) {
2171                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2172                                        level, 1, &refs, NULL);
2173                 if (ret < 0)
2174                         return ret;
2175
2176                 nrefs->bytenr[level] = bytenr;
2177                 nrefs->refs[level] = refs;
2178                 if (refs > 1) {
2179                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2180                                                    0, &roots);
2181                         if (ret)
2182                                 return -EIO;
2183
2184                         check = need_check(root, roots);
2185                         ulist_free(roots);
2186                         nrefs->need_check[level] = check;
2187                 } else {
2188                         nrefs->need_check[level] = 1;
2189                 }
2190         }
2191
2192         return 0;
2193 }
2194
2195 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2196                           struct walk_control *wc, int *level,
2197                           struct node_refs *nrefs)
2198 {
2199         enum btrfs_tree_block_status status;
2200         u64 bytenr;
2201         u64 ptr_gen;
2202         struct btrfs_fs_info *fs_info = root->fs_info;
2203         struct extent_buffer *next;
2204         struct extent_buffer *cur;
2205         int ret, err = 0;
2206         u64 refs;
2207
2208         WARN_ON(*level < 0);
2209         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2210
2211         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2212                 refs = nrefs->refs[*level];
2213                 ret = 0;
2214         } else {
2215                 ret = btrfs_lookup_extent_info(NULL, root,
2216                                        path->nodes[*level]->start,
2217                                        *level, 1, &refs, NULL);
2218                 if (ret < 0) {
2219                         err = ret;
2220                         goto out;
2221                 }
2222                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2223                 nrefs->refs[*level] = refs;
2224         }
2225
2226         if (refs > 1) {
2227                 ret = enter_shared_node(root, path->nodes[*level]->start,
2228                                         refs, wc, *level);
2229                 if (ret > 0) {
2230                         err = ret;
2231                         goto out;
2232                 }
2233         }
2234
2235         while (*level >= 0) {
2236                 WARN_ON(*level < 0);
2237                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2238                 cur = path->nodes[*level];
2239
2240                 if (btrfs_header_level(cur) != *level)
2241                         WARN_ON(1);
2242
2243                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2244                         break;
2245                 if (*level == 0) {
2246                         ret = process_one_leaf(root, cur, wc);
2247                         if (ret < 0)
2248                                 err = ret;
2249                         break;
2250                 }
2251                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2252                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2253
2254                 if (bytenr == nrefs->bytenr[*level - 1]) {
2255                         refs = nrefs->refs[*level - 1];
2256                 } else {
2257                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2258                                         *level - 1, 1, &refs, NULL);
2259                         if (ret < 0) {
2260                                 refs = 0;
2261                         } else {
2262                                 nrefs->bytenr[*level - 1] = bytenr;
2263                                 nrefs->refs[*level - 1] = refs;
2264                         }
2265                 }
2266
2267                 if (refs > 1) {
2268                         ret = enter_shared_node(root, bytenr, refs,
2269                                                 wc, *level - 1);
2270                         if (ret > 0) {
2271                                 path->slots[*level]++;
2272                                 continue;
2273                         }
2274                 }
2275
2276                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2277                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2278                         free_extent_buffer(next);
2279                         reada_walk_down(root, cur, path->slots[*level]);
2280                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2281                         if (!extent_buffer_uptodate(next)) {
2282                                 struct btrfs_key node_key;
2283
2284                                 btrfs_node_key_to_cpu(path->nodes[*level],
2285                                                       &node_key,
2286                                                       path->slots[*level]);
2287                                 btrfs_add_corrupt_extent_record(root->fs_info,
2288                                                 &node_key,
2289                                                 path->nodes[*level]->start,
2290                                                 root->fs_info->nodesize,
2291                                                 *level);
2292                                 err = -EIO;
2293                                 goto out;
2294                         }
2295                 }
2296
2297                 ret = check_child_node(cur, path->slots[*level], next);
2298                 if (ret) {
2299                         free_extent_buffer(next);
2300                         err = ret;
2301                         goto out;
2302                 }
2303
2304                 if (btrfs_is_leaf(next))
2305                         status = btrfs_check_leaf(root, NULL, next);
2306                 else
2307                         status = btrfs_check_node(root, NULL, next);
2308                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2309                         free_extent_buffer(next);
2310                         err = -EIO;
2311                         goto out;
2312                 }
2313
2314                 *level = *level - 1;
2315                 free_extent_buffer(path->nodes[*level]);
2316                 path->nodes[*level] = next;
2317                 path->slots[*level] = 0;
2318         }
2319 out:
2320         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2321         return err;
2322 }
2323
/* Forward declaration; check_inode_item() is defined elsewhere in this file. */
static int check_inode_item(struct btrfs_root *root,
			    struct btrfs_path *path,
			    unsigned int ext_ref);
2326
2327 /*
2328  * Returns >0  Found error, should continue
2329  * Returns <0  Fatal error, must exit the whole check
2330  * Returns 0   No errors found
2331  */
2332 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2333                              int *level, struct node_refs *nrefs, int ext_ref)
2334 {
2335         enum btrfs_tree_block_status status;
2336         u64 bytenr;
2337         u64 ptr_gen;
2338         struct btrfs_fs_info *fs_info = root->fs_info;
2339         struct extent_buffer *next;
2340         struct extent_buffer *cur;
2341         int ret;
2342
2343         WARN_ON(*level < 0);
2344         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2345
2346         ret = update_nodes_refs(root, path->nodes[*level]->start,
2347                                 nrefs, *level);
2348         if (ret < 0)
2349                 return ret;
2350
2351         while (*level >= 0) {
2352                 WARN_ON(*level < 0);
2353                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2354                 cur = path->nodes[*level];
2355
2356                 if (btrfs_header_level(cur) != *level)
2357                         WARN_ON(1);
2358
2359                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2360                         break;
2361                 /* Don't forgot to check leaf/node validation */
2362                 if (*level == 0) {
2363                         ret = btrfs_check_leaf(root, NULL, cur);
2364                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2365                                 ret = -EIO;
2366                                 break;
2367                         }
2368                         ret = process_one_leaf_v2(root, path, nrefs,
2369                                                   level, ext_ref);
2370                         cur = path->nodes[*level];
2371                         break;
2372                 } else {
2373                         ret = btrfs_check_node(root, NULL, cur);
2374                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2375                                 ret = -EIO;
2376                                 break;
2377                         }
2378                 }
2379                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2380                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2381
2382                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2383                 if (ret)
2384                         break;
2385                 if (!nrefs->need_check[*level - 1]) {
2386                         path->slots[*level]++;
2387                         continue;
2388                 }
2389
2390                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392                         free_extent_buffer(next);
2393                         reada_walk_down(root, cur, path->slots[*level]);
2394                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2395                         if (!extent_buffer_uptodate(next)) {
2396                                 struct btrfs_key node_key;
2397
2398                                 btrfs_node_key_to_cpu(path->nodes[*level],
2399                                                       &node_key,
2400                                                       path->slots[*level]);
2401                                 btrfs_add_corrupt_extent_record(fs_info,
2402                                                 &node_key,
2403                                                 path->nodes[*level]->start,
2404                                                 fs_info->nodesize,
2405                                                 *level);
2406                                 ret = -EIO;
2407                                 break;
2408                         }
2409                 }
2410
2411                 ret = check_child_node(cur, path->slots[*level], next);
2412                 if (ret < 0) 
2413                         break;
2414
2415                 if (btrfs_is_leaf(next))
2416                         status = btrfs_check_leaf(root, NULL, next);
2417                 else
2418                         status = btrfs_check_node(root, NULL, next);
2419                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2420                         free_extent_buffer(next);
2421                         ret = -EIO;
2422                         break;
2423                 }
2424
2425                 *level = *level - 1;
2426                 free_extent_buffer(path->nodes[*level]);
2427                 path->nodes[*level] = next;
2428                 path->slots[*level] = 0;
2429         }
2430         return ret;
2431 }
2432
2433 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2434                         struct walk_control *wc, int *level)
2435 {
2436         int i;
2437         struct extent_buffer *leaf;
2438
2439         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2440                 leaf = path->nodes[i];
2441                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2442                         path->slots[i]++;
2443                         *level = i;
2444                         return 0;
2445                 } else {
2446                         free_extent_buffer(path->nodes[*level]);
2447                         path->nodes[*level] = NULL;
2448                         BUG_ON(*level > wc->active_node);
2449                         if (*level == wc->active_node)
2450                                 leave_shared_node(root, wc, *level);
2451                         *level = i + 1;
2452                 }
2453         }
2454         return 1;
2455 }
2456
2457 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2458                            int *level)
2459 {
2460         int i;
2461         struct extent_buffer *leaf;
2462
2463         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2464                 leaf = path->nodes[i];
2465                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2466                         path->slots[i]++;
2467                         *level = i;
2468                         return 0;
2469                 } else {
2470                         free_extent_buffer(path->nodes[*level]);
2471                         path->nodes[*level] = NULL;
2472                         *level = i + 1;
2473                 }
2474         }
2475         return 1;
2476 }
2477
2478 static int check_root_dir(struct inode_record *rec)
2479 {
2480         struct inode_backref *backref;
2481         int ret = -1;
2482
2483         if (!rec->found_inode_item || rec->errors)
2484                 goto out;
2485         if (rec->nlink != 1 || rec->found_link != 0)
2486                 goto out;
2487         if (list_empty(&rec->backrefs))
2488                 goto out;
2489         backref = to_inode_backref(rec->backrefs.next);
2490         if (!backref->found_inode_ref)
2491                 goto out;
2492         if (backref->index != 0 || backref->namelen != 2 ||
2493             memcmp(backref->name, "..", 2))
2494                 goto out;
2495         if (backref->found_dir_index || backref->found_dir_item)
2496                 goto out;
2497         ret = 0;
2498 out:
2499         return ret;
2500 }
2501
2502 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2503                               struct btrfs_root *root, struct btrfs_path *path,
2504                               struct inode_record *rec)
2505 {
2506         struct btrfs_inode_item *ei;
2507         struct btrfs_key key;
2508         int ret;
2509
2510         key.objectid = rec->ino;
2511         key.type = BTRFS_INODE_ITEM_KEY;
2512         key.offset = (u64)-1;
2513
2514         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2515         if (ret < 0)
2516                 goto out;
2517         if (ret) {
2518                 if (!path->slots[0]) {
2519                         ret = -ENOENT;
2520                         goto out;
2521                 }
2522                 path->slots[0]--;
2523                 ret = 0;
2524         }
2525         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2526         if (key.objectid != rec->ino) {
2527                 ret = -ENOENT;
2528                 goto out;
2529         }
2530
2531         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2532                             struct btrfs_inode_item);
2533         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2534         btrfs_mark_buffer_dirty(path->nodes[0]);
2535         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2536         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2537                root->root_key.objectid);
2538 out:
2539         btrfs_release_path(path);
2540         return ret;
2541 }
2542
2543 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2544                                     struct btrfs_root *root,
2545                                     struct btrfs_path *path,
2546                                     struct inode_record *rec)
2547 {
2548         int ret;
2549
2550         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2551         btrfs_release_path(path);
2552         if (!ret)
2553                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2554         return ret;
2555 }
2556
2557 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2558                                struct btrfs_root *root,
2559                                struct btrfs_path *path,
2560                                struct inode_record *rec)
2561 {
2562         struct btrfs_inode_item *ei;
2563         struct btrfs_key key;
2564         int ret = 0;
2565
2566         key.objectid = rec->ino;
2567         key.type = BTRFS_INODE_ITEM_KEY;
2568         key.offset = 0;
2569
2570         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2571         if (ret) {
2572                 if (ret > 0)
2573                         ret = -ENOENT;
2574                 goto out;
2575         }
2576
2577         /* Since ret == 0, no need to check anything */
2578         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2579                             struct btrfs_inode_item);
2580         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2581         btrfs_mark_buffer_dirty(path->nodes[0]);
2582         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2583         printf("reset nbytes for ino %llu root %llu\n",
2584                rec->ino, root->root_key.objectid);
2585 out:
2586         btrfs_release_path(path);
2587         return ret;
2588 }
2589
2590 static int add_missing_dir_index(struct btrfs_root *root,
2591                                  struct cache_tree *inode_cache,
2592                                  struct inode_record *rec,
2593                                  struct inode_backref *backref)
2594 {
2595         struct btrfs_path path;
2596         struct btrfs_trans_handle *trans;
2597         struct btrfs_dir_item *dir_item;
2598         struct extent_buffer *leaf;
2599         struct btrfs_key key;
2600         struct btrfs_disk_key disk_key;
2601         struct inode_record *dir_rec;
2602         unsigned long name_ptr;
2603         u32 data_size = sizeof(*dir_item) + backref->namelen;
2604         int ret;
2605
2606         trans = btrfs_start_transaction(root, 1);
2607         if (IS_ERR(trans))
2608                 return PTR_ERR(trans);
2609
2610         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2611                 (unsigned long long)rec->ino);
2612
2613         btrfs_init_path(&path);
2614         key.objectid = backref->dir;
2615         key.type = BTRFS_DIR_INDEX_KEY;
2616         key.offset = backref->index;
2617         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2618         BUG_ON(ret);
2619
2620         leaf = path.nodes[0];
2621         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2622
2623         disk_key.objectid = cpu_to_le64(rec->ino);
2624         disk_key.type = BTRFS_INODE_ITEM_KEY;
2625         disk_key.offset = 0;
2626
2627         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2628         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2629         btrfs_set_dir_data_len(leaf, dir_item, 0);
2630         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2631         name_ptr = (unsigned long)(dir_item + 1);
2632         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2633         btrfs_mark_buffer_dirty(leaf);
2634         btrfs_release_path(&path);
2635         btrfs_commit_transaction(trans, root);
2636
2637         backref->found_dir_index = 1;
2638         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2639         BUG_ON(IS_ERR(dir_rec));
2640         if (!dir_rec)
2641                 return 0;
2642         dir_rec->found_size += backref->namelen;
2643         if (dir_rec->found_size == dir_rec->isize &&
2644             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2645                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2646         if (dir_rec->found_size != dir_rec->isize)
2647                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2648
2649         return 0;
2650 }
2651
2652 static int delete_dir_index(struct btrfs_root *root,
2653                             struct inode_backref *backref)
2654 {
2655         struct btrfs_trans_handle *trans;
2656         struct btrfs_dir_item *di;
2657         struct btrfs_path path;
2658         int ret = 0;
2659
2660         trans = btrfs_start_transaction(root, 1);
2661         if (IS_ERR(trans))
2662                 return PTR_ERR(trans);
2663
2664         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2665                 (unsigned long long)backref->dir,
2666                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2667                 (unsigned long long)root->objectid);
2668
2669         btrfs_init_path(&path);
2670         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2671                                     backref->name, backref->namelen,
2672                                     backref->index, -1);
2673         if (IS_ERR(di)) {
2674                 ret = PTR_ERR(di);
2675                 btrfs_release_path(&path);
2676                 btrfs_commit_transaction(trans, root);
2677                 if (ret == -ENOENT)
2678                         return 0;
2679                 return ret;
2680         }
2681
2682         if (!di)
2683                 ret = btrfs_del_item(trans, root, &path);
2684         else
2685                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2686         BUG_ON(ret);
2687         btrfs_release_path(&path);
2688         btrfs_commit_transaction(trans, root);
2689         return ret;
2690 }
2691
2692 static int create_inode_item(struct btrfs_root *root,
2693                              struct inode_record *rec,
2694                              int root_dir)
2695 {
2696         struct btrfs_trans_handle *trans;
2697         struct btrfs_inode_item inode_item;
2698         time_t now = time(NULL);
2699         int ret;
2700
2701         trans = btrfs_start_transaction(root, 1);
2702         if (IS_ERR(trans)) {
2703                 ret = PTR_ERR(trans);
2704                 return ret;
2705         }
2706
2707         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2708                 "be incomplete, please check permissions and content after "
2709                 "the fsck completes.\n", (unsigned long long)root->objectid,
2710                 (unsigned long long)rec->ino);
2711
2712         memset(&inode_item, 0, sizeof(inode_item));
2713         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2714         if (root_dir)
2715                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2716         else
2717                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2718         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2719         if (rec->found_dir_item) {
2720                 if (rec->found_file_extent)
2721                         fprintf(stderr, "root %llu inode %llu has both a dir "
2722                                 "item and extents, unsure if it is a dir or a "
2723                                 "regular file so setting it as a directory\n",
2724                                 (unsigned long long)root->objectid,
2725                                 (unsigned long long)rec->ino);
2726                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2727                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2728         } else if (!rec->found_dir_item) {
2729                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2730                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2731         }
2732         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2733         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2734         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2735         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2736         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2737         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2738         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2739         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2740
2741         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2742         BUG_ON(ret);
2743         btrfs_commit_transaction(trans, root);
2744         return 0;
2745 }
2746
2747 static int repair_inode_backrefs(struct btrfs_root *root,
2748                                  struct inode_record *rec,
2749                                  struct cache_tree *inode_cache,
2750                                  int delete)
2751 {
2752         struct inode_backref *tmp, *backref;
2753         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2754         int ret = 0;
2755         int repaired = 0;
2756
2757         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2758                 if (!delete && rec->ino == root_dirid) {
2759                         if (!rec->found_inode_item) {
2760                                 ret = create_inode_item(root, rec, 1);
2761                                 if (ret)
2762                                         break;
2763                                 repaired++;
2764                         }
2765                 }
2766
2767                 /* Index 0 for root dir's are special, don't mess with it */
2768                 if (rec->ino == root_dirid && backref->index == 0)
2769                         continue;
2770
2771                 if (delete &&
2772                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2773                      (backref->found_dir_index && backref->found_inode_ref &&
2774                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2775                         ret = delete_dir_index(root, backref);
2776                         if (ret)
2777                                 break;
2778                         repaired++;
2779                         list_del(&backref->list);
2780                         free(backref);
2781                         continue;
2782                 }
2783
2784                 if (!delete && !backref->found_dir_index &&
2785                     backref->found_dir_item && backref->found_inode_ref) {
2786                         ret = add_missing_dir_index(root, inode_cache, rec,
2787                                                     backref);
2788                         if (ret)
2789                                 break;
2790                         repaired++;
2791                         if (backref->found_dir_item &&
2792                             backref->found_dir_index) {
2793                                 if (!backref->errors &&
2794                                     backref->found_inode_ref) {
2795                                         list_del(&backref->list);
2796                                         free(backref);
2797                                         continue;
2798                                 }
2799                         }
2800                 }
2801
2802                 if (!delete && (!backref->found_dir_index &&
2803                                 !backref->found_dir_item &&
2804                                 backref->found_inode_ref)) {
2805                         struct btrfs_trans_handle *trans;
2806                         struct btrfs_key location;
2807
2808                         ret = check_dir_conflict(root, backref->name,
2809                                                  backref->namelen,
2810                                                  backref->dir,
2811                                                  backref->index);
2812                         if (ret) {
2813                                 /*
2814                                  * let nlink fixing routine to handle it,
2815                                  * which can do it better.
2816                                  */
2817                                 ret = 0;
2818                                 break;
2819                         }
2820                         location.objectid = rec->ino;
2821                         location.type = BTRFS_INODE_ITEM_KEY;
2822                         location.offset = 0;
2823
2824                         trans = btrfs_start_transaction(root, 1);
2825                         if (IS_ERR(trans)) {
2826                                 ret = PTR_ERR(trans);
2827                                 break;
2828                         }
2829                         fprintf(stderr, "adding missing dir index/item pair "
2830                                 "for inode %llu\n",
2831                                 (unsigned long long)rec->ino);
2832                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2833                                                     backref->namelen,
2834                                                     backref->dir, &location,
2835                                                     imode_to_type(rec->imode),
2836                                                     backref->index);
2837                         BUG_ON(ret);
2838                         btrfs_commit_transaction(trans, root);
2839                         repaired++;
2840                 }
2841
2842                 if (!delete && (backref->found_inode_ref &&
2843                                 backref->found_dir_index &&
2844                                 backref->found_dir_item &&
2845                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2846                                 !rec->found_inode_item)) {
2847                         ret = create_inode_item(root, rec, 0);
2848                         if (ret)
2849                                 break;
2850                         repaired++;
2851                 }
2852
2853         }
2854         return ret ? ret : repaired;
2855 }
2856
2857 /*
2858  * To determine the file type for nlink/inode_item repair
2859  *
2860  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2861  * Return -ENOENT if file type is not found.
2862  */
2863 static int find_file_type(struct inode_record *rec, u8 *type)
2864 {
2865         struct inode_backref *backref;
2866
2867         /* For inode item recovered case */
2868         if (rec->found_inode_item) {
2869                 *type = imode_to_type(rec->imode);
2870                 return 0;
2871         }
2872
2873         list_for_each_entry(backref, &rec->backrefs, list) {
2874                 if (backref->found_dir_index || backref->found_dir_item) {
2875                         *type = backref->filetype;
2876                         return 0;
2877                 }
2878         }
2879         return -ENOENT;
2880 }
2881
2882 /*
2883  * To determine the file name for nlink repair
2884  *
2885  * Return 0 if file name is found, set name and namelen.
2886  * Return -ENOENT if file name is not found.
2887  */
2888 static int find_file_name(struct inode_record *rec,
2889                           char *name, int *namelen)
2890 {
2891         struct inode_backref *backref;
2892
2893         list_for_each_entry(backref, &rec->backrefs, list) {
2894                 if (backref->found_dir_index || backref->found_dir_item ||
2895                     backref->found_inode_ref) {
2896                         memcpy(name, backref->name, backref->namelen);
2897                         *namelen = backref->namelen;
2898                         return 0;
2899                 }
2900         }
2901         return -ENOENT;
2902 }
2903
2904 /* Reset the nlink of the inode to the correct one */
2905 static int reset_nlink(struct btrfs_trans_handle *trans,
2906                        struct btrfs_root *root,
2907                        struct btrfs_path *path,
2908                        struct inode_record *rec)
2909 {
2910         struct inode_backref *backref;
2911         struct inode_backref *tmp;
2912         struct btrfs_key key;
2913         struct btrfs_inode_item *inode_item;
2914         int ret = 0;
2915
2916         /* We don't believe this either, reset it and iterate backref */
2917         rec->found_link = 0;
2918
2919         /* Remove all backref including the valid ones */
2920         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2921                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2922                                    backref->index, backref->name,
2923                                    backref->namelen, 0);
2924                 if (ret < 0)
2925                         goto out;
2926
2927                 /* remove invalid backref, so it won't be added back */
2928                 if (!(backref->found_dir_index &&
2929                       backref->found_dir_item &&
2930                       backref->found_inode_ref)) {
2931                         list_del(&backref->list);
2932                         free(backref);
2933                 } else {
2934                         rec->found_link++;
2935                 }
2936         }
2937
2938         /* Set nlink to 0 */
2939         key.objectid = rec->ino;
2940         key.type = BTRFS_INODE_ITEM_KEY;
2941         key.offset = 0;
2942         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2943         if (ret < 0)
2944                 goto out;
2945         if (ret > 0) {
2946                 ret = -ENOENT;
2947                 goto out;
2948         }
2949         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2950                                     struct btrfs_inode_item);
2951         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2952         btrfs_mark_buffer_dirty(path->nodes[0]);
2953         btrfs_release_path(path);
2954
2955         /*
2956          * Add back valid inode_ref/dir_item/dir_index,
2957          * add_link() will handle the nlink inc, so new nlink must be correct
2958          */
2959         list_for_each_entry(backref, &rec->backrefs, list) {
2960                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2961                                      backref->name, backref->namelen,
2962                                      backref->filetype, &backref->index, 1);
2963                 if (ret < 0)
2964                         goto out;
2965         }
2966 out:
2967         btrfs_release_path(path);
2968         return ret;
2969 }
2970
2971 static int get_highest_inode(struct btrfs_trans_handle *trans,
2972                                 struct btrfs_root *root,
2973                                 struct btrfs_path *path,
2974                                 u64 *highest_ino)
2975 {
2976         struct btrfs_key key, found_key;
2977         int ret;
2978
2979         btrfs_init_path(path);
2980         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2981         key.offset = -1;
2982         key.type = BTRFS_INODE_ITEM_KEY;
2983         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2984         if (ret == 1) {
2985                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2986                                 path->slots[0] - 1);
2987                 *highest_ino = found_key.objectid;
2988                 ret = 0;
2989         }
2990         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2991                 ret = -EOVERFLOW;
2992         btrfs_release_path(path);
2993         return ret;
2994 }
2995
2996 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2997                                struct btrfs_root *root,
2998                                struct btrfs_path *path,
2999                                struct inode_record *rec)
3000 {
3001         char *dir_name = "lost+found";
3002         char namebuf[BTRFS_NAME_LEN] = {0};
3003         u64 lost_found_ino;
3004         u32 mode = 0700;
3005         u8 type = 0;
3006         int namelen = 0;
3007         int name_recovered = 0;
3008         int type_recovered = 0;
3009         int ret = 0;
3010
3011         /*
3012          * Get file name and type first before these invalid inode ref
3013          * are deleted by remove_all_invalid_backref()
3014          */
3015         name_recovered = !find_file_name(rec, namebuf, &namelen);
3016         type_recovered = !find_file_type(rec, &type);
3017
3018         if (!name_recovered) {
3019                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3020                        rec->ino, rec->ino);
3021                 namelen = count_digits(rec->ino);
3022                 sprintf(namebuf, "%llu", rec->ino);
3023                 name_recovered = 1;
3024         }
3025         if (!type_recovered) {
3026                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3027                        rec->ino);
3028                 type = BTRFS_FT_REG_FILE;
3029                 type_recovered = 1;
3030         }
3031
3032         ret = reset_nlink(trans, root, path, rec);
3033         if (ret < 0) {
3034                 fprintf(stderr,
3035                         "Failed to reset nlink for inode %llu: %s\n",
3036                         rec->ino, strerror(-ret));
3037                 goto out;
3038         }
3039
3040         if (rec->found_link == 0) {
3041                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3042                 if (ret < 0)
3043                         goto out;
3044                 lost_found_ino++;
3045                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3046                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3047                                   mode);
3048                 if (ret < 0) {
3049                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3050                                 dir_name, strerror(-ret));
3051                         goto out;
3052                 }
3053                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3054                                      namebuf, namelen, type, NULL, 1);
3055                 /*
3056                  * Add ".INO" suffix several times to handle case where
3057                  * "FILENAME.INO" is already taken by another file.
3058                  */
3059                 while (ret == -EEXIST) {
3060                         /*
3061                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3062                          */
3063                         if (namelen + count_digits(rec->ino) + 1 >
3064                             BTRFS_NAME_LEN) {
3065                                 ret = -EFBIG;
3066                                 goto out;
3067                         }
3068                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3069                                  ".%llu", rec->ino);
3070                         namelen += count_digits(rec->ino) + 1;
3071                         ret = btrfs_add_link(trans, root, rec->ino,
3072                                              lost_found_ino, namebuf,
3073                                              namelen, type, NULL, 1);
3074                 }
3075                 if (ret < 0) {
3076                         fprintf(stderr,
3077                                 "Failed to link the inode %llu to %s dir: %s\n",
3078                                 rec->ino, dir_name, strerror(-ret));
3079                         goto out;
3080                 }
3081                 /*
3082                  * Just increase the found_link, don't actually add the
3083                  * backref. This will make things easier and this inode
3084                  * record will be freed after the repair is done.
3085                  * So fsck will not report problem about this inode.
3086                  */
3087                 rec->found_link++;
3088                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3089                        namelen, namebuf, dir_name);
3090         }
3091         printf("Fixed the nlink of inode %llu\n", rec->ino);
3092 out:
3093         /*
3094          * Clear the flag anyway, or we will loop forever for the same inode
3095          * as it will not be removed from the bad inode list and the dead loop
3096          * happens.
3097          */
3098         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3099         btrfs_release_path(path);
3100         return ret;
3101 }
3102
3103 /*
3104  * Check if there is any normal(reg or prealloc) file extent for given
3105  * ino.
3106  * This is used to determine the file type when neither its dir_index/item or
3107  * inode_item exists.
3108  *
3109  * This will *NOT* report error, if any error happens, just consider it does
3110  * not have any normal file extent.
3111  */
3112 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3113 {
3114         struct btrfs_path path;
3115         struct btrfs_key key;
3116         struct btrfs_key found_key;
3117         struct btrfs_file_extent_item *fi;
3118         u8 type;
3119         int ret = 0;
3120
3121         btrfs_init_path(&path);
3122         key.objectid = ino;
3123         key.type = BTRFS_EXTENT_DATA_KEY;
3124         key.offset = 0;
3125
3126         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3127         if (ret < 0) {
3128                 ret = 0;
3129                 goto out;
3130         }
3131         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3132                 ret = btrfs_next_leaf(root, &path);
3133                 if (ret) {
3134                         ret = 0;
3135                         goto out;
3136                 }
3137         }
3138         while (1) {
3139                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3140                                       path.slots[0]);
3141                 if (found_key.objectid != ino ||
3142                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3143                         break;
3144                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3145                                     struct btrfs_file_extent_item);
3146                 type = btrfs_file_extent_type(path.nodes[0], fi);
3147                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3148                         ret = 1;
3149                         goto out;
3150                 }
3151         }
3152 out:
3153         btrfs_release_path(&path);
3154         return ret;
3155 }
3156
3157 static u32 btrfs_type_to_imode(u8 type)
3158 {
3159         static u32 imode_by_btrfs_type[] = {
3160                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3161                 [BTRFS_FT_DIR]          = S_IFDIR,
3162                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3163                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3164                 [BTRFS_FT_FIFO]         = S_IFIFO,
3165                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3166                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3167         };
3168
3169         return imode_by_btrfs_type[(type)];
3170 }
3171
3172 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3173                                 struct btrfs_root *root,
3174                                 struct btrfs_path *path,
3175                                 struct inode_record *rec)
3176 {
3177         u8 filetype;
3178         u32 mode = 0700;
3179         int type_recovered = 0;
3180         int ret = 0;
3181
3182         printf("Trying to rebuild inode:%llu\n", rec->ino);
3183
3184         type_recovered = !find_file_type(rec, &filetype);
3185
3186         /*
3187          * Try to determine inode type if type not found.
3188          *
3189          * For found regular file extent, it must be FILE.
3190          * For found dir_item/index, it must be DIR.
3191          *
3192          * For undetermined one, use FILE as fallback.
3193          *
3194          * TODO:
3195          * 1. If found backref(inode_index/item is already handled) to it,
3196          *    it must be DIR.
3197          *    Need new inode-inode ref structure to allow search for that.
3198          */
3199         if (!type_recovered) {
3200                 if (rec->found_file_extent &&
3201                     find_normal_file_extent(root, rec->ino)) {
3202                         type_recovered = 1;
3203                         filetype = BTRFS_FT_REG_FILE;
3204                 } else if (rec->found_dir_item) {
3205                         type_recovered = 1;
3206                         filetype = BTRFS_FT_DIR;
3207                 } else if (!list_empty(&rec->orphan_extents)) {
3208                         type_recovered = 1;
3209                         filetype = BTRFS_FT_REG_FILE;
3210                 } else{
3211                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3212                                rec->ino);
3213                         type_recovered = 1;
3214                         filetype = BTRFS_FT_REG_FILE;
3215                 }
3216         }
3217
3218         ret = btrfs_new_inode(trans, root, rec->ino,
3219                               mode | btrfs_type_to_imode(filetype));
3220         if (ret < 0)
3221                 goto out;
3222
3223         /*
3224          * Here inode rebuild is done, we only rebuild the inode item,
3225          * don't repair the nlink(like move to lost+found).
3226          * That is the job of nlink repair.
3227          *
3228          * We just fill the record and return
3229          */
3230         rec->found_dir_item = 1;
3231         rec->imode = mode | btrfs_type_to_imode(filetype);
3232         rec->nlink = 0;
3233         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3234         /* Ensure the inode_nlinks repair function will be called */
3235         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3236 out:
3237         return ret;
3238 }
3239
3240 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3241                                       struct btrfs_root *root,
3242                                       struct btrfs_path *path,
3243                                       struct inode_record *rec)
3244 {
3245         struct orphan_data_extent *orphan;
3246         struct orphan_data_extent *tmp;
3247         int ret = 0;
3248
3249         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3250                 /*
3251                  * Check for conflicting file extents
3252                  *
3253                  * Here we don't know whether the extents is compressed or not,
3254                  * so we can only assume it not compressed nor data offset,
3255                  * and use its disk_len as extent length.
3256                  */
3257                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3258                                        orphan->offset, orphan->disk_len, 0);
3259                 btrfs_release_path(path);
3260                 if (ret < 0)
3261                         goto out;
3262                 if (!ret) {
3263                         fprintf(stderr,
3264                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3265                                 orphan->disk_bytenr, orphan->disk_len);
3266                         ret = btrfs_free_extent(trans,
3267                                         root->fs_info->extent_root,
3268                                         orphan->disk_bytenr, orphan->disk_len,
3269                                         0, root->objectid, orphan->objectid,
3270                                         orphan->offset);
3271                         if (ret < 0)
3272                                 goto out;
3273                 }
3274                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3275                                 orphan->offset, orphan->disk_bytenr,
3276                                 orphan->disk_len, orphan->disk_len);
3277                 if (ret < 0)
3278                         goto out;
3279
3280                 /* Update file size info */
3281                 rec->found_size += orphan->disk_len;
3282                 if (rec->found_size == rec->nbytes)
3283                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3284
3285                 /* Update the file extent hole info too */
3286                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3287                                            orphan->disk_len);
3288                 if (ret < 0)
3289                         goto out;
3290                 if (RB_EMPTY_ROOT(&rec->holes))
3291                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3292
3293                 list_del(&orphan->list);
3294                 free(orphan);
3295         }
3296         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3297 out:
3298         return ret;
3299 }
3300
3301 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3302                                         struct btrfs_root *root,
3303                                         struct btrfs_path *path,
3304                                         struct inode_record *rec)
3305 {
3306         struct rb_node *node;
3307         struct file_extent_hole *hole;
3308         int found = 0;
3309         int ret = 0;
3310
3311         node = rb_first(&rec->holes);
3312
3313         while (node) {
3314                 found = 1;
3315                 hole = rb_entry(node, struct file_extent_hole, node);
3316                 ret = btrfs_punch_hole(trans, root, rec->ino,
3317                                        hole->start, hole->len);
3318                 if (ret < 0)
3319                         goto out;
3320                 ret = del_file_extent_hole(&rec->holes, hole->start,
3321                                            hole->len);
3322                 if (ret < 0)
3323                         goto out;
3324                 if (RB_EMPTY_ROOT(&rec->holes))
3325                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3326                 node = rb_first(&rec->holes);
3327         }
3328         /* special case for a file losing all its file extent */
3329         if (!found) {
3330                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3331                                        round_up(rec->isize,
3332                                                 root->fs_info->sectorsize));
3333                 if (ret < 0)
3334                         goto out;
3335         }
3336         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3337                rec->ino, root->objectid);
3338 out:
3339         return ret;
3340 }
3341
3342 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3343 {
3344         struct btrfs_trans_handle *trans;
3345         struct btrfs_path path;
3346         int ret = 0;
3347
3348         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3349                              I_ERR_NO_ORPHAN_ITEM |
3350                              I_ERR_LINK_COUNT_WRONG |
3351                              I_ERR_NO_INODE_ITEM |
3352                              I_ERR_FILE_EXTENT_ORPHAN |
3353                              I_ERR_FILE_EXTENT_DISCOUNT|
3354                              I_ERR_FILE_NBYTES_WRONG)))
3355                 return rec->errors;
3356
3357         /*
3358          * For nlink repair, it may create a dir and add link, so
3359          * 2 for parent(256)'s dir_index and dir_item
3360          * 2 for lost+found dir's inode_item and inode_ref
3361          * 1 for the new inode_ref of the file
3362          * 2 for lost+found dir's dir_index and dir_item for the file
3363          */
3364         trans = btrfs_start_transaction(root, 7);
3365         if (IS_ERR(trans))
3366                 return PTR_ERR(trans);
3367
3368         btrfs_init_path(&path);
3369         if (rec->errors & I_ERR_NO_INODE_ITEM)
3370                 ret = repair_inode_no_item(trans, root, &path, rec);
3371         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3372                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3373         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3374                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3375         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3376                 ret = repair_inode_isize(trans, root, &path, rec);
3377         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3378                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3379         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3380                 ret = repair_inode_nlinks(trans, root, &path, rec);
3381         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3382                 ret = repair_inode_nbytes(trans, root, &path, rec);
3383         btrfs_commit_transaction(trans, root);
3384         btrfs_release_path(&path);
3385         return ret;
3386 }
3387
3388 static int check_inode_recs(struct btrfs_root *root,
3389                             struct cache_tree *inode_cache)
3390 {
3391         struct cache_extent *cache;
3392         struct ptr_node *node;
3393         struct inode_record *rec;
3394         struct inode_backref *backref;
3395         int stage = 0;
3396         int ret = 0;
3397         int err = 0;
3398         u64 error = 0;
3399         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3400
3401         if (btrfs_root_refs(&root->root_item) == 0) {
3402                 if (!cache_tree_empty(inode_cache))
3403                         fprintf(stderr, "warning line %d\n", __LINE__);
3404                 return 0;
3405         }
3406
3407         /*
3408          * We need to repair backrefs first because we could change some of the
3409          * errors in the inode recs.
3410          *
3411          * We also need to go through and delete invalid backrefs first and then
3412          * add the correct ones second.  We do this because we may get EEXIST
3413          * when adding back the correct index because we hadn't yet deleted the
3414          * invalid index.
3415          *
3416          * For example, if we were missing a dir index then the directories
3417          * isize would be wrong, so if we fixed the isize to what we thought it
3418          * would be and then fixed the backref we'd still have a invalid fs, so
3419          * we need to add back the dir index and then check to see if the isize
3420          * is still wrong.
3421          */
3422         while (stage < 3) {
3423                 stage++;
3424                 if (stage == 3 && !err)
3425                         break;
3426
3427                 cache = search_cache_extent(inode_cache, 0);
3428                 while (repair && cache) {
3429                         node = container_of(cache, struct ptr_node, cache);
3430                         rec = node->data;
3431                         cache = next_cache_extent(cache);
3432
3433                         /* Need to free everything up and rescan */
3434                         if (stage == 3) {
3435                                 remove_cache_extent(inode_cache, &node->cache);
3436                                 free(node);
3437                                 free_inode_rec(rec);
3438                                 continue;
3439                         }
3440
3441                         if (list_empty(&rec->backrefs))
3442                                 continue;
3443
3444                         ret = repair_inode_backrefs(root, rec, inode_cache,
3445                                                     stage == 1);
3446                         if (ret < 0) {
3447                                 err = ret;
3448                                 stage = 2;
3449                                 break;
3450                         } if (ret > 0) {
3451                                 err = -EAGAIN;
3452                         }
3453                 }
3454         }
3455         if (err)
3456                 return err;
3457
3458         rec = get_inode_rec(inode_cache, root_dirid, 0);
3459         BUG_ON(IS_ERR(rec));
3460         if (rec) {
3461                 ret = check_root_dir(rec);
3462                 if (ret) {
3463                         fprintf(stderr, "root %llu root dir %llu error\n",
3464                                 (unsigned long long)root->root_key.objectid,
3465                                 (unsigned long long)root_dirid);
3466                         print_inode_error(root, rec);
3467                         error++;
3468                 }
3469         } else {
3470                 if (repair) {
3471                         struct btrfs_trans_handle *trans;
3472
3473                         trans = btrfs_start_transaction(root, 1);
3474                         if (IS_ERR(trans)) {
3475                                 err = PTR_ERR(trans);
3476                                 return err;
3477                         }
3478
3479                         fprintf(stderr,
3480                                 "root %llu missing its root dir, recreating\n",
3481                                 (unsigned long long)root->objectid);
3482
3483                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3484                         BUG_ON(ret);
3485
3486                         btrfs_commit_transaction(trans, root);
3487                         return -EAGAIN;
3488                 }
3489
3490                 fprintf(stderr, "root %llu root dir %llu not found\n",
3491                         (unsigned long long)root->root_key.objectid,
3492                         (unsigned long long)root_dirid);
3493         }
3494
3495         while (1) {
3496                 cache = search_cache_extent(inode_cache, 0);
3497                 if (!cache)
3498                         break;
3499                 node = container_of(cache, struct ptr_node, cache);
3500                 rec = node->data;
3501                 remove_cache_extent(inode_cache, &node->cache);
3502                 free(node);
3503                 if (rec->ino == root_dirid ||
3504                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3505                         free_inode_rec(rec);
3506                         continue;
3507                 }
3508
3509                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3510                         ret = check_orphan_item(root, rec->ino);
3511                         if (ret == 0)
3512                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3513                         if (can_free_inode_rec(rec)) {
3514                                 free_inode_rec(rec);
3515                                 continue;
3516                         }
3517                 }
3518
3519                 if (!rec->found_inode_item)
3520                         rec->errors |= I_ERR_NO_INODE_ITEM;
3521                 if (rec->found_link != rec->nlink)
3522                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3523                 if (repair) {
3524                         ret = try_repair_inode(root, rec);
3525                         if (ret == 0 && can_free_inode_rec(rec)) {
3526                                 free_inode_rec(rec);
3527                                 continue;
3528                         }
3529                         ret = 0;
3530                 }
3531
3532                 if (!(repair && ret == 0))
3533                         error++;
3534                 print_inode_error(root, rec);
3535                 list_for_each_entry(backref, &rec->backrefs, list) {
3536                         if (!backref->found_dir_item)
3537                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3538                         if (!backref->found_dir_index)
3539                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3540                         if (!backref->found_inode_ref)
3541                                 backref->errors |= REF_ERR_NO_INODE_REF;
3542                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3543                                 " namelen %u name %s filetype %d errors %x",
3544                                 (unsigned long long)backref->dir,
3545                                 (unsigned long long)backref->index,
3546                                 backref->namelen, backref->name,
3547                                 backref->filetype, backref->errors);
3548                         print_ref_error(backref->errors);
3549                 }
3550                 free_inode_rec(rec);
3551         }
3552         return (error > 0) ? -1 : 0;
3553 }
3554
3555 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3556                                         u64 objectid)
3557 {
3558         struct cache_extent *cache;
3559         struct root_record *rec = NULL;
3560         int ret;
3561
3562         cache = lookup_cache_extent(root_cache, objectid, 1);
3563         if (cache) {
3564                 rec = container_of(cache, struct root_record, cache);
3565         } else {
3566                 rec = calloc(1, sizeof(*rec));
3567                 if (!rec)
3568                         return ERR_PTR(-ENOMEM);
3569                 rec->objectid = objectid;
3570                 INIT_LIST_HEAD(&rec->backrefs);
3571                 rec->cache.start = objectid;
3572                 rec->cache.size = 1;
3573
3574                 ret = insert_cache_extent(root_cache, &rec->cache);
3575                 if (ret)
3576                         return ERR_PTR(-EEXIST);
3577         }
3578         return rec;
3579 }
3580
3581 static struct root_backref *get_root_backref(struct root_record *rec,
3582                                              u64 ref_root, u64 dir, u64 index,
3583                                              const char *name, int namelen)
3584 {
3585         struct root_backref *backref;
3586
3587         list_for_each_entry(backref, &rec->backrefs, list) {
3588                 if (backref->ref_root != ref_root || backref->dir != dir ||
3589                     backref->namelen != namelen)
3590                         continue;
3591                 if (memcmp(name, backref->name, namelen))
3592                         continue;
3593                 return backref;
3594         }
3595
3596         backref = calloc(1, sizeof(*backref) + namelen + 1);
3597         if (!backref)
3598                 return NULL;
3599         backref->ref_root = ref_root;
3600         backref->dir = dir;
3601         backref->index = index;
3602         backref->namelen = namelen;
3603         memcpy(backref->name, name, namelen);
3604         backref->name[namelen] = '\0';
3605         list_add_tail(&backref->list, &rec->backrefs);
3606         return backref;
3607 }
3608
3609 static void free_root_record(struct cache_extent *cache)
3610 {
3611         struct root_record *rec;
3612         struct root_backref *backref;
3613
3614         rec = container_of(cache, struct root_record, cache);
3615         while (!list_empty(&rec->backrefs)) {
3616                 backref = to_root_backref(rec->backrefs.next);
3617                 list_del(&backref->list);
3618                 free(backref);
3619         }
3620
3621         free(rec);
3622 }
3623
3624 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3625
3626 static int add_root_backref(struct cache_tree *root_cache,
3627                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3628                             const char *name, int namelen,
3629                             int item_type, int errors)
3630 {
3631         struct root_record *rec;
3632         struct root_backref *backref;
3633
3634         rec = get_root_rec(root_cache, root_id);
3635         BUG_ON(IS_ERR(rec));
3636         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3637         BUG_ON(!backref);
3638
3639         backref->errors |= errors;
3640
3641         if (item_type != BTRFS_DIR_ITEM_KEY) {
3642                 if (backref->found_dir_index || backref->found_back_ref ||
3643                     backref->found_forward_ref) {
3644                         if (backref->index != index)
3645                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3646                 } else {
3647                         backref->index = index;
3648                 }
3649         }
3650
3651         if (item_type == BTRFS_DIR_ITEM_KEY) {
3652                 if (backref->found_forward_ref)
3653                         rec->found_ref++;
3654                 backref->found_dir_item = 1;
3655         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3656                 backref->found_dir_index = 1;
3657         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3658                 if (backref->found_forward_ref)
3659                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3660                 else if (backref->found_dir_item)
3661                         rec->found_ref++;
3662                 backref->found_forward_ref = 1;
3663         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3664                 if (backref->found_back_ref)
3665                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3666                 backref->found_back_ref = 1;
3667         } else {
3668                 BUG_ON(1);
3669         }
3670
3671         if (backref->found_forward_ref && backref->found_dir_item)
3672                 backref->reachable = 1;
3673         return 0;
3674 }
3675
3676 static int merge_root_recs(struct btrfs_root *root,
3677                            struct cache_tree *src_cache,
3678                            struct cache_tree *dst_cache)
3679 {
3680         struct cache_extent *cache;
3681         struct ptr_node *node;
3682         struct inode_record *rec;
3683         struct inode_backref *backref;
3684         int ret = 0;
3685
3686         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3687                 free_inode_recs_tree(src_cache);
3688                 return 0;
3689         }
3690
3691         while (1) {
3692                 cache = search_cache_extent(src_cache, 0);
3693                 if (!cache)
3694                         break;
3695                 node = container_of(cache, struct ptr_node, cache);
3696                 rec = node->data;
3697                 remove_cache_extent(src_cache, &node->cache);
3698                 free(node);
3699
3700                 ret = is_child_root(root, root->objectid, rec->ino);
3701                 if (ret < 0)
3702                         break;
3703                 else if (ret == 0)
3704                         goto skip;
3705
3706                 list_for_each_entry(backref, &rec->backrefs, list) {
3707                         BUG_ON(backref->found_inode_ref);
3708                         if (backref->found_dir_item)
3709                                 add_root_backref(dst_cache, rec->ino,
3710                                         root->root_key.objectid, backref->dir,
3711                                         backref->index, backref->name,
3712                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3713                                         backref->errors);
3714                         if (backref->found_dir_index)
3715                                 add_root_backref(dst_cache, rec->ino,
3716                                         root->root_key.objectid, backref->dir,
3717                                         backref->index, backref->name,
3718                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3719                                         backref->errors);
3720                 }
3721 skip:
3722                 free_inode_rec(rec);
3723         }
3724         if (ret < 0)
3725                 return ret;
3726         return 0;
3727 }
3728
3729 static int check_root_refs(struct btrfs_root *root,
3730                            struct cache_tree *root_cache)
3731 {
3732         struct root_record *rec;
3733         struct root_record *ref_root;
3734         struct root_backref *backref;
3735         struct cache_extent *cache;
3736         int loop = 1;
3737         int ret;
3738         int error;
3739         int errors = 0;
3740
3741         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3742         BUG_ON(IS_ERR(rec));
3743         rec->found_ref = 1;
3744
3745         /* fixme: this can not detect circular references */
3746         while (loop) {
3747                 loop = 0;
3748                 cache = search_cache_extent(root_cache, 0);
3749                 while (1) {
3750                         if (!cache)
3751                                 break;
3752                         rec = container_of(cache, struct root_record, cache);
3753                         cache = next_cache_extent(cache);
3754
3755                         if (rec->found_ref == 0)
3756                                 continue;
3757
3758                         list_for_each_entry(backref, &rec->backrefs, list) {
3759                                 if (!backref->reachable)
3760                                         continue;
3761
3762                                 ref_root = get_root_rec(root_cache,
3763                                                         backref->ref_root);
3764                                 BUG_ON(IS_ERR(ref_root));
3765                                 if (ref_root->found_ref > 0)
3766                                         continue;
3767
3768                                 backref->reachable = 0;
3769                                 rec->found_ref--;
3770                                 if (rec->found_ref == 0)
3771                                         loop = 1;
3772                         }
3773                 }
3774         }
3775
3776         cache = search_cache_extent(root_cache, 0);
3777         while (1) {
3778                 if (!cache)
3779                         break;
3780                 rec = container_of(cache, struct root_record, cache);
3781                 cache = next_cache_extent(cache);
3782
3783                 if (rec->found_ref == 0 &&
3784                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3785                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3786                         ret = check_orphan_item(root->fs_info->tree_root,
3787                                                 rec->objectid);
3788                         if (ret == 0)
3789                                 continue;
3790
3791                         /*
3792                          * If we don't have a root item then we likely just have
3793                          * a dir item in a snapshot for this root but no actual
3794                          * ref key or anything so it's meaningless.
3795                          */
3796                         if (!rec->found_root_item)
3797                                 continue;
3798                         errors++;
3799                         fprintf(stderr, "fs tree %llu not referenced\n",
3800                                 (unsigned long long)rec->objectid);
3801                 }
3802
3803                 error = 0;
3804                 if (rec->found_ref > 0 && !rec->found_root_item)
3805                         error = 1;
3806                 list_for_each_entry(backref, &rec->backrefs, list) {
3807                         if (!backref->found_dir_item)
3808                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3809                         if (!backref->found_dir_index)
3810                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3811                         if (!backref->found_back_ref)
3812                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3813                         if (!backref->found_forward_ref)
3814                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3815                         if (backref->reachable && backref->errors)
3816                                 error = 1;
3817                 }
3818                 if (!error)
3819                         continue;
3820
3821                 errors++;
3822                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3823                         (unsigned long long)rec->objectid, rec->found_ref,
3824                          rec->found_root_item ? "" : "not found");
3825
3826                 list_for_each_entry(backref, &rec->backrefs, list) {
3827                         if (!backref->reachable)
3828                                 continue;
3829                         if (!backref->errors && rec->found_root_item)
3830                                 continue;
3831                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3832                                 " index %llu namelen %u name %s errors %x\n",
3833                                 (unsigned long long)backref->ref_root,
3834                                 (unsigned long long)backref->dir,
3835                                 (unsigned long long)backref->index,
3836                                 backref->namelen, backref->name,
3837                                 backref->errors);
3838                         print_ref_error(backref->errors);
3839                 }
3840         }
3841         return errors > 0 ? 1 : 0;
3842 }
3843
3844 static int process_root_ref(struct extent_buffer *eb, int slot,
3845                             struct btrfs_key *key,
3846                             struct cache_tree *root_cache)
3847 {
3848         u64 dirid;
3849         u64 index;
3850         u32 len;
3851         u32 name_len;
3852         struct btrfs_root_ref *ref;
3853         char namebuf[BTRFS_NAME_LEN];
3854         int error;
3855
3856         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3857
3858         dirid = btrfs_root_ref_dirid(eb, ref);
3859         index = btrfs_root_ref_sequence(eb, ref);
3860         name_len = btrfs_root_ref_name_len(eb, ref);
3861
3862         if (name_len <= BTRFS_NAME_LEN) {
3863                 len = name_len;
3864                 error = 0;
3865         } else {
3866                 len = BTRFS_NAME_LEN;
3867                 error = REF_ERR_NAME_TOO_LONG;
3868         }
3869         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3870
3871         if (key->type == BTRFS_ROOT_REF_KEY) {
3872                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3873                                  index, namebuf, len, key->type, error);
3874         } else {
3875                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3876                                  index, namebuf, len, key->type, error);
3877         }
3878         return 0;
3879 }
3880
3881 static void free_corrupt_block(struct cache_extent *cache)
3882 {
3883         struct btrfs_corrupt_block *corrupt;
3884
3885         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3886         free(corrupt);
3887 }
3888
3889 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3890
3891 /*
3892  * Repair the btree of the given root.
3893  *
3894  * The fix is to remove the node key in corrupt_blocks cache_tree.
3895  * and rebalance the tree.
3896  * After the fix, the btree should be writeable.
3897  */
3898 static int repair_btree(struct btrfs_root *root,
3899                         struct cache_tree *corrupt_blocks)
3900 {
3901         struct btrfs_trans_handle *trans;
3902         struct btrfs_path path;
3903         struct btrfs_corrupt_block *corrupt;
3904         struct cache_extent *cache;
3905         struct btrfs_key key;
3906         u64 offset;
3907         int level;
3908         int ret = 0;
3909
3910         if (cache_tree_empty(corrupt_blocks))
3911                 return 0;
3912
3913         trans = btrfs_start_transaction(root, 1);
3914         if (IS_ERR(trans)) {
3915                 ret = PTR_ERR(trans);
3916                 fprintf(stderr, "Error starting transaction: %s\n",
3917                         strerror(-ret));
3918                 return ret;
3919         }
3920         btrfs_init_path(&path);
3921         cache = first_cache_extent(corrupt_blocks);
3922         while (cache) {
3923                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3924                                        cache);
3925                 level = corrupt->level;
3926                 path.lowest_level = level;
3927                 key.objectid = corrupt->key.objectid;
3928                 key.type = corrupt->key.type;
3929                 key.offset = corrupt->key.offset;
3930
3931                 /*
3932                  * Here we don't want to do any tree balance, since it may
3933                  * cause a balance with corrupted brother leaf/node,
3934                  * so ins_len set to 0 here.
3935                  * Balance will be done after all corrupt node/leaf is deleted.
3936                  */
3937                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3938                 if (ret < 0)
3939                         goto out;
3940                 offset = btrfs_node_blockptr(path.nodes[level],
3941                                              path.slots[level]);
3942
3943                 /* Remove the ptr */
3944                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3945                 if (ret < 0)
3946                         goto out;
3947                 /*
3948                  * Remove the corresponding extent
3949                  * return value is not concerned.
3950                  */
3951                 btrfs_release_path(&path);
3952                 ret = btrfs_free_extent(trans, root, offset,
3953                                 root->fs_info->nodesize, 0,
3954                                 root->root_key.objectid, level - 1, 0);
3955                 cache = next_cache_extent(cache);
3956         }
3957
3958         /* Balance the btree using btrfs_search_slot() */
3959         cache = first_cache_extent(corrupt_blocks);
3960         while (cache) {
3961                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3962                                        cache);
3963                 memcpy(&key, &corrupt->key, sizeof(key));
3964                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3965                 if (ret < 0)
3966                         goto out;
3967                 /* return will always >0 since it won't find the item */
3968                 ret = 0;
3969                 btrfs_release_path(&path);
3970                 cache = next_cache_extent(cache);
3971         }
3972 out:
3973         btrfs_commit_transaction(trans, root);
3974         btrfs_release_path(&path);
3975         return ret;
3976 }
3977
3978 static int check_fs_root(struct btrfs_root *root,
3979                          struct cache_tree *root_cache,
3980                          struct walk_control *wc)
3981 {
3982         int ret = 0;
3983         int err = 0;
3984         int wret;
3985         int level;
3986         struct btrfs_path path;
3987         struct shared_node root_node;
3988         struct root_record *rec;
3989         struct btrfs_root_item *root_item = &root->root_item;
3990         struct cache_tree corrupt_blocks;
3991         struct orphan_data_extent *orphan;
3992         struct orphan_data_extent *tmp;
3993         enum btrfs_tree_block_status status;
3994         struct node_refs nrefs;
3995
3996         /*
3997          * Reuse the corrupt_block cache tree to record corrupted tree block
3998          *
3999          * Unlike the usage in extent tree check, here we do it in a per
4000          * fs/subvol tree base.
4001          */
4002         cache_tree_init(&corrupt_blocks);
4003         root->fs_info->corrupt_blocks = &corrupt_blocks;
4004
4005         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4006                 rec = get_root_rec(root_cache, root->root_key.objectid);
4007                 BUG_ON(IS_ERR(rec));
4008                 if (btrfs_root_refs(root_item) > 0)
4009                         rec->found_root_item = 1;
4010         }
4011
4012         btrfs_init_path(&path);
4013         memset(&root_node, 0, sizeof(root_node));
4014         cache_tree_init(&root_node.root_cache);
4015         cache_tree_init(&root_node.inode_cache);
4016         memset(&nrefs, 0, sizeof(nrefs));
4017
4018         /* Move the orphan extent record to corresponding inode_record */
4019         list_for_each_entry_safe(orphan, tmp,
4020                                  &root->orphan_data_extents, list) {
4021                 struct inode_record *inode;
4022
4023                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4024                                       1);
4025                 BUG_ON(IS_ERR(inode));
4026                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4027                 list_move(&orphan->list, &inode->orphan_extents);
4028         }
4029
4030         level = btrfs_header_level(root->node);
4031         memset(wc->nodes, 0, sizeof(wc->nodes));
4032         wc->nodes[level] = &root_node;
4033         wc->active_node = level;
4034         wc->root_level = level;
4035
4036         /* We may not have checked the root block, lets do that now */
4037         if (btrfs_is_leaf(root->node))
4038                 status = btrfs_check_leaf(root, NULL, root->node);
4039         else
4040                 status = btrfs_check_node(root, NULL, root->node);
4041         if (status != BTRFS_TREE_BLOCK_CLEAN)
4042                 return -EIO;
4043
4044         if (btrfs_root_refs(root_item) > 0 ||
4045             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4046                 path.nodes[level] = root->node;
4047                 extent_buffer_get(root->node);
4048                 path.slots[level] = 0;
4049         } else {
4050                 struct btrfs_key key;
4051                 struct btrfs_disk_key found_key;
4052
4053                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4054                 level = root_item->drop_level;
4055                 path.lowest_level = level;
4056                 if (level > btrfs_header_level(root->node) ||
4057                     level >= BTRFS_MAX_LEVEL) {
4058                         error("ignoring invalid drop level: %u", level);
4059                         goto skip_walking;
4060                 }
4061                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4062                 if (wret < 0)
4063                         goto skip_walking;
4064                 btrfs_node_key(path.nodes[level], &found_key,
4065                                 path.slots[level]);
4066                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4067                                         sizeof(found_key)));
4068         }
4069
4070         while (1) {
4071                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4072                 if (wret < 0)
4073                         ret = wret;
4074                 if (wret != 0)
4075                         break;
4076
4077                 wret = walk_up_tree(root, &path, wc, &level);
4078                 if (wret < 0)
4079                         ret = wret;
4080                 if (wret != 0)
4081                         break;
4082         }
4083 skip_walking:
4084         btrfs_release_path(&path);
4085
4086         if (!cache_tree_empty(&corrupt_blocks)) {
4087                 struct cache_extent *cache;
4088                 struct btrfs_corrupt_block *corrupt;
4089
4090                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4091                        root->root_key.objectid);
4092                 cache = first_cache_extent(&corrupt_blocks);
4093                 while (cache) {
4094                         corrupt = container_of(cache,
4095                                                struct btrfs_corrupt_block,
4096                                                cache);
4097                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4098                                cache->start, corrupt->level,
4099                                corrupt->key.objectid, corrupt->key.type,
4100                                corrupt->key.offset);
4101                         cache = next_cache_extent(cache);
4102                 }
4103                 if (repair) {
4104                         printf("Try to repair the btree for root %llu\n",
4105                                root->root_key.objectid);
4106                         ret = repair_btree(root, &corrupt_blocks);
4107                         if (ret < 0)
4108                                 fprintf(stderr, "Failed to repair btree: %s\n",
4109                                         strerror(-ret));
4110                         if (!ret)
4111                                 printf("Btree for root %llu is fixed\n",
4112                                        root->root_key.objectid);
4113                 }
4114         }
4115
4116         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4117         if (err < 0)
4118                 ret = err;
4119
4120         if (root_node.current) {
4121                 root_node.current->checked = 1;
4122                 maybe_free_inode_rec(&root_node.inode_cache,
4123                                 root_node.current);
4124         }
4125
4126         err = check_inode_recs(root, &root_node.inode_cache);
4127         if (!ret)
4128                 ret = err;
4129
4130         free_corrupt_blocks_tree(&corrupt_blocks);
4131         root->fs_info->corrupt_blocks = NULL;
4132         free_orphan_data_extents(&root->orphan_data_extents);
4133         return ret;
4134 }
4135
4136 static int fs_root_objectid(u64 objectid)
4137 {
4138         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4139             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4140                 return 1;
4141         return is_fstree(objectid);
4142 }
4143
4144 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4145                           struct cache_tree *root_cache)
4146 {
4147         struct btrfs_path path;
4148         struct btrfs_key key;
4149         struct walk_control wc;
4150         struct extent_buffer *leaf, *tree_node;
4151         struct btrfs_root *tmp_root;
4152         struct btrfs_root *tree_root = fs_info->tree_root;
4153         int ret;
4154         int err = 0;
4155
4156         if (ctx.progress_enabled) {
4157                 ctx.tp = TASK_FS_ROOTS;
4158                 task_start(ctx.info);
4159         }
4160
4161         /*
4162          * Just in case we made any changes to the extent tree that weren't
4163          * reflected into the free space cache yet.
4164          */
4165         if (repair)
4166                 reset_cached_block_groups(fs_info);
4167         memset(&wc, 0, sizeof(wc));
4168         cache_tree_init(&wc.shared);
4169         btrfs_init_path(&path);
4170
4171 again:
4172         key.offset = 0;
4173         key.objectid = 0;
4174         key.type = BTRFS_ROOT_ITEM_KEY;
4175         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4176         if (ret < 0) {
4177                 err = 1;
4178                 goto out;
4179         }
4180         tree_node = tree_root->node;
4181         while (1) {
4182                 if (tree_node != tree_root->node) {
4183                         free_root_recs_tree(root_cache);
4184                         btrfs_release_path(&path);
4185                         goto again;
4186                 }
4187                 leaf = path.nodes[0];
4188                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4189                         ret = btrfs_next_leaf(tree_root, &path);
4190                         if (ret) {
4191                                 if (ret < 0)
4192                                         err = 1;
4193                                 break;
4194                         }
4195                         leaf = path.nodes[0];
4196                 }
4197                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4198                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4199                     fs_root_objectid(key.objectid)) {
4200                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4201                                 tmp_root = btrfs_read_fs_root_no_cache(
4202                                                 fs_info, &key);
4203                         } else {
4204                                 key.offset = (u64)-1;
4205                                 tmp_root = btrfs_read_fs_root(
4206                                                 fs_info, &key);
4207                         }
4208                         if (IS_ERR(tmp_root)) {
4209                                 err = 1;
4210                                 goto next;
4211                         }
4212                         ret = check_fs_root(tmp_root, root_cache, &wc);
4213                         if (ret == -EAGAIN) {
4214                                 free_root_recs_tree(root_cache);
4215                                 btrfs_release_path(&path);
4216                                 goto again;
4217                         }
4218                         if (ret)
4219                                 err = 1;
4220                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4221                                 btrfs_free_fs_root(tmp_root);
4222                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4223                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4224                         process_root_ref(leaf, path.slots[0], &key,
4225                                          root_cache);
4226                 }
4227 next:
4228                 path.slots[0]++;
4229         }
4230 out:
4231         btrfs_release_path(&path);
4232         if (err)
4233                 free_extent_cache_tree(&wc.shared);
4234         if (!cache_tree_empty(&wc.shared))
4235                 fprintf(stderr, "warning line %d\n", __LINE__);
4236
4237         task_stop(ctx.info);
4238
4239         return err;
4240 }
4241
4242 /*
4243  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4244  * INODE_REF/INODE_EXTREF match.
4245  *
4246  * @root:       the root of the fs/file tree
4247  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
4248  * @key:        the key of the DIR_ITEM/DIR_INDEX
4249  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
4250  *              distinguish root_dir between normal dir/file
4251  * @name:       the name in the INODE_REF/INODE_EXTREF
4252  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4253  * @mode:       the st_mode of INODE_ITEM
4254  *
4255  * Return 0 if no error occurred.
4256  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4257  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4258  * dir/file.
4259  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4260  * not match for normal dir/file.
4261  */
4262 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4263                          struct btrfs_key *key, u64 index, char *name,
4264                          u32 namelen, u32 mode)
4265 {
4266         struct btrfs_path path;
4267         struct extent_buffer *node;
4268         struct btrfs_dir_item *di;
4269         struct btrfs_key location;
4270         char namebuf[BTRFS_NAME_LEN] = {0};
4271         u32 total;
4272         u32 cur = 0;
4273         u32 len;
4274         u32 name_len;
4275         u32 data_len;
4276         u8 filetype;
4277         int slot;
4278         int ret;
4279
4280         btrfs_init_path(&path);
4281         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4282         if (ret < 0) {
4283                 ret = DIR_ITEM_MISSING;
4284                 goto out;
4285         }
4286
4287         /* Process root dir and goto out*/
4288         if (index == 0) {
4289                 if (ret == 0) {
4290                         ret = ROOT_DIR_ERROR;
4291                         error(
4292                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4293                                 root->objectid,
4294                                 ref_key->type == BTRFS_INODE_REF_KEY ?
4295                                         "REF" : "EXTREF",
4296                                 ref_key->objectid, ref_key->offset,
4297                                 key->type == BTRFS_DIR_ITEM_KEY ?
4298                                         "DIR_ITEM" : "DIR_INDEX");
4299                 } else {
4300                         ret = 0;
4301                 }
4302
4303                 goto out;
4304         }
4305
4306         /* Process normal file/dir */
4307         if (ret > 0) {
4308                 ret = DIR_ITEM_MISSING;
4309                 error(
4310                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4311                         root->objectid,
4312                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4313                         ref_key->objectid, ref_key->offset,
4314                         key->type == BTRFS_DIR_ITEM_KEY ?
4315                                 "DIR_ITEM" : "DIR_INDEX",
4316                         key->objectid, key->offset, namelen, name,
4317                         imode_to_type(mode));
4318                 goto out;
4319         }
4320
4321         /* Check whether inode_id/filetype/name match */
4322         node = path.nodes[0];
4323         slot = path.slots[0];
4324         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4325         total = btrfs_item_size_nr(node, slot);
4326         while (cur < total) {
4327                 ret = DIR_ITEM_MISMATCH;
4328                 name_len = btrfs_dir_name_len(node, di);
4329                 data_len = btrfs_dir_data_len(node, di);
4330
4331                 btrfs_dir_item_key_to_cpu(node, di, &location);
4332                 if (location.objectid != ref_key->objectid ||
4333                     location.type !=  BTRFS_INODE_ITEM_KEY ||
4334                     location.offset != 0)
4335                         goto next;
4336
4337                 filetype = btrfs_dir_type(node, di);
4338                 if (imode_to_type(mode) != filetype)
4339                         goto next;
4340
4341                 if (cur + sizeof(*di) + name_len > total ||
4342                     name_len > BTRFS_NAME_LEN) {
4343                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4344                                 root->objectid,
4345                                 key->type == BTRFS_DIR_ITEM_KEY ?
4346                                 "DIR_ITEM" : "DIR_INDEX",
4347                                 key->objectid, key->offset, name_len);
4348
4349                         if (cur + sizeof(*di) > total)
4350                                 break;
4351                         len = min_t(u32, total - cur - sizeof(*di),
4352                                     BTRFS_NAME_LEN);
4353                 } else {
4354                         len = name_len;
4355                 }
4356
4357                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4358                 if (len != namelen || strncmp(namebuf, name, len))
4359                         goto next;
4360
4361                 ret = 0;
4362                 goto out;
4363 next:
4364                 len = sizeof(*di) + name_len + data_len;
4365                 di = (struct btrfs_dir_item *)((char *)di + len);
4366                 cur += len;
4367         }
4368         if (ret == DIR_ITEM_MISMATCH)
4369                 error(
4370                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4371                         root->objectid,
4372                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4373                         ref_key->objectid, ref_key->offset,
4374                         key->type == BTRFS_DIR_ITEM_KEY ?
4375                                 "DIR_ITEM" : "DIR_INDEX",
4376                         key->objectid, key->offset, namelen, name,
4377                         imode_to_type(mode));
4378 out:
4379         btrfs_release_path(&path);
4380         return ret;
4381 }
4382
4383 /*
4384  * Traverse the given INODE_REF and call find_dir_item() to find related
4385  * DIR_ITEM/DIR_INDEX.
4386  *
4387  * @root:       the root of the fs/file tree
4388  * @ref_key:    the key of the INODE_REF
4389  * @refs:       the count of INODE_REF
4390  * @mode:       the st_mode of INODE_ITEM
4391  *
4392  * Return 0 if no error occurred.
4393  */
4394 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4395                            struct extent_buffer *node, int slot, u64 *refs,
4396                            int mode)
4397 {
4398         struct btrfs_key key;
4399         struct btrfs_inode_ref *ref;
4400         char namebuf[BTRFS_NAME_LEN] = {0};
4401         u32 total;
4402         u32 cur = 0;
4403         u32 len;
4404         u32 name_len;
4405         u64 index;
4406         int ret, err = 0;
4407
4408         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4409         total = btrfs_item_size_nr(node, slot);
4410
4411 next:
4412         /* Update inode ref count */
4413         (*refs)++;
4414
4415         index = btrfs_inode_ref_index(node, ref);
4416         name_len = btrfs_inode_ref_name_len(node, ref);
4417         if (cur + sizeof(*ref) + name_len > total ||
4418             name_len > BTRFS_NAME_LEN) {
4419                 warning("root %llu INODE_REF[%llu %llu] name too long",
4420                         root->objectid, ref_key->objectid, ref_key->offset);
4421
4422                 if (total < cur + sizeof(*ref))
4423                         goto out;
4424                 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4425         } else {
4426                 len = name_len;
4427         }
4428
4429         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4430
4431         /* Check root dir ref name */
4432         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4433                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4434                       root->objectid, ref_key->objectid, ref_key->offset,
4435                       namebuf);
4436                 err |= ROOT_DIR_ERROR;
4437         }
4438
4439         /* Find related DIR_INDEX */
4440         key.objectid = ref_key->offset;
4441         key.type = BTRFS_DIR_INDEX_KEY;
4442         key.offset = index;
4443         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4444         err |= ret;
4445
4446         /* Find related dir_item */
4447         key.objectid = ref_key->offset;
4448         key.type = BTRFS_DIR_ITEM_KEY;
4449         key.offset = btrfs_name_hash(namebuf, len);
4450         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4451         err |= ret;
4452
4453         len = sizeof(*ref) + name_len;
4454         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4455         cur += len;
4456         if (cur < total)
4457                 goto next;
4458
4459 out:
4460         return err;
4461 }
4462
4463 /*
4464  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4465  * DIR_ITEM/DIR_INDEX.
4466  *
4467  * @root:       the root of the fs/file tree
4468  * @ref_key:    the key of the INODE_EXTREF
4469  * @refs:       the count of INODE_EXTREF
4470  * @mode:       the st_mode of INODE_ITEM
4471  *
4472  * Return 0 if no error occurred.
4473  */
4474 static int check_inode_extref(struct btrfs_root *root,
4475                               struct btrfs_key *ref_key,
4476                               struct extent_buffer *node, int slot, u64 *refs,
4477                               int mode)
4478 {
4479         struct btrfs_key key;
4480         struct btrfs_inode_extref *extref;
4481         char namebuf[BTRFS_NAME_LEN] = {0};
4482         u32 total;
4483         u32 cur = 0;
4484         u32 len;
4485         u32 name_len;
4486         u64 index;
4487         u64 parent;
4488         int ret;
4489         int err = 0;
4490
4491         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4492         total = btrfs_item_size_nr(node, slot);
4493
4494 next:
4495         /* update inode ref count */
4496         (*refs)++;
4497         name_len = btrfs_inode_extref_name_len(node, extref);
4498         index = btrfs_inode_extref_index(node, extref);
4499         parent = btrfs_inode_extref_parent(node, extref);
4500         if (name_len <= BTRFS_NAME_LEN) {
4501                 len = name_len;
4502         } else {
4503                 len = BTRFS_NAME_LEN;
4504                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4505                         root->objectid, ref_key->objectid, ref_key->offset);
4506         }
4507         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4508
4509         /* Check root dir ref name */
4510         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4511                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4512                       root->objectid, ref_key->objectid, ref_key->offset,
4513                       namebuf);
4514                 err |= ROOT_DIR_ERROR;
4515         }
4516
4517         /* find related dir_index */
4518         key.objectid = parent;
4519         key.type = BTRFS_DIR_INDEX_KEY;
4520         key.offset = index;
4521         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4522         err |= ret;
4523
4524         /* find related dir_item */
4525         key.objectid = parent;
4526         key.type = BTRFS_DIR_ITEM_KEY;
4527         key.offset = btrfs_name_hash(namebuf, len);
4528         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4529         err |= ret;
4530
4531         len = sizeof(*extref) + name_len;
4532         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4533         cur += len;
4534
4535         if (cur < total)
4536                 goto next;
4537
4538         return err;
4539 }
4540
4541 /*
4542  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4543  * DIR_ITEM/DIR_INDEX match.
4544  *
4545  * @root:       the root of the fs/file tree
4546  * @key:        the key of the INODE_REF/INODE_EXTREF
4547  * @name:       the name in the INODE_REF/INODE_EXTREF
4548  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4549  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4550  * to (u64)-1
4551  * @ext_ref:    the EXTENDED_IREF feature
4552  *
4553  * Return 0 if no error occurred.
4554  * Return >0 for error bitmap
4555  */
4556 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4557                           char *name, int namelen, u64 index,
4558                           unsigned int ext_ref)
4559 {
4560         struct btrfs_path path;
4561         struct btrfs_inode_ref *ref;
4562         struct btrfs_inode_extref *extref;
4563         struct extent_buffer *node;
4564         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4565         u32 total;
4566         u32 cur = 0;
4567         u32 len;
4568         u32 ref_namelen;
4569         u64 ref_index;
4570         u64 parent;
4571         u64 dir_id;
4572         int slot;
4573         int ret;
4574
4575         btrfs_init_path(&path);
4576         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4577         if (ret) {
4578                 ret = INODE_REF_MISSING;
4579                 goto extref;
4580         }
4581
4582         node = path.nodes[0];
4583         slot = path.slots[0];
4584
4585         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4586         total = btrfs_item_size_nr(node, slot);
4587
4588         /* Iterate all entry of INODE_REF */
4589         while (cur < total) {
4590                 ret = INODE_REF_MISSING;
4591
4592                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4593                 ref_index = btrfs_inode_ref_index(node, ref);
4594                 if (index != (u64)-1 && index != ref_index)
4595                         goto next_ref;
4596
4597                 if (cur + sizeof(*ref) + ref_namelen > total ||
4598                     ref_namelen > BTRFS_NAME_LEN) {
4599                         warning("root %llu INODE %s[%llu %llu] name too long",
4600                                 root->objectid,
4601                                 key->type == BTRFS_INODE_REF_KEY ?
4602                                         "REF" : "EXTREF",
4603                                 key->objectid, key->offset);
4604
4605                         if (cur + sizeof(*ref) > total)
4606                                 break;
4607                         len = min_t(u32, total - cur - sizeof(*ref),
4608                                     BTRFS_NAME_LEN);
4609                 } else {
4610                         len = ref_namelen;
4611                 }
4612
4613                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4614                                    len);
4615
4616                 if (len != namelen || strncmp(ref_namebuf, name, len))
4617                         goto next_ref;
4618
4619                 ret = 0;
4620                 goto out;
4621 next_ref:
4622                 len = sizeof(*ref) + ref_namelen;
4623                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4624                 cur += len;
4625         }
4626
4627 extref:
4628         /* Skip if not support EXTENDED_IREF feature */
4629         if (!ext_ref)
4630                 goto out;
4631
4632         btrfs_release_path(&path);
4633         btrfs_init_path(&path);
4634
4635         dir_id = key->offset;
4636         key->type = BTRFS_INODE_EXTREF_KEY;
4637         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4638
4639         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4640         if (ret) {
4641                 ret = INODE_REF_MISSING;
4642                 goto out;
4643         }
4644
4645         node = path.nodes[0];
4646         slot = path.slots[0];
4647
4648         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4649         cur = 0;
4650         total = btrfs_item_size_nr(node, slot);
4651
4652         /* Iterate all entry of INODE_EXTREF */
4653         while (cur < total) {
4654                 ret = INODE_REF_MISSING;
4655
4656                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4657                 ref_index = btrfs_inode_extref_index(node, extref);
4658                 parent = btrfs_inode_extref_parent(node, extref);
4659                 if (index != (u64)-1 && index != ref_index)
4660                         goto next_extref;
4661
4662                 if (parent != dir_id)
4663                         goto next_extref;
4664
4665                 if (ref_namelen <= BTRFS_NAME_LEN) {
4666                         len = ref_namelen;
4667                 } else {
4668                         len = BTRFS_NAME_LEN;
4669                         warning("root %llu INODE %s[%llu %llu] name too long",
4670                                 root->objectid,
4671                                 key->type == BTRFS_INODE_REF_KEY ?
4672                                         "REF" : "EXTREF",
4673                                 key->objectid, key->offset);
4674                 }
4675                 read_extent_buffer(node, ref_namebuf,
4676                                    (unsigned long)(extref + 1), len);
4677
4678                 if (len != namelen || strncmp(ref_namebuf, name, len))
4679                         goto next_extref;
4680
4681                 ret = 0;
4682                 goto out;
4683
4684 next_extref:
4685                 len = sizeof(*extref) + ref_namelen;
4686                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4687                 cur += len;
4688
4689         }
4690 out:
4691         btrfs_release_path(&path);
4692         return ret;
4693 }
4694
4695 /*
4696  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4697  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4698  *
4699  * @root:       the root of the fs/file tree
4700  * @key:        the key of the INODE_REF/INODE_EXTREF
4701  * @size:       the st_size of the INODE_ITEM
4702  * @ext_ref:    the EXTENDED_IREF feature
4703  *
4704  * Return 0 if no error occurred.
4705  */
4706 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4707                           struct extent_buffer *node, int slot, u64 *size,
4708                           unsigned int ext_ref)
4709 {
4710         struct btrfs_dir_item *di;
4711         struct btrfs_inode_item *ii;
4712         struct btrfs_path path;
4713         struct btrfs_key location;
4714         char namebuf[BTRFS_NAME_LEN] = {0};
4715         u32 total;
4716         u32 cur = 0;
4717         u32 len;
4718         u32 name_len;
4719         u32 data_len;
4720         u8 filetype;
4721         u32 mode;
4722         u64 index;
4723         int ret;
4724         int err = 0;
4725
4726         /*
4727          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4728          * ignore index check.
4729          */
4730         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4731
4732         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4733         total = btrfs_item_size_nr(node, slot);
4734
4735         while (cur < total) {
4736                 data_len = btrfs_dir_data_len(node, di);
4737                 if (data_len)
4738                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4739                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4740                               "DIR_ITEM" : "DIR_INDEX",
4741                               key->objectid, key->offset, data_len);
4742
4743                 name_len = btrfs_dir_name_len(node, di);
4744                 if (cur + sizeof(*di) + name_len > total ||
4745                     name_len > BTRFS_NAME_LEN) {
4746                         warning("root %llu %s[%llu %llu] name too long",
4747                                 root->objectid,
4748                                 key->type == BTRFS_DIR_ITEM_KEY ?
4749                                 "DIR_ITEM" : "DIR_INDEX",
4750                                 key->objectid, key->offset);
4751
4752                         if (cur + sizeof(*di) > total)
4753                                 break;
4754                         len = min_t(u32, total - cur - sizeof(*di),
4755                                     BTRFS_NAME_LEN);
4756                 } else {
4757                         len = name_len;
4758                 }
4759                 (*size) += name_len;
4760
4761                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4762                 filetype = btrfs_dir_type(node, di);
4763
4764                 if (key->type == BTRFS_DIR_ITEM_KEY &&
4765                     key->offset != btrfs_name_hash(namebuf, len)) {
4766                         err |= -EIO;
4767                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4768                                 root->objectid, key->objectid, key->offset,
4769                                 namebuf, len, filetype, key->offset,
4770                                 btrfs_name_hash(namebuf, len));
4771                 }
4772
4773                 btrfs_init_path(&path);
4774                 btrfs_dir_item_key_to_cpu(node, di, &location);
4775
4776                 /* Ignore related ROOT_ITEM check */
4777                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4778                         goto next;
4779
4780                 /* Check relative INODE_ITEM(existence/filetype) */
4781                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4782                 if (ret) {
4783                         err |= INODE_ITEM_MISSING;
4784                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4785                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4786                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4787                               key->offset, location.objectid, name_len,
4788                               namebuf, filetype);
4789                         goto next;
4790                 }
4791
4792                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4793                                     struct btrfs_inode_item);
4794                 mode = btrfs_inode_mode(path.nodes[0], ii);
4795
4796                 if (imode_to_type(mode) != filetype) {
4797                         err |= INODE_ITEM_MISMATCH;
4798                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4799                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4800                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4801                               key->offset, name_len, namebuf, filetype);
4802                 }
4803
4804                 /* Check relative INODE_REF/INODE_EXTREF */
4805                 location.type = BTRFS_INODE_REF_KEY;
4806                 location.offset = key->objectid;
4807                 ret = find_inode_ref(root, &location, namebuf, len,
4808                                        index, ext_ref);
4809                 err |= ret;
4810                 if (ret & INODE_REF_MISSING)
4811                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4812                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4813                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4814                               key->offset, name_len, namebuf, filetype);
4815
4816 next:
4817                 btrfs_release_path(&path);
4818                 len = sizeof(*di) + name_len + data_len;
4819                 di = (struct btrfs_dir_item *)((char *)di + len);
4820                 cur += len;
4821
4822                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4823                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4824                               root->objectid, key->objectid, key->offset);
4825                         break;
4826                 }
4827         }
4828
4829         return err;
4830 }
4831
4832 /*
4833  * Check file extent datasum/hole, update the size of the file extents,
4834  * check and update the last offset of the file extent.
4835  *
4836  * @root:       the root of fs/file tree.
4837  * @fkey:       the key of the file extent.
4838  * @nodatasum:  INODE_NODATASUM feature.
4839  * @size:       the sum of all EXTENT_DATA items size for this inode.
4840  * @end:        the offset of the last extent.
4841  *
4842  * Return 0 if no error occurred.
4843  */
4844 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4845                              struct extent_buffer *node, int slot,
4846                              unsigned int nodatasum, u64 *size, u64 *end)
4847 {
4848         struct btrfs_file_extent_item *fi;
4849         u64 disk_bytenr;
4850         u64 disk_num_bytes;
4851         u64 extent_num_bytes;
4852         u64 extent_offset;
4853         u64 csum_found;         /* In byte size, sectorsize aligned */
4854         u64 search_start;       /* Logical range start we search for csum */
4855         u64 search_len;         /* Logical range len we search for csum */
4856         unsigned int extent_type;
4857         unsigned int is_hole;
4858         int compressed = 0;
4859         int ret;
4860         int err = 0;
4861
4862         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4863
4864         /* Check inline extent */
4865         extent_type = btrfs_file_extent_type(node, fi);
4866         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4867                 struct btrfs_item *e = btrfs_item_nr(slot);
4868                 u32 item_inline_len;
4869
4870                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4871                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4872                 compressed = btrfs_file_extent_compression(node, fi);
4873                 if (extent_num_bytes == 0) {
4874                         error(
4875                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4876                                 root->objectid, fkey->objectid, fkey->offset);
4877                         err |= FILE_EXTENT_ERROR;
4878                 }
4879                 if (!compressed && extent_num_bytes != item_inline_len) {
4880                         error(
4881                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4882                                 root->objectid, fkey->objectid, fkey->offset,
4883                                 extent_num_bytes, item_inline_len);
4884                         err |= FILE_EXTENT_ERROR;
4885                 }
4886                 *end += extent_num_bytes;
4887                 *size += extent_num_bytes;
4888                 return err;
4889         }
4890
4891         /* Check extent type */
4892         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4893                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4894                 err |= FILE_EXTENT_ERROR;
4895                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4896                       root->objectid, fkey->objectid, fkey->offset);
4897                 return err;
4898         }
4899
4900         /* Check REG_EXTENT/PREALLOC_EXTENT */
4901         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4902         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4903         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4904         extent_offset = btrfs_file_extent_offset(node, fi);
4905         compressed = btrfs_file_extent_compression(node, fi);
4906         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4907
4908         /*
4909          * Check EXTENT_DATA csum
4910          *
4911          * For plain (uncompressed) extent, we should only check the range
4912          * we're referring to, as it's possible that part of prealloc extent
4913          * has been written, and has csum:
4914          *
4915          * |<--- Original large preallocated extent A ---->|
4916          * |<- Prealloc File Extent ->|<- Regular Extent ->|
4917          *      No csum                         Has csum
4918          *
4919          * For compressed extent, we should check the whole range.
4920          */
4921         if (!compressed) {
4922                 search_start = disk_bytenr + extent_offset;
4923                 search_len = extent_num_bytes;
4924         } else {
4925                 search_start = disk_bytenr;
4926                 search_len = disk_num_bytes;
4927         }
4928         ret = count_csum_range(root, search_start, search_len, &csum_found);
4929         if (csum_found > 0 && nodatasum) {
4930                 err |= ODD_CSUM_ITEM;
4931                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4932                       root->objectid, fkey->objectid, fkey->offset);
4933         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4934                    !is_hole && (ret < 0 || csum_found < search_len)) {
4935                 err |= CSUM_ITEM_MISSING;
4936                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4937                       root->objectid, fkey->objectid, fkey->offset,
4938                       csum_found, search_len);
4939         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4940                 err |= ODD_CSUM_ITEM;
4941                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4942                       root->objectid, fkey->objectid, fkey->offset, csum_found);
4943         }
4944
4945         /* Check EXTENT_DATA hole */
4946         if (!no_holes && *end != fkey->offset) {
4947                 err |= FILE_EXTENT_ERROR;
4948                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4949                       root->objectid, fkey->objectid, fkey->offset);
4950         }
4951
4952         *end += extent_num_bytes;
4953         if (!is_hole)
4954                 *size += extent_num_bytes;
4955
4956         return err;
4957 }
4958
4959 /*
4960  * Set inode item nbytes to @nbytes
4961  *
4962  * Returns  0     on success
4963  * Returns  != 0  on error
4964  */
4965 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
4966                                       struct btrfs_path *path,
4967                                       u64 ino, u64 nbytes)
4968 {
4969         struct btrfs_trans_handle *trans;
4970         struct btrfs_inode_item *ii;
4971         struct btrfs_key key;
4972         struct btrfs_key research_key;
4973         int err = 0;
4974         int ret;
4975
4976         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
4977
4978         key.objectid = ino;
4979         key.type = BTRFS_INODE_ITEM_KEY;
4980         key.offset = 0;
4981
4982         trans = btrfs_start_transaction(root, 1);
4983         if (IS_ERR(trans)) {
4984                 ret = PTR_ERR(trans);
4985                 err |= ret;
4986                 goto out;
4987         }
4988
4989         btrfs_release_path(path);
4990         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4991         if (ret > 0)
4992                 ret = -ENOENT;
4993         if (ret) {
4994                 err |= ret;
4995                 goto fail;
4996         }
4997
4998         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
4999                             struct btrfs_inode_item);
5000         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5001         btrfs_mark_buffer_dirty(path->nodes[0]);
5002 fail:
5003         btrfs_commit_transaction(trans, root);
5004 out:
5005         if (ret)
5006                 error("failed to set nbytes in inode %llu root %llu",
5007                       ino, root->root_key.objectid);
5008         else
5009                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5010                        root->root_key.objectid, nbytes);
5011
5012         /* research path */
5013         btrfs_release_path(path);
5014         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5015         err |= ret;
5016
5017         return err;
5018 }
5019
5020 /*
5021  * Set directory inode isize to @isize.
5022  *
5023  * Returns 0     on success.
5024  * Returns != 0  on error.
5025  */
5026 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5027                                    struct btrfs_path *path,
5028                                    u64 ino, u64 isize)
5029 {
5030         struct btrfs_trans_handle *trans;
5031         struct btrfs_inode_item *ii;
5032         struct btrfs_key key;
5033         struct btrfs_key research_key;
5034         int ret;
5035         int err = 0;
5036
5037         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5038
5039         key.objectid = ino;
5040         key.type = BTRFS_INODE_ITEM_KEY;
5041         key.offset = 0;
5042
5043         trans = btrfs_start_transaction(root, 1);
5044         if (IS_ERR(trans)) {
5045                 ret = PTR_ERR(trans);
5046                 err |= ret;
5047                 goto out;
5048         }
5049
5050         btrfs_release_path(path);
5051         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5052         if (ret > 0)
5053                 ret = -ENOENT;
5054         if (ret) {
5055                 err |= ret;
5056                 goto fail;
5057         }
5058
5059         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5060                             struct btrfs_inode_item);
5061         btrfs_set_inode_size(path->nodes[0], ii, isize);
5062         btrfs_mark_buffer_dirty(path->nodes[0]);
5063 fail:
5064         btrfs_commit_transaction(trans, root);
5065 out:
5066         if (ret)
5067                 error("failed to set isize in inode %llu root %llu",
5068                       ino, root->root_key.objectid);
5069         else
5070                 printf("Set isize in inode %llu root %llu to %llu\n",
5071                        ino, root->root_key.objectid, isize);
5072
5073         btrfs_release_path(path);
5074         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5075         err |= ret;
5076
5077         return err;
5078 }
5079
5080 /*
5081  * Check INODE_ITEM and related ITEMs (the same inode number)
5082  * 1. check link count
5083  * 2. check inode ref/extref
5084  * 3. check dir item/index
5085  *
5086  * @ext_ref:    the EXTENDED_IREF feature
5087  *
5088  * Return 0 if no error occurred.
5089  * Return >0 for error or hit the traversal is done(by error bitmap)
5090  */
5091 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5092                             unsigned int ext_ref)
5093 {
5094         struct extent_buffer *node;
5095         struct btrfs_inode_item *ii;
5096         struct btrfs_key key;
5097         u64 inode_id;
5098         u32 mode;
5099         u64 nlink;
5100         u64 nbytes;
5101         u64 isize;
5102         u64 size = 0;
5103         u64 refs = 0;
5104         u64 extent_end = 0;
5105         u64 extent_size = 0;
5106         unsigned int dir;
5107         unsigned int nodatasum;
5108         int slot;
5109         int ret;
5110         int err = 0;
5111
5112         node = path->nodes[0];
5113         slot = path->slots[0];
5114
5115         btrfs_item_key_to_cpu(node, &key, slot);
5116         inode_id = key.objectid;
5117
5118         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5119                 ret = btrfs_next_item(root, path);
5120                 if (ret > 0)
5121                         err |= LAST_ITEM;
5122                 return err;
5123         }
5124
5125         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5126         isize = btrfs_inode_size(node, ii);
5127         nbytes = btrfs_inode_nbytes(node, ii);
5128         mode = btrfs_inode_mode(node, ii);
5129         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5130         nlink = btrfs_inode_nlink(node, ii);
5131         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5132
5133         while (1) {
5134                 ret = btrfs_next_item(root, path);
5135                 if (ret < 0) {
5136                         /* out will fill 'err' rusing current statistics */
5137                         goto out;
5138                 } else if (ret > 0) {
5139                         err |= LAST_ITEM;
5140                         goto out;
5141                 }
5142
5143                 node = path->nodes[0];
5144                 slot = path->slots[0];
5145                 btrfs_item_key_to_cpu(node, &key, slot);
5146                 if (key.objectid != inode_id)
5147                         goto out;
5148
5149                 switch (key.type) {
5150                 case BTRFS_INODE_REF_KEY:
5151                         ret = check_inode_ref(root, &key, node, slot, &refs,
5152                                               mode);
5153                         err |= ret;
5154                         break;
5155                 case BTRFS_INODE_EXTREF_KEY:
5156                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5157                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5158                                         root->objectid, key.objectid,
5159                                         key.offset);
5160                         ret = check_inode_extref(root, &key, node, slot, &refs,
5161                                                  mode);
5162                         err |= ret;
5163                         break;
5164                 case BTRFS_DIR_ITEM_KEY:
5165                 case BTRFS_DIR_INDEX_KEY:
5166                         if (!dir) {
5167                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5168                                         root->objectid, inode_id,
5169                                         imode_to_type(mode), key.objectid,
5170                                         key.offset);
5171                         }
5172                         ret = check_dir_item(root, &key, node, slot, &size,
5173                                              ext_ref);
5174                         err |= ret;
5175                         break;
5176                 case BTRFS_EXTENT_DATA_KEY:
5177                         if (dir) {
5178                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5179                                         root->objectid, inode_id, key.objectid,
5180                                         key.offset);
5181                         }
5182                         ret = check_file_extent(root, &key, node, slot,
5183                                                 nodatasum, &extent_size,
5184                                                 &extent_end);
5185                         err |= ret;
5186                         break;
5187                 case BTRFS_XATTR_ITEM_KEY:
5188                         break;
5189                 default:
5190                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5191                               key.objectid, key.type, key.offset);
5192                 }
5193         }
5194
5195 out:
5196         /* verify INODE_ITEM nlink/isize/nbytes */
5197         if (dir) {
5198                 if (nlink != 1) {
5199                         err |= LINK_COUNT_ERROR;
5200                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5201                               root->objectid, inode_id, nlink);
5202                 }
5203
5204                 /*
5205                  * Just a warning, as dir inode nbytes is just an
5206                  * instructive value.
5207                  */
5208                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5209                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5210                                 root->objectid, inode_id,
5211                                 root->fs_info->nodesize);
5212                 }
5213
5214                 if (isize != size) {
5215                         if (repair)
5216                                 ret = repair_dir_isize_lowmem(root, path,
5217                                                               inode_id, size);
5218                         if (!repair || ret) {
5219                                 err |= ISIZE_ERROR;
5220                                 error(
5221                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5222                                       root->objectid, inode_id, isize, size);
5223                         }
5224                 }
5225         } else {
5226                 if (nlink != refs) {
5227                         err |= LINK_COUNT_ERROR;
5228                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5229                               root->objectid, inode_id, nlink, refs);
5230                 } else if (!nlink) {
5231                         err |= ORPHAN_ITEM;
5232                 }
5233
5234                 if (!nbytes && !no_holes && extent_end < isize) {
5235                         err |= NBYTES_ERROR;
5236                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5237                               root->objectid, inode_id, isize);
5238                 }
5239
5240                 if (nbytes != extent_size) {
5241                         if (repair)
5242                                 ret = repair_inode_nbytes_lowmem(root, path,
5243                                                          inode_id, extent_size);
5244                         if (!repair || ret) {
5245                                 err |= NBYTES_ERROR;
5246                                 error(
5247         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5248                                       root->objectid, inode_id, nbytes,
5249                                       extent_size);
5250                         }
5251                 }
5252         }
5253
5254         return err;
5255 }
5256
5257 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5258 {
5259         struct btrfs_path path;
5260         struct btrfs_key key;
5261         int err = 0;
5262         int ret;
5263
5264         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5265         key.type = BTRFS_INODE_ITEM_KEY;
5266         key.offset = 0;
5267
5268         /* For root being dropped, we don't need to check first inode */
5269         if (btrfs_root_refs(&root->root_item) == 0 &&
5270             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5271             key.objectid)
5272                 return 0;
5273
5274         btrfs_init_path(&path);
5275
5276         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5277         if (ret < 0)
5278                 goto out;
5279         if (ret > 0) {
5280                 ret = 0;
5281                 err |= INODE_ITEM_MISSING;
5282                 error("first inode item of root %llu is missing",
5283                       root->objectid);
5284         }
5285
5286         err |= check_inode_item(root, &path, ext_ref);
5287         err &= ~LAST_ITEM;
5288         if (err && !ret)
5289                 ret = -EIO;
5290 out:
5291         btrfs_release_path(&path);
5292         return ret;
5293 }
5294
5295 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5296                                                 u64 parent, u64 root)
5297 {
5298         struct rb_node *node;
5299         struct tree_backref *back = NULL;
5300         struct tree_backref match = {
5301                 .node = {
5302                         .is_data = 0,
5303                 },
5304         };
5305
5306         if (parent) {
5307                 match.parent = parent;
5308                 match.node.full_backref = 1;
5309         } else {
5310                 match.root = root;
5311         }
5312
5313         node = rb_search(&rec->backref_tree, &match.node.node,
5314                          (rb_compare_keys)compare_extent_backref, NULL);
5315         if (node)
5316                 back = to_tree_backref(rb_node_to_extent_backref(node));
5317
5318         return back;
5319 }
5320
5321 static struct data_backref *find_data_backref(struct extent_record *rec,
5322                                                 u64 parent, u64 root,
5323                                                 u64 owner, u64 offset,
5324                                                 int found_ref,
5325                                                 u64 disk_bytenr, u64 bytes)
5326 {
5327         struct rb_node *node;
5328         struct data_backref *back = NULL;
5329         struct data_backref match = {
5330                 .node = {
5331                         .is_data = 1,
5332                 },
5333                 .owner = owner,
5334                 .offset = offset,
5335                 .bytes = bytes,
5336                 .found_ref = found_ref,
5337                 .disk_bytenr = disk_bytenr,
5338         };
5339
5340         if (parent) {
5341                 match.parent = parent;
5342                 match.node.full_backref = 1;
5343         } else {
5344                 match.root = root;
5345         }
5346
5347         node = rb_search(&rec->backref_tree, &match.node.node,
5348                          (rb_compare_keys)compare_extent_backref, NULL);
5349         if (node)
5350                 back = to_data_backref(rb_node_to_extent_backref(node));
5351
5352         return back;
5353 }
5354 /*
5355  * Iterate all item on the tree and call check_inode_item() to check.
5356  *
5357  * @root:       the root of the tree to be checked.
5358  * @ext_ref:    the EXTENDED_IREF feature
5359  *
5360  * Return 0 if no error found.
5361  * Return <0 for error.
5362  */
5363 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5364 {
5365         struct btrfs_path path;
5366         struct node_refs nrefs;
5367         struct btrfs_root_item *root_item = &root->root_item;
5368         int ret;
5369         int level;
5370         int err = 0;
5371
5372         /*
5373          * We need to manually check the first inode item(256)
5374          * As the following traversal function will only start from
5375          * the first inode item in the leaf, if inode item(256) is missing
5376          * we will just skip it forever.
5377          */
5378         ret = check_fs_first_inode(root, ext_ref);
5379         if (ret < 0)
5380                 return ret;
5381
5382         memset(&nrefs, 0, sizeof(nrefs));
5383         level = btrfs_header_level(root->node);
5384         btrfs_init_path(&path);
5385
5386         if (btrfs_root_refs(root_item) > 0 ||
5387             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5388                 path.nodes[level] = root->node;
5389                 path.slots[level] = 0;
5390                 extent_buffer_get(root->node);
5391         } else {
5392                 struct btrfs_key key;
5393
5394                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5395                 level = root_item->drop_level;
5396                 path.lowest_level = level;
5397                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5398                 if (ret < 0)
5399                         goto out;
5400                 ret = 0;
5401         }
5402
5403         while (1) {
5404                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5405                 err |= !!ret;
5406
5407                 /* if ret is negative, walk shall stop */
5408                 if (ret < 0) {
5409                         ret = err;
5410                         break;
5411                 }
5412
5413                 ret = walk_up_tree_v2(root, &path, &level);
5414                 if (ret != 0) {
5415                         /* Normal exit, reset ret to err */
5416                         ret = err;
5417                         break;
5418                 }
5419         }
5420
5421 out:
5422         btrfs_release_path(&path);
5423         return ret;
5424 }
5425
5426 /*
5427  * Find the relative ref for root_ref and root_backref.
5428  *
5429  * @root:       the root of the root tree.
5430  * @ref_key:    the key of the root ref.
5431  *
5432  * Return 0 if no error occurred.
5433  */
5434 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5435                           struct extent_buffer *node, int slot)
5436 {
5437         struct btrfs_path path;
5438         struct btrfs_key key;
5439         struct btrfs_root_ref *ref;
5440         struct btrfs_root_ref *backref;
5441         char ref_name[BTRFS_NAME_LEN] = {0};
5442         char backref_name[BTRFS_NAME_LEN] = {0};
5443         u64 ref_dirid;
5444         u64 ref_seq;
5445         u32 ref_namelen;
5446         u64 backref_dirid;
5447         u64 backref_seq;
5448         u32 backref_namelen;
5449         u32 len;
5450         int ret;
5451         int err = 0;
5452
5453         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5454         ref_dirid = btrfs_root_ref_dirid(node, ref);
5455         ref_seq = btrfs_root_ref_sequence(node, ref);
5456         ref_namelen = btrfs_root_ref_name_len(node, ref);
5457
5458         if (ref_namelen <= BTRFS_NAME_LEN) {
5459                 len = ref_namelen;
5460         } else {
5461                 len = BTRFS_NAME_LEN;
5462                 warning("%s[%llu %llu] ref_name too long",
5463                         ref_key->type == BTRFS_ROOT_REF_KEY ?
5464                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5465                         ref_key->offset);
5466         }
5467         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5468
5469         /* Find relative root_ref */
5470         key.objectid = ref_key->offset;
5471         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5472         key.offset = ref_key->objectid;
5473
5474         btrfs_init_path(&path);
5475         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5476         if (ret) {
5477                 err |= ROOT_REF_MISSING;
5478                 error("%s[%llu %llu] couldn't find relative ref",
5479                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5480                       "ROOT_REF" : "ROOT_BACKREF",
5481                       ref_key->objectid, ref_key->offset);
5482                 goto out;
5483         }
5484
5485         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5486                                  struct btrfs_root_ref);
5487         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5488         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5489         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5490
5491         if (backref_namelen <= BTRFS_NAME_LEN) {
5492                 len = backref_namelen;
5493         } else {
5494                 len = BTRFS_NAME_LEN;
5495                 warning("%s[%llu %llu] ref_name too long",
5496                         key.type == BTRFS_ROOT_REF_KEY ?
5497                         "ROOT_REF" : "ROOT_BACKREF",
5498                         key.objectid, key.offset);
5499         }
5500         read_extent_buffer(path.nodes[0], backref_name,
5501                            (unsigned long)(backref + 1), len);
5502
5503         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5504             ref_namelen != backref_namelen ||
5505             strncmp(ref_name, backref_name, len)) {
5506                 err |= ROOT_REF_MISMATCH;
5507                 error("%s[%llu %llu] mismatch relative ref",
5508                       ref_key->type == BTRFS_ROOT_REF_KEY ?
5509                       "ROOT_REF" : "ROOT_BACKREF",
5510                       ref_key->objectid, ref_key->offset);
5511         }
5512 out:
5513         btrfs_release_path(&path);
5514         return err;
5515 }
5516
5517 /*
5518  * Check all fs/file tree in low_memory mode.
5519  *
5520  * 1. for fs tree root item, call check_fs_root_v2()
5521  * 2. for fs tree root ref/backref, call check_root_ref()
5522  *
5523  * Return 0 if no error occurred.
5524  */
5525 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5526 {
5527         struct btrfs_root *tree_root = fs_info->tree_root;
5528         struct btrfs_root *cur_root = NULL;
5529         struct btrfs_path path;
5530         struct btrfs_key key;
5531         struct extent_buffer *node;
5532         unsigned int ext_ref;
5533         int slot;
5534         int ret;
5535         int err = 0;
5536
5537         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5538
5539         btrfs_init_path(&path);
5540         key.objectid = BTRFS_FS_TREE_OBJECTID;
5541         key.offset = 0;
5542         key.type = BTRFS_ROOT_ITEM_KEY;
5543
5544         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5545         if (ret < 0) {
5546                 err = ret;
5547                 goto out;
5548         } else if (ret > 0) {
5549                 err = -ENOENT;
5550                 goto out;
5551         }
5552
5553         while (1) {
5554                 node = path.nodes[0];
5555                 slot = path.slots[0];
5556                 btrfs_item_key_to_cpu(node, &key, slot);
5557                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5558                         goto out;
5559                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5560                     fs_root_objectid(key.objectid)) {
5561                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5562                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5563                                                                        &key);
5564                         } else {
5565                                 key.offset = (u64)-1;
5566                                 cur_root = btrfs_read_fs_root(fs_info, &key);
5567                         }
5568
5569                         if (IS_ERR(cur_root)) {
5570                                 error("Fail to read fs/subvol tree: %lld",
5571                                       key.objectid);
5572                                 err = -EIO;
5573                                 goto next;
5574                         }
5575
5576                         ret = check_fs_root_v2(cur_root, ext_ref);
5577                         err |= ret;
5578
5579                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5580                                 btrfs_free_fs_root(cur_root);
5581                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5582                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
5583                         ret = check_root_ref(tree_root, &key, node, slot);
5584                         err |= ret;
5585                 }
5586 next:
5587                 ret = btrfs_next_item(tree_root, &path);
5588                 if (ret > 0)
5589                         goto out;
5590                 if (ret < 0) {
5591                         err = ret;
5592                         goto out;
5593                 }
5594         }
5595
5596 out:
5597         btrfs_release_path(&path);
5598         return err;
5599 }
5600
5601 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5602                           struct cache_tree *root_cache)
5603 {
5604         int ret;
5605
5606         if (!ctx.progress_enabled)
5607                 fprintf(stderr, "checking fs roots\n");
5608         if (check_mode == CHECK_MODE_LOWMEM)
5609                 ret = check_fs_roots_v2(fs_info);
5610         else
5611                 ret = check_fs_roots(fs_info, root_cache);
5612
5613         return ret;
5614 }
5615
5616 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5617 {
5618         struct extent_backref *back, *tmp;
5619         struct tree_backref *tback;
5620         struct data_backref *dback;
5621         u64 found = 0;
5622         int err = 0;
5623
5624         rbtree_postorder_for_each_entry_safe(back, tmp,
5625                                              &rec->backref_tree, node) {
5626                 if (!back->found_extent_tree) {
5627                         err = 1;
5628                         if (!print_errs)
5629                                 goto out;
5630                         if (back->is_data) {
5631                                 dback = to_data_backref(back);
5632                                 fprintf(stderr, "Data backref %llu %s %llu"
5633                                         " owner %llu offset %llu num_refs %lu"
5634                                         " not found in extent tree\n",
5635                                         (unsigned long long)rec->start,
5636                                         back->full_backref ?
5637                                         "parent" : "root",
5638                                         back->full_backref ?
5639                                         (unsigned long long)dback->parent:
5640                                         (unsigned long long)dback->root,
5641                                         (unsigned long long)dback->owner,
5642                                         (unsigned long long)dback->offset,
5643                                         (unsigned long)dback->num_refs);
5644                         } else {
5645                                 tback = to_tree_backref(back);
5646                                 fprintf(stderr, "Tree backref %llu parent %llu"
5647                                         " root %llu not found in extent tree\n",
5648                                         (unsigned long long)rec->start,
5649                                         (unsigned long long)tback->parent,
5650                                         (unsigned long long)tback->root);
5651                         }
5652                 }
5653                 if (!back->is_data && !back->found_ref) {
5654                         err = 1;
5655                         if (!print_errs)
5656                                 goto out;
5657                         tback = to_tree_backref(back);
5658                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5659                                 (unsigned long long)rec->start,
5660                                 back->full_backref ? "parent" : "root",
5661                                 back->full_backref ?
5662                                 (unsigned long long)tback->parent :
5663                                 (unsigned long long)tback->root, back);
5664                 }
5665                 if (back->is_data) {
5666                         dback = to_data_backref(back);
5667                         if (dback->found_ref != dback->num_refs) {
5668                                 err = 1;
5669                                 if (!print_errs)
5670                                         goto out;
5671                                 fprintf(stderr, "Incorrect local backref count"
5672                                         " on %llu %s %llu owner %llu"
5673                                         " offset %llu found %u wanted %u back %p\n",
5674                                         (unsigned long long)rec->start,
5675                                         back->full_backref ?
5676                                         "parent" : "root",
5677                                         back->full_backref ?
5678                                         (unsigned long long)dback->parent:
5679                                         (unsigned long long)dback->root,
5680                                         (unsigned long long)dback->owner,
5681                                         (unsigned long long)dback->offset,
5682                                         dback->found_ref, dback->num_refs, back);
5683                         }
5684                         if (dback->disk_bytenr != rec->start) {
5685                                 err = 1;
5686                                 if (!print_errs)
5687                                         goto out;
5688                                 fprintf(stderr, "Backref disk bytenr does not"
5689                                         " match extent record, bytenr=%llu, "
5690                                         "ref bytenr=%llu\n",
5691                                         (unsigned long long)rec->start,
5692                                         (unsigned long long)dback->disk_bytenr);
5693                         }
5694
5695                         if (dback->bytes != rec->nr) {
5696                                 err = 1;
5697                                 if (!print_errs)
5698                                         goto out;
5699                                 fprintf(stderr, "Backref bytes do not match "
5700                                         "extent backref, bytenr=%llu, ref "
5701                                         "bytes=%llu, backref bytes=%llu\n",
5702                                         (unsigned long long)rec->start,
5703                                         (unsigned long long)rec->nr,
5704                                         (unsigned long long)dback->bytes);
5705                         }
5706                 }
5707                 if (!back->is_data) {
5708                         found += 1;
5709                 } else {
5710                         dback = to_data_backref(back);
5711                         found += dback->found_ref;
5712                 }
5713         }
5714         if (found != rec->refs) {
5715                 err = 1;
5716                 if (!print_errs)
5717                         goto out;
5718                 fprintf(stderr, "Incorrect global backref count "
5719                         "on %llu found %llu wanted %llu\n",
5720                         (unsigned long long)rec->start,
5721                         (unsigned long long)found,
5722                         (unsigned long long)rec->refs);
5723         }
5724 out:
5725         return err;
5726 }
5727
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
5734
5735 static void free_all_extent_backrefs(struct extent_record *rec)
5736 {
5737         rb_free_nodes(&rec->backref_tree, __free_one_backref);
5738 }
5739
5740 static void free_extent_record_cache(struct cache_tree *extent_cache)
5741 {
5742         struct cache_extent *cache;
5743         struct extent_record *rec;
5744
5745         while (1) {
5746                 cache = first_cache_extent(extent_cache);
5747                 if (!cache)
5748                         break;
5749                 rec = container_of(cache, struct extent_record, cache);
5750                 remove_cache_extent(extent_cache, cache);
5751                 free_all_extent_backrefs(rec);
5752                 free(rec);
5753         }
5754 }
5755
5756 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5757                                  struct extent_record *rec)
5758 {
5759         if (rec->content_checked && rec->owner_ref_checked &&
5760             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5761             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5762             !rec->bad_full_backref && !rec->crossing_stripes &&
5763             !rec->wrong_chunk_type) {
5764                 remove_cache_extent(extent_cache, &rec->cache);
5765                 free_all_extent_backrefs(rec);
5766                 list_del_init(&rec->list);
5767                 free(rec);
5768         }
5769         return 0;
5770 }
5771
5772 static int check_owner_ref(struct btrfs_root *root,
5773                             struct extent_record *rec,
5774                             struct extent_buffer *buf)
5775 {
5776         struct extent_backref *node, *tmp;
5777         struct tree_backref *back;
5778         struct btrfs_root *ref_root;
5779         struct btrfs_key key;
5780         struct btrfs_path path;
5781         struct extent_buffer *parent;
5782         int level;
5783         int found = 0;
5784         int ret;
5785
5786         rbtree_postorder_for_each_entry_safe(node, tmp,
5787                                              &rec->backref_tree, node) {
5788                 if (node->is_data)
5789                         continue;
5790                 if (!node->found_ref)
5791                         continue;
5792                 if (node->full_backref)
5793                         continue;
5794                 back = to_tree_backref(node);
5795                 if (btrfs_header_owner(buf) == back->root)
5796                         return 0;
5797         }
5798         BUG_ON(rec->is_root);
5799
5800         /* try to find the block by search corresponding fs tree */
5801         key.objectid = btrfs_header_owner(buf);
5802         key.type = BTRFS_ROOT_ITEM_KEY;
5803         key.offset = (u64)-1;
5804
5805         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5806         if (IS_ERR(ref_root))
5807                 return 1;
5808
5809         level = btrfs_header_level(buf);
5810         if (level == 0)
5811                 btrfs_item_key_to_cpu(buf, &key, 0);
5812         else
5813                 btrfs_node_key_to_cpu(buf, &key, 0);
5814
5815         btrfs_init_path(&path);
5816         path.lowest_level = level + 1;
5817         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5818         if (ret < 0)
5819                 return 0;
5820
5821         parent = path.nodes[level + 1];
5822         if (parent && buf->start == btrfs_node_blockptr(parent,
5823                                                         path.slots[level + 1]))
5824                 found = 1;
5825
5826         btrfs_release_path(&path);
5827         return found ? 0 : 1;
5828 }
5829
5830 static int is_extent_tree_record(struct extent_record *rec)
5831 {
5832         struct extent_backref *node, *tmp;
5833         struct tree_backref *back;
5834         int is_extent = 0;
5835
5836         rbtree_postorder_for_each_entry_safe(node, tmp,
5837                                              &rec->backref_tree, node) {
5838                 if (node->is_data)
5839                         return 0;
5840                 back = to_tree_backref(node);
5841                 if (node->full_backref)
5842                         return 0;
5843                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5844                         is_extent = 1;
5845         }
5846         return is_extent;
5847 }
5848
5849
5850 static int record_bad_block_io(struct btrfs_fs_info *info,
5851                                struct cache_tree *extent_cache,
5852                                u64 start, u64 len)
5853 {
5854         struct extent_record *rec;
5855         struct cache_extent *cache;
5856         struct btrfs_key key;
5857
5858         cache = lookup_cache_extent(extent_cache, start, len);
5859         if (!cache)
5860                 return 0;
5861
5862         rec = container_of(cache, struct extent_record, cache);
5863         if (!is_extent_tree_record(rec))
5864                 return 0;
5865
5866         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5867         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5868 }
5869
5870 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5871                        struct extent_buffer *buf, int slot)
5872 {
5873         if (btrfs_header_level(buf)) {
5874                 struct btrfs_key_ptr ptr1, ptr2;
5875
5876                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5877                                    sizeof(struct btrfs_key_ptr));
5878                 read_extent_buffer(buf, &ptr2,
5879                                    btrfs_node_key_ptr_offset(slot + 1),
5880                                    sizeof(struct btrfs_key_ptr));
5881                 write_extent_buffer(buf, &ptr1,
5882                                     btrfs_node_key_ptr_offset(slot + 1),
5883                                     sizeof(struct btrfs_key_ptr));
5884                 write_extent_buffer(buf, &ptr2,
5885                                     btrfs_node_key_ptr_offset(slot),
5886                                     sizeof(struct btrfs_key_ptr));
5887                 if (slot == 0) {
5888                         struct btrfs_disk_key key;
5889                         btrfs_node_key(buf, &key, 0);
5890                         btrfs_fixup_low_keys(root, path, &key,
5891                                              btrfs_header_level(buf) + 1);
5892                 }
5893         } else {
5894                 struct btrfs_item *item1, *item2;
5895                 struct btrfs_key k1, k2;
5896                 char *item1_data, *item2_data;
5897                 u32 item1_offset, item2_offset, item1_size, item2_size;
5898
5899                 item1 = btrfs_item_nr(slot);
5900                 item2 = btrfs_item_nr(slot + 1);
5901                 btrfs_item_key_to_cpu(buf, &k1, slot);
5902                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5903                 item1_offset = btrfs_item_offset(buf, item1);
5904                 item2_offset = btrfs_item_offset(buf, item2);
5905                 item1_size = btrfs_item_size(buf, item1);
5906                 item2_size = btrfs_item_size(buf, item2);
5907
5908                 item1_data = malloc(item1_size);
5909                 if (!item1_data)
5910                         return -ENOMEM;
5911                 item2_data = malloc(item2_size);
5912                 if (!item2_data) {
5913                         free(item1_data);
5914                         return -ENOMEM;
5915                 }
5916
5917                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5918                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5919
5920                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5921                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5922                 free(item1_data);
5923                 free(item2_data);
5924
5925                 btrfs_set_item_offset(buf, item1, item2_offset);
5926                 btrfs_set_item_offset(buf, item2, item1_offset);
5927                 btrfs_set_item_size(buf, item1, item2_size);
5928                 btrfs_set_item_size(buf, item2, item1_size);
5929
5930                 path->slots[0] = slot;
5931                 btrfs_set_item_key_unsafe(root, path, &k2);
5932                 path->slots[0] = slot + 1;
5933                 btrfs_set_item_key_unsafe(root, path, &k1);
5934         }
5935         return 0;
5936 }
5937
5938 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5939 {
5940         struct extent_buffer *buf;
5941         struct btrfs_key k1, k2;
5942         int i;
5943         int level = path->lowest_level;
5944         int ret = -EIO;
5945
5946         buf = path->nodes[level];
5947         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5948                 if (level) {
5949                         btrfs_node_key_to_cpu(buf, &k1, i);
5950                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5951                 } else {
5952                         btrfs_item_key_to_cpu(buf, &k1, i);
5953                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5954                 }
5955                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5956                         continue;
5957                 ret = swap_values(root, path, buf, i);
5958                 if (ret)
5959                         break;
5960                 btrfs_mark_buffer_dirty(buf);
5961                 i = 0;
5962         }
5963         return ret;
5964 }
5965
5966 static int delete_bogus_item(struct btrfs_root *root,
5967                              struct btrfs_path *path,
5968                              struct extent_buffer *buf, int slot)
5969 {
5970         struct btrfs_key key;
5971         int nritems = btrfs_header_nritems(buf);
5972
5973         btrfs_item_key_to_cpu(buf, &key, slot);
5974
5975         /* These are all the keys we can deal with missing. */
5976         if (key.type != BTRFS_DIR_INDEX_KEY &&
5977             key.type != BTRFS_EXTENT_ITEM_KEY &&
5978             key.type != BTRFS_METADATA_ITEM_KEY &&
5979             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5980             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5981                 return -1;
5982
5983         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5984                (unsigned long long)key.objectid, key.type,
5985                (unsigned long long)key.offset, slot, buf->start);
5986         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5987                               btrfs_item_nr_offset(slot + 1),
5988                               sizeof(struct btrfs_item) *
5989                               (nritems - slot - 1));
5990         btrfs_set_header_nritems(buf, nritems - 1);
5991         if (slot == 0) {
5992                 struct btrfs_disk_key disk_key;
5993
5994                 btrfs_item_key(buf, &disk_key, 0);
5995                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5996         }
5997         btrfs_mark_buffer_dirty(buf);
5998         return 0;
5999 }
6000
6001 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6002 {
6003         struct extent_buffer *buf;
6004         int i;
6005         int ret = 0;
6006
6007         /* We should only get this for leaves */
6008         BUG_ON(path->lowest_level);
6009         buf = path->nodes[0];
6010 again:
6011         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6012                 unsigned int shift = 0, offset;
6013
6014                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6015                     BTRFS_LEAF_DATA_SIZE(root)) {
6016                         if (btrfs_item_end_nr(buf, i) >
6017                             BTRFS_LEAF_DATA_SIZE(root)) {
6018                                 ret = delete_bogus_item(root, path, buf, i);
6019                                 if (!ret)
6020                                         goto again;
6021                                 fprintf(stderr, "item is off the end of the "
6022                                         "leaf, can't fix\n");
6023                                 ret = -EIO;
6024                                 break;
6025                         }
6026                         shift = BTRFS_LEAF_DATA_SIZE(root) -
6027                                 btrfs_item_end_nr(buf, i);
6028                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6029                            btrfs_item_offset_nr(buf, i - 1)) {
6030                         if (btrfs_item_end_nr(buf, i) >
6031                             btrfs_item_offset_nr(buf, i - 1)) {
6032                                 ret = delete_bogus_item(root, path, buf, i);
6033                                 if (!ret)
6034                                         goto again;
6035                                 fprintf(stderr, "items overlap, can't fix\n");
6036                                 ret = -EIO;
6037                                 break;
6038                         }
6039                         shift = btrfs_item_offset_nr(buf, i - 1) -
6040                                 btrfs_item_end_nr(buf, i);
6041                 }
6042                 if (!shift)
6043                         continue;
6044
6045                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6046                        i, shift, (unsigned long long)buf->start);
6047                 offset = btrfs_item_offset_nr(buf, i);
6048                 memmove_extent_buffer(buf,
6049                                       btrfs_leaf_data(buf) + offset + shift,
6050                                       btrfs_leaf_data(buf) + offset,
6051                                       btrfs_item_size_nr(buf, i));
6052                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6053                                       offset + shift);
6054                 btrfs_mark_buffer_dirty(buf);
6055         }
6056
6057         /*
6058          * We may have moved things, in which case we want to exit so we don't
6059          * write those changes out.  Once we have proper abort functionality in
6060          * progs this can be changed to something nicer.
6061          */
6062         BUG_ON(ret);
6063         return ret;
6064 }
6065
6066 /*
6067  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6068  * then just return -EIO.
6069  */
6070 static int try_to_fix_bad_block(struct btrfs_root *root,
6071                                 struct extent_buffer *buf,
6072                                 enum btrfs_tree_block_status status)
6073 {
6074         struct btrfs_trans_handle *trans;
6075         struct ulist *roots;
6076         struct ulist_node *node;
6077         struct btrfs_root *search_root;
6078         struct btrfs_path path;
6079         struct ulist_iterator iter;
6080         struct btrfs_key root_key, key;
6081         int ret;
6082
6083         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6084             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6085                 return -EIO;
6086
6087         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6088         if (ret)
6089                 return -EIO;
6090
6091         btrfs_init_path(&path);
6092         ULIST_ITER_INIT(&iter);
6093         while ((node = ulist_next(roots, &iter))) {
6094                 root_key.objectid = node->val;
6095                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6096                 root_key.offset = (u64)-1;
6097
6098                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6099                 if (IS_ERR(root)) {
6100                         ret = -EIO;
6101                         break;
6102                 }
6103
6104
6105                 trans = btrfs_start_transaction(search_root, 0);
6106                 if (IS_ERR(trans)) {
6107                         ret = PTR_ERR(trans);
6108                         break;
6109                 }
6110
6111                 path.lowest_level = btrfs_header_level(buf);
6112                 path.skip_check_block = 1;
6113                 if (path.lowest_level)
6114                         btrfs_node_key_to_cpu(buf, &key, 0);
6115                 else
6116                         btrfs_item_key_to_cpu(buf, &key, 0);
6117                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6118                 if (ret) {
6119                         ret = -EIO;
6120                         btrfs_commit_transaction(trans, search_root);
6121                         break;
6122                 }
6123                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6124                         ret = fix_key_order(search_root, &path);
6125                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6126                         ret = fix_item_offset(search_root, &path);
6127                 if (ret) {
6128                         btrfs_commit_transaction(trans, search_root);
6129                         break;
6130                 }
6131                 btrfs_release_path(&path);
6132                 btrfs_commit_transaction(trans, search_root);
6133         }
6134         ulist_free(roots);
6135         btrfs_release_path(&path);
6136         return ret;
6137 }
6138
6139 static int check_block(struct btrfs_root *root,
6140                        struct cache_tree *extent_cache,
6141                        struct extent_buffer *buf, u64 flags)
6142 {
6143         struct extent_record *rec;
6144         struct cache_extent *cache;
6145         struct btrfs_key key;
6146         enum btrfs_tree_block_status status;
6147         int ret = 0;
6148         int level;
6149
6150         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6151         if (!cache)
6152                 return 1;
6153         rec = container_of(cache, struct extent_record, cache);
6154         rec->generation = btrfs_header_generation(buf);
6155
6156         level = btrfs_header_level(buf);
6157         if (btrfs_header_nritems(buf) > 0) {
6158
6159                 if (level == 0)
6160                         btrfs_item_key_to_cpu(buf, &key, 0);
6161                 else
6162                         btrfs_node_key_to_cpu(buf, &key, 0);
6163
6164                 rec->info_objectid = key.objectid;
6165         }
6166         rec->info_level = level;
6167
6168         if (btrfs_is_leaf(buf))
6169                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6170         else
6171                 status = btrfs_check_node(root, &rec->parent_key, buf);
6172
6173         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6174                 if (repair)
6175                         status = try_to_fix_bad_block(root, buf, status);
6176                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6177                         ret = -EIO;
6178                         fprintf(stderr, "bad block %llu\n",
6179                                 (unsigned long long)buf->start);
6180                 } else {
6181                         /*
6182                          * Signal to callers we need to start the scan over
6183                          * again since we'll have cowed blocks.
6184                          */
6185                         ret = -EAGAIN;
6186                 }
6187         } else {
6188                 rec->content_checked = 1;
6189                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6190                         rec->owner_ref_checked = 1;
6191                 else {
6192                         ret = check_owner_ref(root, rec, buf);
6193                         if (!ret)
6194                                 rec->owner_ref_checked = 1;
6195                 }
6196         }
6197         if (!ret)
6198                 maybe_free_extent_rec(extent_cache, rec);
6199         return ret;
6200 }
6201
#if 0
/*
 * Compiled-out, list-based lookup of a tree backref on @rec.
 *
 * Walks rec->backrefs, skipping data backrefs.  With a non-zero @parent only
 * full backrefs are considered and matched on their parent; otherwise only
 * non-full backrefs are considered and matched on @root.
 *
 * Returns the matching backref or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs; pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;

		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
6231
6232 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6233                                                 u64 parent, u64 root)
6234 {
6235         struct tree_backref *ref = malloc(sizeof(*ref));
6236
6237         if (!ref)
6238                 return NULL;
6239         memset(&ref->node, 0, sizeof(ref->node));
6240         if (parent > 0) {
6241                 ref->parent = parent;
6242                 ref->node.full_backref = 1;
6243         } else {
6244                 ref->root = root;
6245                 ref->node.full_backref = 0;
6246         }
6247
6248         return ref;
6249 }
6250
#if 0
/*
 * Compiled-out, list-based lookup of a data backref on @rec.
 *
 * Walks rec->backrefs, skipping tree backrefs.  With a non-zero @parent only
 * full backrefs are considered and matched on their parent.  Otherwise only
 * non-full backrefs matching (@root, @owner, @offset) are considered; when
 * both @found_ref and the candidate's found_ref are set, the candidate must
 * also agree on @bytes and @disk_bytenr.
 *
 * Returns the matching backref or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs; pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;

		back = to_data_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		/* A ref that was already seen must describe the same extent */
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
6289
6290 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6291                                                 u64 parent, u64 root,
6292                                                 u64 owner, u64 offset,
6293                                                 u64 max_size)
6294 {
6295         struct data_backref *ref = malloc(sizeof(*ref));
6296
6297         if (!ref)
6298                 return NULL;
6299         memset(&ref->node, 0, sizeof(ref->node));
6300         ref->node.is_data = 1;
6301
6302         if (parent > 0) {
6303                 ref->parent = parent;
6304                 ref->owner = 0;
6305                 ref->offset = 0;
6306                 ref->node.full_backref = 1;
6307         } else {
6308                 ref->root = root;
6309                 ref->owner = owner;
6310                 ref->offset = offset;
6311                 ref->node.full_backref = 0;
6312         }
6313         ref->bytes = max_size;
6314         ref->found_ref = 0;
6315         ref->num_refs = 0;
6316         if (max_size > rec->max_size)
6317                 rec->max_size = max_size;
6318         return ref;
6319 }
6320
6321 /* Check if the type of extent matches with its chunk */
6322 static void check_extent_type(struct extent_record *rec)
6323 {
6324         struct btrfs_block_group_cache *bg_cache;
6325
6326         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6327         if (!bg_cache)
6328                 return;
6329
6330         /* data extent, check chunk directly*/
6331         if (!rec->metadata) {
6332                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6333                         rec->wrong_chunk_type = 1;
6334                 return;
6335         }
6336
6337         /* metadata extent, check the obvious case first */
6338         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6339                                  BTRFS_BLOCK_GROUP_METADATA))) {
6340                 rec->wrong_chunk_type = 1;
6341                 return;
6342         }
6343
6344         /*
6345          * Check SYSTEM extent, as it's also marked as metadata, we can only
6346          * make sure it's a SYSTEM extent by its backref
6347          */
6348         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6349                 struct extent_backref *node;
6350                 struct tree_backref *tback;
6351                 u64 bg_type;
6352
6353                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6354                 if (node->is_data) {
6355                         /* tree block shouldn't have data backref */
6356                         rec->wrong_chunk_type = 1;
6357                         return;
6358                 }
6359                 tback = container_of(node, struct tree_backref, node);
6360
6361                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6362                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6363                 else
6364                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6365                 if (!(bg_cache->flags & bg_type))
6366                         rec->wrong_chunk_type = 1;
6367         }
6368 }
6369
6370 /*
6371  * Allocate a new extent record, fill default values from @tmpl and insert int
6372  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6373  * the cache, otherwise it fails.
6374  */
6375 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6376                 struct extent_record *tmpl)
6377 {
6378         struct extent_record *rec;
6379         int ret = 0;
6380
6381         BUG_ON(tmpl->max_size == 0);
6382         rec = malloc(sizeof(*rec));
6383         if (!rec)
6384                 return -ENOMEM;
6385         rec->start = tmpl->start;
6386         rec->max_size = tmpl->max_size;
6387         rec->nr = max(tmpl->nr, tmpl->max_size);
6388         rec->found_rec = tmpl->found_rec;
6389         rec->content_checked = tmpl->content_checked;
6390         rec->owner_ref_checked = tmpl->owner_ref_checked;
6391         rec->num_duplicates = 0;
6392         rec->metadata = tmpl->metadata;
6393         rec->flag_block_full_backref = FLAG_UNSET;
6394         rec->bad_full_backref = 0;
6395         rec->crossing_stripes = 0;
6396         rec->wrong_chunk_type = 0;
6397         rec->is_root = tmpl->is_root;
6398         rec->refs = tmpl->refs;
6399         rec->extent_item_refs = tmpl->extent_item_refs;
6400         rec->parent_generation = tmpl->parent_generation;
6401         INIT_LIST_HEAD(&rec->backrefs);
6402         INIT_LIST_HEAD(&rec->dups);
6403         INIT_LIST_HEAD(&rec->list);
6404         rec->backref_tree = RB_ROOT;
6405         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6406         rec->cache.start = tmpl->start;
6407         rec->cache.size = tmpl->nr;
6408         ret = insert_cache_extent(extent_cache, &rec->cache);
6409         if (ret) {
6410                 free(rec);
6411                 return ret;
6412         }
6413         bytes_used += rec->nr;
6414
6415         if (tmpl->metadata)
6416                 rec->crossing_stripes = check_crossing_stripes(global_info,
6417                                 rec->start, global_info->nodesize);
6418         check_extent_type(rec);
6419         return ret;
6420 }
6421
6422 /*
6423  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6424  * some are hints:
6425  * - refs              - if found, increase refs
6426  * - is_root           - if found, set
6427  * - content_checked   - if found, set
6428  * - owner_ref_checked - if found, set
6429  *
6430  * If not found, create a new one, initialize and insert.
6431  */
6432 static int add_extent_rec(struct cache_tree *extent_cache,
6433                 struct extent_record *tmpl)
6434 {
6435         struct extent_record *rec;
6436         struct cache_extent *cache;
6437         int ret = 0;
6438         int dup = 0;
6439
6440         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6441         if (cache) {
6442                 rec = container_of(cache, struct extent_record, cache);
6443                 if (tmpl->refs)
6444                         rec->refs++;
6445                 if (rec->nr == 1)
6446                         rec->nr = max(tmpl->nr, tmpl->max_size);
6447
6448                 /*
6449                  * We need to make sure to reset nr to whatever the extent
6450                  * record says was the real size, this way we can compare it to
6451                  * the backrefs.
6452                  */
6453                 if (tmpl->found_rec) {
6454                         if (tmpl->start != rec->start || rec->found_rec) {
6455                                 struct extent_record *tmp;
6456
6457                                 dup = 1;
6458                                 if (list_empty(&rec->list))
6459                                         list_add_tail(&rec->list,
6460                                                       &duplicate_extents);
6461
6462                                 /*
6463                                  * We have to do this song and dance in case we
6464                                  * find an extent record that falls inside of
6465                                  * our current extent record but does not have
6466                                  * the same objectid.
6467                                  */
6468                                 tmp = malloc(sizeof(*tmp));
6469                                 if (!tmp)
6470                                         return -ENOMEM;
6471                                 tmp->start = tmpl->start;
6472                                 tmp->max_size = tmpl->max_size;
6473                                 tmp->nr = tmpl->nr;
6474                                 tmp->found_rec = 1;
6475                                 tmp->metadata = tmpl->metadata;
6476                                 tmp->extent_item_refs = tmpl->extent_item_refs;
6477                                 INIT_LIST_HEAD(&tmp->list);
6478                                 list_add_tail(&tmp->list, &rec->dups);
6479                                 rec->num_duplicates++;
6480                         } else {
6481                                 rec->nr = tmpl->nr;
6482                                 rec->found_rec = 1;
6483                         }
6484                 }
6485
6486                 if (tmpl->extent_item_refs && !dup) {
6487                         if (rec->extent_item_refs) {
6488                                 fprintf(stderr, "block %llu rec "
6489                                         "extent_item_refs %llu, passed %llu\n",
6490                                         (unsigned long long)tmpl->start,
6491                                         (unsigned long long)
6492                                                         rec->extent_item_refs,
6493                                         (unsigned long long)tmpl->extent_item_refs);
6494                         }
6495                         rec->extent_item_refs = tmpl->extent_item_refs;
6496                 }
6497                 if (tmpl->is_root)
6498                         rec->is_root = 1;
6499                 if (tmpl->content_checked)
6500                         rec->content_checked = 1;
6501                 if (tmpl->owner_ref_checked)
6502                         rec->owner_ref_checked = 1;
6503                 memcpy(&rec->parent_key, &tmpl->parent_key,
6504                                 sizeof(tmpl->parent_key));
6505                 if (tmpl->parent_generation)
6506                         rec->parent_generation = tmpl->parent_generation;
6507                 if (rec->max_size < tmpl->max_size)
6508                         rec->max_size = tmpl->max_size;
6509
6510                 /*
6511                  * A metadata extent can't cross stripe_len boundary, otherwise
6512                  * kernel scrub won't be able to handle it.
6513                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6514                  * it.
6515                  */
6516                 if (tmpl->metadata)
6517                         rec->crossing_stripes = check_crossing_stripes(
6518                                         global_info, rec->start,
6519                                         global_info->nodesize);
6520                 check_extent_type(rec);
6521                 maybe_free_extent_rec(extent_cache, rec);
6522                 return ret;
6523         }
6524
6525         ret = add_extent_rec_nolookup(extent_cache, tmpl);
6526
6527         return ret;
6528 }
6529
6530 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6531                             u64 parent, u64 root, int found_ref)
6532 {
6533         struct extent_record *rec;
6534         struct tree_backref *back;
6535         struct cache_extent *cache;
6536         int ret;
6537         bool insert = false;
6538
6539         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6540         if (!cache) {
6541                 struct extent_record tmpl;
6542
6543                 memset(&tmpl, 0, sizeof(tmpl));
6544                 tmpl.start = bytenr;
6545                 tmpl.nr = 1;
6546                 tmpl.metadata = 1;
6547                 tmpl.max_size = 1;
6548
6549                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6550                 if (ret)
6551                         return ret;
6552
6553                 /* really a bug in cache_extent implement now */
6554                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6555                 if (!cache)
6556                         return -ENOENT;
6557         }
6558
6559         rec = container_of(cache, struct extent_record, cache);
6560         if (rec->start != bytenr) {
6561                 /*
6562                  * Several cause, from unaligned bytenr to over lapping extents
6563                  */
6564                 return -EEXIST;
6565         }
6566
6567         back = find_tree_backref(rec, parent, root);
6568         if (!back) {
6569                 back = alloc_tree_backref(rec, parent, root);
6570                 if (!back)
6571                         return -ENOMEM;
6572                 insert = true;
6573         }
6574
6575         if (found_ref) {
6576                 if (back->node.found_ref) {
6577                         fprintf(stderr, "Extent back ref already exists "
6578                                 "for %llu parent %llu root %llu \n",
6579                                 (unsigned long long)bytenr,
6580                                 (unsigned long long)parent,
6581                                 (unsigned long long)root);
6582                 }
6583                 back->node.found_ref = 1;
6584         } else {
6585                 if (back->node.found_extent_tree) {
6586                         fprintf(stderr, "Extent back ref already exists "
6587                                 "for %llu parent %llu root %llu \n",
6588                                 (unsigned long long)bytenr,
6589                                 (unsigned long long)parent,
6590                                 (unsigned long long)root);
6591                 }
6592                 back->node.found_extent_tree = 1;
6593         }
6594         if (insert)
6595                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6596                         compare_extent_backref));
6597         check_extent_type(rec);
6598         maybe_free_extent_rec(extent_cache, rec);
6599         return 0;
6600 }
6601
6602 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6603                             u64 parent, u64 root, u64 owner, u64 offset,
6604                             u32 num_refs, int found_ref, u64 max_size)
6605 {
6606         struct extent_record *rec;
6607         struct data_backref *back;
6608         struct cache_extent *cache;
6609         int ret;
6610         bool insert = false;
6611
6612         cache = lookup_cache_extent(extent_cache, bytenr, 1);
6613         if (!cache) {
6614                 struct extent_record tmpl;
6615
6616                 memset(&tmpl, 0, sizeof(tmpl));
6617                 tmpl.start = bytenr;
6618                 tmpl.nr = 1;
6619                 tmpl.max_size = max_size;
6620
6621                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6622                 if (ret)
6623                         return ret;
6624
6625                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6626                 if (!cache)
6627                         abort();
6628         }
6629
6630         rec = container_of(cache, struct extent_record, cache);
6631         if (rec->max_size < max_size)
6632                 rec->max_size = max_size;
6633
6634         /*
6635          * If found_ref is set then max_size is the real size and must match the
6636          * existing refs.  So if we have already found a ref then we need to
6637          * make sure that this ref matches the existing one, otherwise we need
6638          * to add a new backref so we can notice that the backrefs don't match
6639          * and we need to figure out who is telling the truth.  This is to
6640          * account for that awful fsync bug I introduced where we'd end up with
6641          * a btrfs_file_extent_item that would have its length include multiple
6642          * prealloc extents or point inside of a prealloc extent.
6643          */
6644         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6645                                  bytenr, max_size);
6646         if (!back) {
6647                 back = alloc_data_backref(rec, parent, root, owner, offset,
6648                                           max_size);
6649                 BUG_ON(!back);
6650                 insert = true;
6651         }
6652
6653         if (found_ref) {
6654                 BUG_ON(num_refs != 1);
6655                 if (back->node.found_ref)
6656                         BUG_ON(back->bytes != max_size);
6657                 back->node.found_ref = 1;
6658                 back->found_ref += 1;
6659                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6660                         back->bytes = max_size;
6661                         back->disk_bytenr = bytenr;
6662
6663                         /* Need to reinsert if not already in the tree */
6664                         if (!insert) {
6665                                 rb_erase(&back->node.node, &rec->backref_tree);
6666                                 insert = true;
6667                         }
6668                 }
6669                 rec->refs += 1;
6670                 rec->content_checked = 1;
6671                 rec->owner_ref_checked = 1;
6672         } else {
6673                 if (back->node.found_extent_tree) {
6674                         fprintf(stderr, "Extent back ref already exists "
6675                                 "for %llu parent %llu root %llu "
6676                                 "owner %llu offset %llu num_refs %lu\n",
6677                                 (unsigned long long)bytenr,
6678                                 (unsigned long long)parent,
6679                                 (unsigned long long)root,
6680                                 (unsigned long long)owner,
6681                                 (unsigned long long)offset,
6682                                 (unsigned long)num_refs);
6683                 }
6684                 back->num_refs = num_refs;
6685                 back->node.found_extent_tree = 1;
6686         }
6687         if (insert)
6688                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6689                         compare_extent_backref));
6690
6691         maybe_free_extent_rec(extent_cache, rec);
6692         return 0;
6693 }
6694
6695 static int add_pending(struct cache_tree *pending,
6696                        struct cache_tree *seen, u64 bytenr, u32 size)
6697 {
6698         int ret;
6699         ret = add_cache_extent(seen, bytenr, size);
6700         if (ret)
6701                 return ret;
6702         add_cache_extent(pending, bytenr, size);
6703         return 0;
6704 }
6705
6706 static int pick_next_pending(struct cache_tree *pending,
6707                         struct cache_tree *reada,
6708                         struct cache_tree *nodes,
6709                         u64 last, struct block_info *bits, int bits_nr,
6710                         int *reada_bits)
6711 {
6712         unsigned long node_start = last;
6713         struct cache_extent *cache;
6714         int ret;
6715
6716         cache = search_cache_extent(reada, 0);
6717         if (cache) {
6718                 bits[0].start = cache->start;
6719                 bits[0].size = cache->size;
6720                 *reada_bits = 1;
6721                 return 1;
6722         }
6723         *reada_bits = 0;
6724         if (node_start > 32768)
6725                 node_start -= 32768;
6726
6727         cache = search_cache_extent(nodes, node_start);
6728         if (!cache)
6729                 cache = search_cache_extent(nodes, 0);
6730
6731         if (!cache) {
6732                  cache = search_cache_extent(pending, 0);
6733                  if (!cache)
6734                          return 0;
6735                  ret = 0;
6736                  do {
6737                          bits[ret].start = cache->start;
6738                          bits[ret].size = cache->size;
6739                          cache = next_cache_extent(cache);
6740                          ret++;
6741                  } while (cache && ret < bits_nr);
6742                  return ret;
6743         }
6744
6745         ret = 0;
6746         do {
6747                 bits[ret].start = cache->start;
6748                 bits[ret].size = cache->size;
6749                 cache = next_cache_extent(cache);
6750                 ret++;
6751         } while (cache && ret < bits_nr);
6752
6753         if (bits_nr - ret > 8) {
6754                 u64 lookup = bits[0].start + bits[0].size;
6755                 struct cache_extent *next;
6756                 next = search_cache_extent(pending, lookup);
6757                 while(next) {
6758                         if (next->start - lookup > 32768)
6759                                 break;
6760                         bits[ret].start = next->start;
6761                         bits[ret].size = next->size;
6762                         lookup = next->start + next->size;
6763                         ret++;
6764                         if (ret == bits_nr)
6765                                 break;
6766                         next = next_cache_extent(next);
6767                         if (!next)
6768                                 break;
6769                 }
6770         }
6771         return ret;
6772 }
6773
6774 static void free_chunk_record(struct cache_extent *cache)
6775 {
6776         struct chunk_record *rec;
6777
6778         rec = container_of(cache, struct chunk_record, cache);
6779         list_del_init(&rec->list);
6780         list_del_init(&rec->dextents);
6781         free(rec);
6782 }
6783
6784 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6785 {
6786         cache_tree_free_extents(chunk_cache, free_chunk_record);
6787 }
6788
6789 static void free_device_record(struct rb_node *node)
6790 {
6791         struct device_record *rec;
6792
6793         rec = container_of(node, struct device_record, node);
6794         free(rec);
6795 }
6796
6797 FREE_RB_BASED_TREE(device_cache, free_device_record);
6798
6799 int insert_block_group_record(struct block_group_tree *tree,
6800                               struct block_group_record *bg_rec)
6801 {
6802         int ret;
6803
6804         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6805         if (ret)
6806                 return ret;
6807
6808         list_add_tail(&bg_rec->list, &tree->block_groups);
6809         return 0;
6810 }
6811
6812 static void free_block_group_record(struct cache_extent *cache)
6813 {
6814         struct block_group_record *rec;
6815
6816         rec = container_of(cache, struct block_group_record, cache);
6817         list_del_init(&rec->list);
6818         free(rec);
6819 }
6820
6821 void free_block_group_tree(struct block_group_tree *tree)
6822 {
6823         cache_tree_free_extents(&tree->tree, free_block_group_record);
6824 }
6825
6826 int insert_device_extent_record(struct device_extent_tree *tree,
6827                                 struct device_extent_record *de_rec)
6828 {
6829         int ret;
6830
6831         /*
6832          * Device extent is a bit different from the other extents, because
6833          * the extents which belong to the different devices may have the
6834          * same start and size, so we need use the special extent cache
6835          * search/insert functions.
6836          */
6837         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6838         if (ret)
6839                 return ret;
6840
6841         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6842         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6843         return 0;
6844 }
6845
6846 static void free_device_extent_record(struct cache_extent *cache)
6847 {
6848         struct device_extent_record *rec;
6849
6850         rec = container_of(cache, struct device_extent_record, cache);
6851         if (!list_empty(&rec->chunk_list))
6852                 list_del_init(&rec->chunk_list);
6853         if (!list_empty(&rec->device_list))
6854                 list_del_init(&rec->device_list);
6855         free(rec);
6856 }
6857
6858 void free_device_extent_tree(struct device_extent_tree *tree)
6859 {
6860         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6861 }
6862
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the item's ref
 * count.  In both cases the item key's objectid is the extent bytenr and its
 * offset is passed as the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
				key.offset, 0, 0, 0,
				btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
6883
6884 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6885                                             struct btrfs_key *key,
6886                                             int slot)
6887 {
6888         struct btrfs_chunk *ptr;
6889         struct chunk_record *rec;
6890         int num_stripes, i;
6891
6892         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6893         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6894
6895         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6896         if (!rec) {
6897                 fprintf(stderr, "memory allocation failed\n");
6898                 exit(-1);
6899         }
6900
6901         INIT_LIST_HEAD(&rec->list);
6902         INIT_LIST_HEAD(&rec->dextents);
6903         rec->bg_rec = NULL;
6904
6905         rec->cache.start = key->offset;
6906         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6907
6908         rec->generation = btrfs_header_generation(leaf);
6909
6910         rec->objectid = key->objectid;
6911         rec->type = key->type;
6912         rec->offset = key->offset;
6913
6914         rec->length = rec->cache.size;
6915         rec->owner = btrfs_chunk_owner(leaf, ptr);
6916         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6917         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6918         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6919         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6920         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6921         rec->num_stripes = num_stripes;
6922         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6923
6924         for (i = 0; i < rec->num_stripes; ++i) {
6925                 rec->stripes[i].devid =
6926                         btrfs_stripe_devid_nr(leaf, ptr, i);
6927                 rec->stripes[i].offset =
6928                         btrfs_stripe_offset_nr(leaf, ptr, i);
6929                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6930                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6931                                 BTRFS_UUID_SIZE);
6932         }
6933
6934         return rec;
6935 }
6936
6937 static int process_chunk_item(struct cache_tree *chunk_cache,
6938                               struct btrfs_key *key, struct extent_buffer *eb,
6939                               int slot)
6940 {
6941         struct chunk_record *rec;
6942         struct btrfs_chunk *chunk;
6943         int ret = 0;
6944
6945         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6946         /*
6947          * Do extra check for this chunk item,
6948          *
6949          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6950          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6951          * and owner<->key_type check.
6952          */
6953         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6954                                       key->offset);
6955         if (ret < 0) {
6956                 error("chunk(%llu, %llu) is not valid, ignore it",
6957                       key->offset, btrfs_chunk_length(eb, chunk));
6958                 return 0;
6959         }
6960         rec = btrfs_new_chunk_record(eb, key, slot);
6961         ret = insert_cache_extent(chunk_cache, &rec->cache);
6962         if (ret) {
6963                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6964                         rec->offset, rec->length);
6965                 free(rec);
6966         }
6967
6968         return ret;
6969 }
6970
6971 static int process_device_item(struct rb_root *dev_cache,
6972                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6973 {
6974         struct btrfs_dev_item *ptr;
6975         struct device_record *rec;
6976         int ret = 0;
6977
6978         ptr = btrfs_item_ptr(eb,
6979                 slot, struct btrfs_dev_item);
6980
6981         rec = malloc(sizeof(*rec));
6982         if (!rec) {
6983                 fprintf(stderr, "memory allocation failed\n");
6984                 return -ENOMEM;
6985         }
6986
6987         rec->devid = key->offset;
6988         rec->generation = btrfs_header_generation(eb);
6989
6990         rec->objectid = key->objectid;
6991         rec->type = key->type;
6992         rec->offset = key->offset;
6993
6994         rec->devid = btrfs_device_id(eb, ptr);
6995         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6996         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6997
6998         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6999         if (ret) {
7000                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7001                 free(rec);
7002         }
7003
7004         return ret;
7005 }
7006
7007 struct block_group_record *
7008 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7009                              int slot)
7010 {
7011         struct btrfs_block_group_item *ptr;
7012         struct block_group_record *rec;
7013
7014         rec = calloc(1, sizeof(*rec));
7015         if (!rec) {
7016                 fprintf(stderr, "memory allocation failed\n");
7017                 exit(-1);
7018         }
7019
7020         rec->cache.start = key->objectid;
7021         rec->cache.size = key->offset;
7022
7023         rec->generation = btrfs_header_generation(leaf);
7024
7025         rec->objectid = key->objectid;
7026         rec->type = key->type;
7027         rec->offset = key->offset;
7028
7029         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7030         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7031
7032         INIT_LIST_HEAD(&rec->list);
7033
7034         return rec;
7035 }
7036
7037 static int process_block_group_item(struct block_group_tree *block_group_cache,
7038                                     struct btrfs_key *key,
7039                                     struct extent_buffer *eb, int slot)
7040 {
7041         struct block_group_record *rec;
7042         int ret = 0;
7043
7044         rec = btrfs_new_block_group_record(eb, key, slot);
7045         ret = insert_block_group_record(block_group_cache, rec);
7046         if (ret) {
7047                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7048                         rec->objectid, rec->offset);
7049                 free(rec);
7050         }
7051
7052         return ret;
7053 }
7054
7055 struct device_extent_record *
7056 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7057                                struct btrfs_key *key, int slot)
7058 {
7059         struct device_extent_record *rec;
7060         struct btrfs_dev_extent *ptr;
7061
7062         rec = calloc(1, sizeof(*rec));
7063         if (!rec) {
7064                 fprintf(stderr, "memory allocation failed\n");
7065                 exit(-1);
7066         }
7067
7068         rec->cache.objectid = key->objectid;
7069         rec->cache.start = key->offset;
7070
7071         rec->generation = btrfs_header_generation(leaf);
7072
7073         rec->objectid = key->objectid;
7074         rec->type = key->type;
7075         rec->offset = key->offset;
7076
7077         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7078         rec->chunk_objecteid =
7079                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7080         rec->chunk_offset =
7081                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7082         rec->length = btrfs_dev_extent_length(leaf, ptr);
7083         rec->cache.size = rec->length;
7084
7085         INIT_LIST_HEAD(&rec->chunk_list);
7086         INIT_LIST_HEAD(&rec->device_list);
7087
7088         return rec;
7089 }
7090
7091 static int
7092 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7093                            struct btrfs_key *key, struct extent_buffer *eb,
7094                            int slot)
7095 {
7096         struct device_extent_record *rec;
7097         int ret;
7098
7099         rec = btrfs_new_device_extent_record(eb, key, slot);
7100         ret = insert_device_extent_record(dev_extent_cache, rec);
7101         if (ret) {
7102                 fprintf(stderr,
7103                         "Device extent[%llu, %llu, %llu] existed.\n",
7104                         rec->objectid, rec->offset, rec->length);
7105                 free(rec);
7106         }
7107
7108         return ret;
7109 }
7110
7111 static int process_extent_item(struct btrfs_root *root,
7112                                struct cache_tree *extent_cache,
7113                                struct extent_buffer *eb, int slot)
7114 {
7115         struct btrfs_extent_item *ei;
7116         struct btrfs_extent_inline_ref *iref;
7117         struct btrfs_extent_data_ref *dref;
7118         struct btrfs_shared_data_ref *sref;
7119         struct btrfs_key key;
7120         struct extent_record tmpl;
7121         unsigned long end;
7122         unsigned long ptr;
7123         int ret;
7124         int type;
7125         u32 item_size = btrfs_item_size_nr(eb, slot);
7126         u64 refs = 0;
7127         u64 offset;
7128         u64 num_bytes;
7129         int metadata = 0;
7130
7131         btrfs_item_key_to_cpu(eb, &key, slot);
7132
7133         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7134                 metadata = 1;
7135                 num_bytes = root->fs_info->nodesize;
7136         } else {
7137                 num_bytes = key.offset;
7138         }
7139
7140         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7141                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7142                       key.objectid, root->fs_info->sectorsize);
7143                 return -EIO;
7144         }
7145         if (item_size < sizeof(*ei)) {
7146 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7147                 struct btrfs_extent_item_v0 *ei0;
7148                 BUG_ON(item_size != sizeof(*ei0));
7149                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7150                 refs = btrfs_extent_refs_v0(eb, ei0);
7151 #else
7152                 BUG();
7153 #endif
7154                 memset(&tmpl, 0, sizeof(tmpl));
7155                 tmpl.start = key.objectid;
7156                 tmpl.nr = num_bytes;
7157                 tmpl.extent_item_refs = refs;
7158                 tmpl.metadata = metadata;
7159                 tmpl.found_rec = 1;
7160                 tmpl.max_size = num_bytes;
7161
7162                 return add_extent_rec(extent_cache, &tmpl);
7163         }
7164
7165         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7166         refs = btrfs_extent_refs(eb, ei);
7167         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7168                 metadata = 1;
7169         else
7170                 metadata = 0;
7171         if (metadata && num_bytes != root->fs_info->nodesize) {
7172                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7173                       num_bytes, root->fs_info->nodesize);
7174                 return -EIO;
7175         }
7176         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7177                 error("ignore invalid data extent, length %llu is not aligned to %u",
7178                       num_bytes, root->fs_info->sectorsize);
7179                 return -EIO;
7180         }
7181
7182         memset(&tmpl, 0, sizeof(tmpl));
7183         tmpl.start = key.objectid;
7184         tmpl.nr = num_bytes;
7185         tmpl.extent_item_refs = refs;
7186         tmpl.metadata = metadata;
7187         tmpl.found_rec = 1;
7188         tmpl.max_size = num_bytes;
7189         add_extent_rec(extent_cache, &tmpl);
7190
7191         ptr = (unsigned long)(ei + 1);
7192         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7193             key.type == BTRFS_EXTENT_ITEM_KEY)
7194                 ptr += sizeof(struct btrfs_tree_block_info);
7195
7196         end = (unsigned long)ei + item_size;
7197         while (ptr < end) {
7198                 iref = (struct btrfs_extent_inline_ref *)ptr;
7199                 type = btrfs_extent_inline_ref_type(eb, iref);
7200                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7201                 switch (type) {
7202                 case BTRFS_TREE_BLOCK_REF_KEY:
7203                         ret = add_tree_backref(extent_cache, key.objectid,
7204                                         0, offset, 0);
7205                         if (ret < 0)
7206                                 error(
7207                         "add_tree_backref failed (extent items tree block): %s",
7208                                       strerror(-ret));
7209                         break;
7210                 case BTRFS_SHARED_BLOCK_REF_KEY:
7211                         ret = add_tree_backref(extent_cache, key.objectid,
7212                                         offset, 0, 0);
7213                         if (ret < 0)
7214                                 error(
7215                         "add_tree_backref failed (extent items shared block): %s",
7216                                       strerror(-ret));
7217                         break;
7218                 case BTRFS_EXTENT_DATA_REF_KEY:
7219                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7220                         add_data_backref(extent_cache, key.objectid, 0,
7221                                         btrfs_extent_data_ref_root(eb, dref),
7222                                         btrfs_extent_data_ref_objectid(eb,
7223                                                                        dref),
7224                                         btrfs_extent_data_ref_offset(eb, dref),
7225                                         btrfs_extent_data_ref_count(eb, dref),
7226                                         0, num_bytes);
7227                         break;
7228                 case BTRFS_SHARED_DATA_REF_KEY:
7229                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7230                         add_data_backref(extent_cache, key.objectid, offset,
7231                                         0, 0, 0,
7232                                         btrfs_shared_data_ref_count(eb, sref),
7233                                         0, num_bytes);
7234                         break;
7235                 default:
7236                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7237                                 key.objectid, key.type, num_bytes);
7238                         goto out;
7239                 }
7240                 ptr += btrfs_extent_inline_ref_size(type);
7241         }
7242         WARN_ON(ptr > end);
7243 out:
7244         return 0;
7245 }
7246
7247 static int check_cache_range(struct btrfs_root *root,
7248                              struct btrfs_block_group_cache *cache,
7249                              u64 offset, u64 bytes)
7250 {
7251         struct btrfs_free_space *entry;
7252         u64 *logical;
7253         u64 bytenr;
7254         int stripe_len;
7255         int i, nr, ret;
7256
7257         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7258                 bytenr = btrfs_sb_offset(i);
7259                 ret = btrfs_rmap_block(root->fs_info,
7260                                        cache->key.objectid, bytenr, 0,
7261                                        &logical, &nr, &stripe_len);
7262                 if (ret)
7263                         return ret;
7264
7265                 while (nr--) {
7266                         if (logical[nr] + stripe_len <= offset)
7267                                 continue;
7268                         if (offset + bytes <= logical[nr])
7269                                 continue;
7270                         if (logical[nr] == offset) {
7271                                 if (stripe_len >= bytes) {
7272                                         free(logical);
7273                                         return 0;
7274                                 }
7275                                 bytes -= stripe_len;
7276                                 offset += stripe_len;
7277                         } else if (logical[nr] < offset) {
7278                                 if (logical[nr] + stripe_len >=
7279                                     offset + bytes) {
7280                                         free(logical);
7281                                         return 0;
7282                                 }
7283                                 bytes = (offset + bytes) -
7284                                         (logical[nr] + stripe_len);
7285                                 offset = logical[nr] + stripe_len;
7286                         } else {
7287                                 /*
7288                                  * Could be tricky, the super may land in the
7289                                  * middle of the area we're checking.  First
7290                                  * check the easiest case, it's at the end.
7291                                  */
7292                                 if (logical[nr] + stripe_len >=
7293                                     bytes + offset) {
7294                                         bytes = logical[nr] - offset;
7295                                         continue;
7296                                 }
7297
7298                                 /* Check the left side */
7299                                 ret = check_cache_range(root, cache,
7300                                                         offset,
7301                                                         logical[nr] - offset);
7302                                 if (ret) {
7303                                         free(logical);
7304                                         return ret;
7305                                 }
7306
7307                                 /* Now we continue with the right side */
7308                                 bytes = (offset + bytes) -
7309                                         (logical[nr] + stripe_len);
7310                                 offset = logical[nr] + stripe_len;
7311                         }
7312                 }
7313
7314                 free(logical);
7315         }
7316
7317         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7318         if (!entry) {
7319                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7320                         offset, offset+bytes);
7321                 return -EINVAL;
7322         }
7323
7324         if (entry->offset != offset) {
7325                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7326                         entry->offset);
7327                 return -EINVAL;
7328         }
7329
7330         if (entry->bytes != bytes) {
7331                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7332                         bytes, entry->bytes, offset);
7333                 return -EINVAL;
7334         }
7335
7336         unlink_free_space(cache->free_space_ctl, entry);
7337         free(entry);
7338         return 0;
7339 }
7340
7341 static int verify_space_cache(struct btrfs_root *root,
7342                               struct btrfs_block_group_cache *cache)
7343 {
7344         struct btrfs_path path;
7345         struct extent_buffer *leaf;
7346         struct btrfs_key key;
7347         u64 last;
7348         int ret = 0;
7349
7350         root = root->fs_info->extent_root;
7351
7352         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7353
7354         btrfs_init_path(&path);
7355         key.objectid = last;
7356         key.offset = 0;
7357         key.type = BTRFS_EXTENT_ITEM_KEY;
7358         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7359         if (ret < 0)
7360                 goto out;
7361         ret = 0;
7362         while (1) {
7363                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7364                         ret = btrfs_next_leaf(root, &path);
7365                         if (ret < 0)
7366                                 goto out;
7367                         if (ret > 0) {
7368                                 ret = 0;
7369                                 break;
7370                         }
7371                 }
7372                 leaf = path.nodes[0];
7373                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7374                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7375                         break;
7376                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7377                     key.type != BTRFS_METADATA_ITEM_KEY) {
7378                         path.slots[0]++;
7379                         continue;
7380                 }
7381
7382                 if (last == key.objectid) {
7383                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
7384                                 last = key.objectid + key.offset;
7385                         else
7386                                 last = key.objectid + root->fs_info->nodesize;
7387                         path.slots[0]++;
7388                         continue;
7389                 }
7390
7391                 ret = check_cache_range(root, cache, last,
7392                                         key.objectid - last);
7393                 if (ret)
7394                         break;
7395                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7396                         last = key.objectid + key.offset;
7397                 else
7398                         last = key.objectid + root->fs_info->nodesize;
7399                 path.slots[0]++;
7400         }
7401
7402         if (last < cache->key.objectid + cache->key.offset)
7403                 ret = check_cache_range(root, cache, last,
7404                                         cache->key.objectid +
7405                                         cache->key.offset - last);
7406
7407 out:
7408         btrfs_release_path(&path);
7409
7410         if (!ret &&
7411             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7412                 fprintf(stderr, "There are still entries left in the space "
7413                         "cache\n");
7414                 ret = -EINVAL;
7415         }
7416
7417         return ret;
7418 }
7419
7420 static int check_space_cache(struct btrfs_root *root)
7421 {
7422         struct btrfs_block_group_cache *cache;
7423         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7424         int ret;
7425         int error = 0;
7426
7427         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7428             btrfs_super_generation(root->fs_info->super_copy) !=
7429             btrfs_super_cache_generation(root->fs_info->super_copy)) {
7430                 printf("cache and super generation don't match, space cache "
7431                        "will be invalidated\n");
7432                 return 0;
7433         }
7434
7435         if (ctx.progress_enabled) {
7436                 ctx.tp = TASK_FREE_SPACE;
7437                 task_start(ctx.info);
7438         }
7439
7440         while (1) {
7441                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7442                 if (!cache)
7443                         break;
7444
7445                 start = cache->key.objectid + cache->key.offset;
7446                 if (!cache->free_space_ctl) {
7447                         if (btrfs_init_free_space_ctl(cache,
7448                                                 root->fs_info->sectorsize)) {
7449                                 ret = -ENOMEM;
7450                                 break;
7451                         }
7452                 } else {
7453                         btrfs_remove_free_space_cache(cache);
7454                 }
7455
7456                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7457                         ret = exclude_super_stripes(root, cache);
7458                         if (ret) {
7459                                 fprintf(stderr, "could not exclude super stripes: %s\n",
7460                                         strerror(-ret));
7461                                 error++;
7462                                 continue;
7463                         }
7464                         ret = load_free_space_tree(root->fs_info, cache);
7465                         free_excluded_extents(root, cache);
7466                         if (ret < 0) {
7467                                 fprintf(stderr, "could not load free space tree: %s\n",
7468                                         strerror(-ret));
7469                                 error++;
7470                                 continue;
7471                         }
7472                         error += ret;
7473                 } else {
7474                         ret = load_free_space_cache(root->fs_info, cache);
7475                         if (!ret)
7476                                 continue;
7477                 }
7478
7479                 ret = verify_space_cache(root, cache);
7480                 if (ret) {
7481                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
7482                                 cache->key.objectid);
7483                         error++;
7484                 }
7485         }
7486
7487         task_stop(ctx.info);
7488
7489         return error ? -EINVAL : 0;
7490 }
7491
7492 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7493                         u64 num_bytes, unsigned long leaf_offset,
7494                         struct extent_buffer *eb) {
7495
7496         struct btrfs_fs_info *fs_info = root->fs_info;
7497         u64 offset = 0;
7498         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7499         char *data;
7500         unsigned long csum_offset;
7501         u32 csum;
7502         u32 csum_expected;
7503         u64 read_len;
7504         u64 data_checked = 0;
7505         u64 tmp;
7506         int ret = 0;
7507         int mirror;
7508         int num_copies;
7509
7510         if (num_bytes % fs_info->sectorsize)
7511                 return -EINVAL;
7512
7513         data = malloc(num_bytes);
7514         if (!data)
7515                 return -ENOMEM;
7516
7517         while (offset < num_bytes) {
7518                 mirror = 0;
7519 again:
7520                 read_len = num_bytes - offset;
7521                 /* read as much space once a time */
7522                 ret = read_extent_data(fs_info, data + offset,
7523                                 bytenr + offset, &read_len, mirror);
7524                 if (ret)
7525                         goto out;
7526                 data_checked = 0;
7527                 /* verify every 4k data's checksum */
7528                 while (data_checked < read_len) {
7529                         csum = ~(u32)0;
7530                         tmp = offset + data_checked;
7531
7532                         csum = btrfs_csum_data((char *)data + tmp,
7533                                                csum, fs_info->sectorsize);
7534                         btrfs_csum_final(csum, (u8 *)&csum);
7535
7536                         csum_offset = leaf_offset +
7537                                  tmp / fs_info->sectorsize * csum_size;
7538                         read_extent_buffer(eb, (char *)&csum_expected,
7539                                            csum_offset, csum_size);
7540                         /* try another mirror */
7541                         if (csum != csum_expected) {
7542                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7543                                                 mirror, bytenr + tmp,
7544                                                 csum, csum_expected);
7545                                 num_copies = btrfs_num_copies(root->fs_info,
7546                                                 bytenr, num_bytes);
7547                                 if (mirror < num_copies - 1) {
7548                                         mirror += 1;
7549                                         goto again;
7550                                 }
7551                         }
7552                         data_checked += fs_info->sectorsize;
7553                 }
7554                 offset += read_len;
7555         }
7556 out:
7557         free(data);
7558         return ret;
7559 }
7560
7561 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7562                                u64 num_bytes)
7563 {
7564         struct btrfs_path path;
7565         struct extent_buffer *leaf;
7566         struct btrfs_key key;
7567         int ret;
7568
7569         btrfs_init_path(&path);
7570         key.objectid = bytenr;
7571         key.type = BTRFS_EXTENT_ITEM_KEY;
7572         key.offset = (u64)-1;
7573
7574 again:
7575         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7576                                 0, 0);
7577         if (ret < 0) {
7578                 fprintf(stderr, "Error looking up extent record %d\n", ret);
7579                 btrfs_release_path(&path);
7580                 return ret;
7581         } else if (ret) {
7582                 if (path.slots[0] > 0) {
7583                         path.slots[0]--;
7584                 } else {
7585                         ret = btrfs_prev_leaf(root, &path);
7586                         if (ret < 0) {
7587                                 goto out;
7588                         } else if (ret > 0) {
7589                                 ret = 0;
7590                                 goto out;
7591                         }
7592                 }
7593         }
7594
7595         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7596
7597         /*
7598          * Block group items come before extent items if they have the same
7599          * bytenr, so walk back one more just in case.  Dear future traveller,
7600          * first congrats on mastering time travel.  Now if it's not too much
7601          * trouble could you go back to 2006 and tell Chris to make the
7602          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7603          * EXTENT_ITEM_KEY please?
7604          */
7605         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7606                 if (path.slots[0] > 0) {
7607                         path.slots[0]--;
7608                 } else {
7609                         ret = btrfs_prev_leaf(root, &path);
7610                         if (ret < 0) {
7611                                 goto out;
7612                         } else if (ret > 0) {
7613                                 ret = 0;
7614                                 goto out;
7615                         }
7616                 }
7617                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7618         }
7619
7620         while (num_bytes) {
7621                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7622                         ret = btrfs_next_leaf(root, &path);
7623                         if (ret < 0) {
7624                                 fprintf(stderr, "Error going to next leaf "
7625                                         "%d\n", ret);
7626                                 btrfs_release_path(&path);
7627                                 return ret;
7628                         } else if (ret) {
7629                                 break;
7630                         }
7631                 }
7632                 leaf = path.nodes[0];
7633                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7634                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7635                         path.slots[0]++;
7636                         continue;
7637                 }
7638                 if (key.objectid + key.offset < bytenr) {
7639                         path.slots[0]++;
7640                         continue;
7641                 }
7642                 if (key.objectid > bytenr + num_bytes)
7643                         break;
7644
7645                 if (key.objectid == bytenr) {
7646                         if (key.offset >= num_bytes) {
7647                                 num_bytes = 0;
7648                                 break;
7649                         }
7650                         num_bytes -= key.offset;
7651                         bytenr += key.offset;
7652                 } else if (key.objectid < bytenr) {
7653                         if (key.objectid + key.offset >= bytenr + num_bytes) {
7654                                 num_bytes = 0;
7655                                 break;
7656                         }
7657                         num_bytes = (bytenr + num_bytes) -
7658                                 (key.objectid + key.offset);
7659                         bytenr = key.objectid + key.offset;
7660                 } else {
7661                         if (key.objectid + key.offset < bytenr + num_bytes) {
7662                                 u64 new_start = key.objectid + key.offset;
7663                                 u64 new_bytes = bytenr + num_bytes - new_start;
7664
7665                                 /*
7666                                  * Weird case, the extent is in the middle of
7667                                  * our range, we'll have to search one side
7668                                  * and then the other.  Not sure if this happens
7669                                  * in real life, but no harm in coding it up
7670                                  * anyway just in case.
7671                                  */
7672                                 btrfs_release_path(&path);
7673                                 ret = check_extent_exists(root, new_start,
7674                                                           new_bytes);
7675                                 if (ret) {
7676                                         fprintf(stderr, "Right section didn't "
7677                                                 "have a record\n");
7678                                         break;
7679                                 }
7680                                 num_bytes = key.objectid - bytenr;
7681                                 goto again;
7682                         }
7683                         num_bytes = key.objectid - bytenr;
7684                 }
7685                 path.slots[0]++;
7686         }
7687         ret = 0;
7688
7689 out:
7690         if (num_bytes && !ret) {
7691                 fprintf(stderr, "There are no extents for csum range "
7692                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7693                 ret = 1;
7694         }
7695
7696         btrfs_release_path(&path);
7697         return ret;
7698 }
7699
7700 static int check_csums(struct btrfs_root *root)
7701 {
7702         struct btrfs_path path;
7703         struct extent_buffer *leaf;
7704         struct btrfs_key key;
7705         u64 offset = 0, num_bytes = 0;
7706         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7707         int errors = 0;
7708         int ret;
7709         u64 data_len;
7710         unsigned long leaf_offset;
7711
7712         root = root->fs_info->csum_root;
7713         if (!extent_buffer_uptodate(root->node)) {
7714                 fprintf(stderr, "No valid csum tree found\n");
7715                 return -ENOENT;
7716         }
7717
7718         btrfs_init_path(&path);
7719         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7720         key.type = BTRFS_EXTENT_CSUM_KEY;
7721         key.offset = 0;
7722         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7723         if (ret < 0) {
7724                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7725                 btrfs_release_path(&path);
7726                 return ret;
7727         }
7728
7729         if (ret > 0 && path.slots[0])
7730                 path.slots[0]--;
7731         ret = 0;
7732
7733         while (1) {
7734                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7735                         ret = btrfs_next_leaf(root, &path);
7736                         if (ret < 0) {
7737                                 fprintf(stderr, "Error going to next leaf "
7738                                         "%d\n", ret);
7739                                 break;
7740                         }
7741                         if (ret)
7742                                 break;
7743                 }
7744                 leaf = path.nodes[0];
7745
7746                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7747                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7748                         path.slots[0]++;
7749                         continue;
7750                 }
7751
7752                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7753                               csum_size) * root->fs_info->sectorsize;
7754                 if (!check_data_csum)
7755                         goto skip_csum_check;
7756                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7757                 ret = check_extent_csums(root, key.offset, data_len,
7758                                          leaf_offset, leaf);
7759                 if (ret)
7760                         break;
7761 skip_csum_check:
7762                 if (!num_bytes) {
7763                         offset = key.offset;
7764                 } else if (key.offset != offset + num_bytes) {
7765                         ret = check_extent_exists(root, offset, num_bytes);
7766                         if (ret) {
7767                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7768                                         "there is no extent record\n",
7769                                         offset, offset+num_bytes);
7770                                 errors++;
7771                         }
7772                         offset = key.offset;
7773                         num_bytes = 0;
7774                 }
7775                 num_bytes += data_len;
7776                 path.slots[0]++;
7777         }
7778
7779         btrfs_release_path(&path);
7780         return errors;
7781 }
7782
7783 static int is_dropped_key(struct btrfs_key *key,
7784                           struct btrfs_key *drop_key) {
7785         if (key->objectid < drop_key->objectid)
7786                 return 1;
7787         else if (key->objectid == drop_key->objectid) {
7788                 if (key->type < drop_key->type)
7789                         return 1;
7790                 else if (key->type == drop_key->type) {
7791                         if (key->offset < drop_key->offset)
7792                                 return 1;
7793                 }
7794         }
7795         return 0;
7796 }
7797
7798 /*
7799  * Here are the rules for FULL_BACKREF.
7800  *
7801  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7802  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7803  *      FULL_BACKREF set.
7804  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7805  *    if it happened after the relocation occurred since we'll have dropped the
7806  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7807  *    have no real way to know for sure.
7808  *
7809  * We process the blocks one root at a time, and we start from the lowest root
7810  * objectid and go to the highest.  So we can just lookup the owner backref for
7811  * the record and if we don't find it then we know it doesn't exist and we have
7812  * a FULL BACKREF.
7813  *
7814  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7815  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7816  * be set or not and then we can check later once we've gathered all the refs.
7817  */
7818 static int calc_extent_flag(struct cache_tree *extent_cache,
7819                            struct extent_buffer *buf,
7820                            struct root_item_record *ri,
7821                            u64 *flags)
7822 {
7823         struct extent_record *rec;
7824         struct cache_extent *cache;
7825         struct tree_backref *tback;
7826         u64 owner = 0;
7827
7828         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7829         /* we have added this extent before */
7830         if (!cache)
7831                 return -ENOENT;
7832
7833         rec = container_of(cache, struct extent_record, cache);
7834
7835         /*
7836          * Except file/reloc tree, we can not have
7837          * FULL BACKREF MODE
7838          */
7839         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7840                 goto normal;
7841         /*
7842          * root node
7843          */
7844         if (buf->start == ri->bytenr)
7845                 goto normal;
7846
7847         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7848                 goto full_backref;
7849
7850         owner = btrfs_header_owner(buf);
7851         if (owner == ri->objectid)
7852                 goto normal;
7853
7854         tback = find_tree_backref(rec, 0, owner);
7855         if (!tback)
7856                 goto full_backref;
7857 normal:
7858         *flags = 0;
7859         if (rec->flag_block_full_backref != FLAG_UNSET &&
7860             rec->flag_block_full_backref != 0)
7861                 rec->bad_full_backref = 1;
7862         return 0;
7863 full_backref:
7864         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7865         if (rec->flag_block_full_backref != FLAG_UNSET &&
7866             rec->flag_block_full_backref != 1)
7867                 rec->bad_full_backref = 1;
7868         return 0;
7869 }
7870
7871 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7872 {
7873         fprintf(stderr, "Invalid key type(");
7874         print_key_type(stderr, 0, key_type);
7875         fprintf(stderr, ") found in root(");
7876         print_objectid(stderr, rootid, 0);
7877         fprintf(stderr, ")\n");
7878 }
7879
7880 /*
7881  * Check if the key is valid with its extent buffer.
7882  *
7883  * This is a early check in case invalid key exists in a extent buffer
7884  * This is not comprehensive yet, but should prevent wrong key/item passed
7885  * further
7886  */
7887 static int check_type_with_root(u64 rootid, u8 key_type)
7888 {
7889         switch (key_type) {
7890         /* Only valid in chunk tree */
7891         case BTRFS_DEV_ITEM_KEY:
7892         case BTRFS_CHUNK_ITEM_KEY:
7893                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7894                         goto err;
7895                 break;
7896         /* valid in csum and log tree */
7897         case BTRFS_CSUM_TREE_OBJECTID:
7898                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7899                       is_fstree(rootid)))
7900                         goto err;
7901                 break;
7902         case BTRFS_EXTENT_ITEM_KEY:
7903         case BTRFS_METADATA_ITEM_KEY:
7904         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7905                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7906                         goto err;
7907                 break;
7908         case BTRFS_ROOT_ITEM_KEY:
7909                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7910                         goto err;
7911                 break;
7912         case BTRFS_DEV_EXTENT_KEY:
7913                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7914                         goto err;
7915                 break;
7916         }
7917         return 0;
7918 err:
7919         report_mismatch_key_root(key_type, rootid);
7920         return -EINVAL;
7921 }
7922
7923 static int run_next_block(struct btrfs_root *root,
7924                           struct block_info *bits,
7925                           int bits_nr,
7926                           u64 *last,
7927                           struct cache_tree *pending,
7928                           struct cache_tree *seen,
7929                           struct cache_tree *reada,
7930                           struct cache_tree *nodes,
7931                           struct cache_tree *extent_cache,
7932                           struct cache_tree *chunk_cache,
7933                           struct rb_root *dev_cache,
7934                           struct block_group_tree *block_group_cache,
7935                           struct device_extent_tree *dev_extent_cache,
7936                           struct root_item_record *ri)
7937 {
7938         struct btrfs_fs_info *fs_info = root->fs_info;
7939         struct extent_buffer *buf;
7940         struct extent_record *rec = NULL;
7941         u64 bytenr;
7942         u32 size;
7943         u64 parent;
7944         u64 owner;
7945         u64 flags;
7946         u64 ptr;
7947         u64 gen = 0;
7948         int ret = 0;
7949         int i;
7950         int nritems;
7951         struct btrfs_key key;
7952         struct cache_extent *cache;
7953         int reada_bits;
7954
7955         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7956                                     bits_nr, &reada_bits);
7957         if (nritems == 0)
7958                 return 1;
7959
7960         if (!reada_bits) {
7961                 for(i = 0; i < nritems; i++) {
7962                         ret = add_cache_extent(reada, bits[i].start,
7963                                                bits[i].size);
7964                         if (ret == -EEXIST)
7965                                 continue;
7966
7967                         /* fixme, get the parent transid */
7968                         readahead_tree_block(fs_info, bits[i].start, 0);
7969                 }
7970         }
7971         *last = bits[0].start;
7972         bytenr = bits[0].start;
7973         size = bits[0].size;
7974
7975         cache = lookup_cache_extent(pending, bytenr, size);
7976         if (cache) {
7977                 remove_cache_extent(pending, cache);
7978                 free(cache);
7979         }
7980         cache = lookup_cache_extent(reada, bytenr, size);
7981         if (cache) {
7982                 remove_cache_extent(reada, cache);
7983                 free(cache);
7984         }
7985         cache = lookup_cache_extent(nodes, bytenr, size);
7986         if (cache) {
7987                 remove_cache_extent(nodes, cache);
7988                 free(cache);
7989         }
7990         cache = lookup_cache_extent(extent_cache, bytenr, size);
7991         if (cache) {
7992                 rec = container_of(cache, struct extent_record, cache);
7993                 gen = rec->parent_generation;
7994         }
7995
7996         /* fixme, get the real parent transid */
7997         buf = read_tree_block(root->fs_info, bytenr, gen);
7998         if (!extent_buffer_uptodate(buf)) {
7999                 record_bad_block_io(root->fs_info,
8000                                     extent_cache, bytenr, size);
8001                 goto out;
8002         }
8003
8004         nritems = btrfs_header_nritems(buf);
8005
8006         flags = 0;
8007         if (!init_extent_tree) {
8008                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8009                                        btrfs_header_level(buf), 1, NULL,
8010                                        &flags);
8011                 if (ret < 0) {
8012                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8013                         if (ret < 0) {
8014                                 fprintf(stderr, "Couldn't calc extent flags\n");
8015                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8016                         }
8017                 }
8018         } else {
8019                 flags = 0;
8020                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8021                 if (ret < 0) {
8022                         fprintf(stderr, "Couldn't calc extent flags\n");
8023                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8024                 }
8025         }
8026
8027         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8028                 if (ri != NULL &&
8029                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8030                     ri->objectid == btrfs_header_owner(buf)) {
8031                         /*
8032                          * Ok we got to this block from it's original owner and
8033                          * we have FULL_BACKREF set.  Relocation can leave
8034                          * converted blocks over so this is altogether possible,
8035                          * however it's not possible if the generation > the
8036                          * last snapshot, so check for this case.
8037                          */
8038                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8039                             btrfs_header_generation(buf) > ri->last_snapshot) {
8040                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8041                                 rec->bad_full_backref = 1;
8042                         }
8043                 }
8044         } else {
8045                 if (ri != NULL &&
8046                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8047                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8048                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8049                         rec->bad_full_backref = 1;
8050                 }
8051         }
8052
8053         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8054                 rec->flag_block_full_backref = 1;
8055                 parent = bytenr;
8056                 owner = 0;
8057         } else {
8058                 rec->flag_block_full_backref = 0;
8059                 parent = 0;
8060                 owner = btrfs_header_owner(buf);
8061         }
8062
8063         ret = check_block(root, extent_cache, buf, flags);
8064         if (ret)
8065                 goto out;
8066
8067         if (btrfs_is_leaf(buf)) {
8068                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8069                 for (i = 0; i < nritems; i++) {
8070                         struct btrfs_file_extent_item *fi;
8071                         btrfs_item_key_to_cpu(buf, &key, i);
8072                         /*
8073                          * Check key type against the leaf owner.
8074                          * Could filter quite a lot of early error if
8075                          * owner is correct
8076                          */
8077                         if (check_type_with_root(btrfs_header_owner(buf),
8078                                                  key.type)) {
8079                                 fprintf(stderr, "ignoring invalid key\n");
8080                                 continue;
8081                         }
8082                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8083                                 process_extent_item(root, extent_cache, buf,
8084                                                     i);
8085                                 continue;
8086                         }
8087                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8088                                 process_extent_item(root, extent_cache, buf,
8089                                                     i);
8090                                 continue;
8091                         }
8092                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8093                                 total_csum_bytes +=
8094                                         btrfs_item_size_nr(buf, i);
8095                                 continue;
8096                         }
8097                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8098                                 process_chunk_item(chunk_cache, &key, buf, i);
8099                                 continue;
8100                         }
8101                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8102                                 process_device_item(dev_cache, &key, buf, i);
8103                                 continue;
8104                         }
8105                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8106                                 process_block_group_item(block_group_cache,
8107                                         &key, buf, i);
8108                                 continue;
8109                         }
8110                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8111                                 process_device_extent_item(dev_extent_cache,
8112                                         &key, buf, i);
8113                                 continue;
8114
8115                         }
8116                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8117 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8118                                 process_extent_ref_v0(extent_cache, buf, i);
8119 #else
8120                                 BUG();
8121 #endif
8122                                 continue;
8123                         }
8124
8125                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8126                                 ret = add_tree_backref(extent_cache,
8127                                                 key.objectid, 0, key.offset, 0);
8128                                 if (ret < 0)
8129                                         error(
8130                                 "add_tree_backref failed (leaf tree block): %s",
8131                                               strerror(-ret));
8132                                 continue;
8133                         }
8134                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8135                                 ret = add_tree_backref(extent_cache,
8136                                                 key.objectid, key.offset, 0, 0);
8137                                 if (ret < 0)
8138                                         error(
8139                                 "add_tree_backref failed (leaf shared block): %s",
8140                                               strerror(-ret));
8141                                 continue;
8142                         }
8143                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8144                                 struct btrfs_extent_data_ref *ref;
8145                                 ref = btrfs_item_ptr(buf, i,
8146                                                 struct btrfs_extent_data_ref);
8147                                 add_data_backref(extent_cache,
8148                                         key.objectid, 0,
8149                                         btrfs_extent_data_ref_root(buf, ref),
8150                                         btrfs_extent_data_ref_objectid(buf,
8151                                                                        ref),
8152                                         btrfs_extent_data_ref_offset(buf, ref),
8153                                         btrfs_extent_data_ref_count(buf, ref),
8154                                         0, root->fs_info->sectorsize);
8155                                 continue;
8156                         }
8157                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8158                                 struct btrfs_shared_data_ref *ref;
8159                                 ref = btrfs_item_ptr(buf, i,
8160                                                 struct btrfs_shared_data_ref);
8161                                 add_data_backref(extent_cache,
8162                                         key.objectid, key.offset, 0, 0, 0,
8163                                         btrfs_shared_data_ref_count(buf, ref),
8164                                         0, root->fs_info->sectorsize);
8165                                 continue;
8166                         }
8167                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8168                                 struct bad_item *bad;
8169
8170                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8171                                         continue;
8172                                 if (!owner)
8173                                         continue;
8174                                 bad = malloc(sizeof(struct bad_item));
8175                                 if (!bad)
8176                                         continue;
8177                                 INIT_LIST_HEAD(&bad->list);
8178                                 memcpy(&bad->key, &key,
8179                                        sizeof(struct btrfs_key));
8180                                 bad->root_id = owner;
8181                                 list_add_tail(&bad->list, &delete_items);
8182                                 continue;
8183                         }
8184                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8185                                 continue;
8186                         fi = btrfs_item_ptr(buf, i,
8187                                             struct btrfs_file_extent_item);
8188                         if (btrfs_file_extent_type(buf, fi) ==
8189                             BTRFS_FILE_EXTENT_INLINE)
8190                                 continue;
8191                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8192                                 continue;
8193
8194                         data_bytes_allocated +=
8195                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8196                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8197                                 abort();
8198                         }
8199                         data_bytes_referenced +=
8200                                 btrfs_file_extent_num_bytes(buf, fi);
8201                         add_data_backref(extent_cache,
8202                                 btrfs_file_extent_disk_bytenr(buf, fi),
8203                                 parent, owner, key.objectid, key.offset -
8204                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8205                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8206                 }
8207         } else {
8208                 int level;
8209                 struct btrfs_key first_key;
8210
8211                 first_key.objectid = 0;
8212
8213                 if (nritems > 0)
8214                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8215                 level = btrfs_header_level(buf);
8216                 for (i = 0; i < nritems; i++) {
8217                         struct extent_record tmpl;
8218
8219                         ptr = btrfs_node_blockptr(buf, i);
8220                         size = root->fs_info->nodesize;
8221                         btrfs_node_key_to_cpu(buf, &key, i);
8222                         if (ri != NULL) {
8223                                 if ((level == ri->drop_level)
8224                                     && is_dropped_key(&key, &ri->drop_key)) {
8225                                         continue;
8226                                 }
8227                         }
8228
8229                         memset(&tmpl, 0, sizeof(tmpl));
8230                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8231                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8232                         tmpl.start = ptr;
8233                         tmpl.nr = size;
8234                         tmpl.refs = 1;
8235                         tmpl.metadata = 1;
8236                         tmpl.max_size = size;
8237                         ret = add_extent_rec(extent_cache, &tmpl);
8238                         if (ret < 0)
8239                                 goto out;
8240
8241                         ret = add_tree_backref(extent_cache, ptr, parent,
8242                                         owner, 1);
8243                         if (ret < 0) {
8244                                 error(
8245                                 "add_tree_backref failed (non-leaf block): %s",
8246                                       strerror(-ret));
8247                                 continue;
8248                         }
8249
8250                         if (level > 1) {
8251                                 add_pending(nodes, seen, ptr, size);
8252                         } else {
8253                                 add_pending(pending, seen, ptr, size);
8254                         }
8255                 }
8256                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8257                                       nritems) * sizeof(struct btrfs_key_ptr);
8258         }
8259         total_btree_bytes += buf->len;
8260         if (fs_root_objectid(btrfs_header_owner(buf)))
8261                 total_fs_tree_bytes += buf->len;
8262         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8263                 total_extent_tree_bytes += buf->len;
8264 out:
8265         free_extent_buffer(buf);
8266         return ret;
8267 }
8268
8269 static int add_root_to_pending(struct extent_buffer *buf,
8270                                struct cache_tree *extent_cache,
8271                                struct cache_tree *pending,
8272                                struct cache_tree *seen,
8273                                struct cache_tree *nodes,
8274                                u64 objectid)
8275 {
8276         struct extent_record tmpl;
8277         int ret;
8278
8279         if (btrfs_header_level(buf) > 0)
8280                 add_pending(nodes, seen, buf->start, buf->len);
8281         else
8282                 add_pending(pending, seen, buf->start, buf->len);
8283
8284         memset(&tmpl, 0, sizeof(tmpl));
8285         tmpl.start = buf->start;
8286         tmpl.nr = buf->len;
8287         tmpl.is_root = 1;
8288         tmpl.refs = 1;
8289         tmpl.metadata = 1;
8290         tmpl.max_size = buf->len;
8291         add_extent_rec(extent_cache, &tmpl);
8292
8293         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8294             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8295                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8296                                 0, 1);
8297         else
8298                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8299                                 1);
8300         return ret;
8301 }
8302
8303 /* as we fix the tree, we might be deleting blocks that
8304  * we're tracking for repair.  This hook makes sure we
8305  * remove any backrefs for blocks as we are fixing them.
8306  */
8307 static int free_extent_hook(struct btrfs_trans_handle *trans,
8308                             struct btrfs_root *root,
8309                             u64 bytenr, u64 num_bytes, u64 parent,
8310                             u64 root_objectid, u64 owner, u64 offset,
8311                             int refs_to_drop)
8312 {
8313         struct extent_record *rec;
8314         struct cache_extent *cache;
8315         int is_data;
8316         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8317
8318         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8319         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8320         if (!cache)
8321                 return 0;
8322
8323         rec = container_of(cache, struct extent_record, cache);
8324         if (is_data) {
8325                 struct data_backref *back;
8326                 back = find_data_backref(rec, parent, root_objectid, owner,
8327                                          offset, 1, bytenr, num_bytes);
8328                 if (!back)
8329                         goto out;
8330                 if (back->node.found_ref) {
8331                         back->found_ref -= refs_to_drop;
8332                         if (rec->refs)
8333                                 rec->refs -= refs_to_drop;
8334                 }
8335                 if (back->node.found_extent_tree) {
8336                         back->num_refs -= refs_to_drop;
8337                         if (rec->extent_item_refs)
8338                                 rec->extent_item_refs -= refs_to_drop;
8339                 }
8340                 if (back->found_ref == 0)
8341                         back->node.found_ref = 0;
8342                 if (back->num_refs == 0)
8343                         back->node.found_extent_tree = 0;
8344
8345                 if (!back->node.found_extent_tree && back->node.found_ref) {
8346                         rb_erase(&back->node.node, &rec->backref_tree);
8347                         free(back);
8348                 }
8349         } else {
8350                 struct tree_backref *back;
8351                 back = find_tree_backref(rec, parent, root_objectid);
8352                 if (!back)
8353                         goto out;
8354                 if (back->node.found_ref) {
8355                         if (rec->refs)
8356                                 rec->refs--;
8357                         back->node.found_ref = 0;
8358                 }
8359                 if (back->node.found_extent_tree) {
8360                         if (rec->extent_item_refs)
8361                                 rec->extent_item_refs--;
8362                         back->node.found_extent_tree = 0;
8363                 }
8364                 if (!back->node.found_extent_tree && back->node.found_ref) {
8365                         rb_erase(&back->node.node, &rec->backref_tree);
8366                         free(back);
8367                 }
8368         }
8369         maybe_free_extent_rec(extent_cache, rec);
8370 out:
8371         return 0;
8372 }
8373
8374 static int delete_extent_records(struct btrfs_trans_handle *trans,
8375                                  struct btrfs_root *root,
8376                                  struct btrfs_path *path,
8377                                  u64 bytenr)
8378 {
8379         struct btrfs_key key;
8380         struct btrfs_key found_key;
8381         struct extent_buffer *leaf;
8382         int ret;
8383         int slot;
8384
8385
8386         key.objectid = bytenr;
8387         key.type = (u8)-1;
8388         key.offset = (u64)-1;
8389
8390         while(1) {
8391                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8392                                         &key, path, 0, 1);
8393                 if (ret < 0)
8394                         break;
8395
8396                 if (ret > 0) {
8397                         ret = 0;
8398                         if (path->slots[0] == 0)
8399                                 break;
8400                         path->slots[0]--;
8401                 }
8402                 ret = 0;
8403
8404                 leaf = path->nodes[0];
8405                 slot = path->slots[0];
8406
8407                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8408                 if (found_key.objectid != bytenr)
8409                         break;
8410
8411                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8412                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
8413                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8414                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8415                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8416                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8417                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8418                         btrfs_release_path(path);
8419                         if (found_key.type == 0) {
8420                                 if (found_key.offset == 0)
8421                                         break;
8422                                 key.offset = found_key.offset - 1;
8423                                 key.type = found_key.type;
8424                         }
8425                         key.type = found_key.type - 1;
8426                         key.offset = (u64)-1;
8427                         continue;
8428                 }
8429
8430                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8431                         found_key.objectid, found_key.type, found_key.offset);
8432
8433                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8434                 if (ret)
8435                         break;
8436                 btrfs_release_path(path);
8437
8438                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8439                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
8440                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8441                                 found_key.offset : root->fs_info->nodesize;
8442
8443                         ret = btrfs_update_block_group(trans, root, bytenr,
8444                                                        bytes, 0, 0);
8445                         if (ret)
8446                                 break;
8447                 }
8448         }
8449
8450         btrfs_release_path(path);
8451         return ret;
8452 }
8453
8454 /*
8455  * for a single backref, this will allocate a new extent
8456  * and add the backref to it.
8457  */
8458 static int record_extent(struct btrfs_trans_handle *trans,
8459                          struct btrfs_fs_info *info,
8460                          struct btrfs_path *path,
8461                          struct extent_record *rec,
8462                          struct extent_backref *back,
8463                          int allocated, u64 flags)
8464 {
8465         int ret = 0;
8466         struct btrfs_root *extent_root = info->extent_root;
8467         struct extent_buffer *leaf;
8468         struct btrfs_key ins_key;
8469         struct btrfs_extent_item *ei;
8470         struct data_backref *dback;
8471         struct btrfs_tree_block_info *bi;
8472
8473         if (!back->is_data)
8474                 rec->max_size = max_t(u64, rec->max_size,
8475                                     info->nodesize);
8476
8477         if (!allocated) {
8478                 u32 item_size = sizeof(*ei);
8479
8480                 if (!back->is_data)
8481                         item_size += sizeof(*bi);
8482
8483                 ins_key.objectid = rec->start;
8484                 ins_key.offset = rec->max_size;
8485                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8486
8487                 ret = btrfs_insert_empty_item(trans, extent_root, path,
8488                                         &ins_key, item_size);
8489                 if (ret)
8490                         goto fail;
8491
8492                 leaf = path->nodes[0];
8493                 ei = btrfs_item_ptr(leaf, path->slots[0],
8494                                     struct btrfs_extent_item);
8495
8496                 btrfs_set_extent_refs(leaf, ei, 0);
8497                 btrfs_set_extent_generation(leaf, ei, rec->generation);
8498
8499                 if (back->is_data) {
8500                         btrfs_set_extent_flags(leaf, ei,
8501                                                BTRFS_EXTENT_FLAG_DATA);
8502                 } else {
8503                         struct btrfs_disk_key copy_key;;
8504
8505                         bi = (struct btrfs_tree_block_info *)(ei + 1);
8506                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
8507                                              sizeof(*bi));
8508
8509                         btrfs_set_disk_key_objectid(&copy_key,
8510                                                     rec->info_objectid);
8511                         btrfs_set_disk_key_type(&copy_key, 0);
8512                         btrfs_set_disk_key_offset(&copy_key, 0);
8513
8514                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8515                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
8516
8517                         btrfs_set_extent_flags(leaf, ei,
8518                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8519                 }
8520
8521                 btrfs_mark_buffer_dirty(leaf);
8522                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8523                                                rec->max_size, 1, 0);
8524                 if (ret)
8525                         goto fail;
8526                 btrfs_release_path(path);
8527         }
8528
8529         if (back->is_data) {
8530                 u64 parent;
8531                 int i;
8532
8533                 dback = to_data_backref(back);
8534                 if (back->full_backref)
8535                         parent = dback->parent;
8536                 else
8537                         parent = 0;
8538
8539                 for (i = 0; i < dback->found_ref; i++) {
8540                         /* if parent != 0, we're doing a full backref
8541                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8542                          * just makes the backref allocator create a data
8543                          * backref
8544                          */
8545                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
8546                                                    rec->start, rec->max_size,
8547                                                    parent,
8548                                                    dback->root,
8549                                                    parent ?
8550                                                    BTRFS_FIRST_FREE_OBJECTID :
8551                                                    dback->owner,
8552                                                    dback->offset);
8553                         if (ret)
8554                                 break;
8555                 }
8556                 fprintf(stderr, "adding new data backref"
8557                                 " on %llu %s %llu owner %llu"
8558                                 " offset %llu found %d\n",
8559                                 (unsigned long long)rec->start,
8560                                 back->full_backref ?
8561                                 "parent" : "root",
8562                                 back->full_backref ?
8563                                 (unsigned long long)parent :
8564                                 (unsigned long long)dback->root,
8565                                 (unsigned long long)dback->owner,
8566                                 (unsigned long long)dback->offset,
8567                                 dback->found_ref);
8568         } else {
8569                 u64 parent;
8570                 struct tree_backref *tback;
8571
8572                 tback = to_tree_backref(back);
8573                 if (back->full_backref)
8574                         parent = tback->parent;
8575                 else
8576                         parent = 0;
8577
8578                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8579                                            rec->start, rec->max_size,
8580                                            parent, tback->root, 0, 0);
8581                 fprintf(stderr, "adding new tree backref on "
8582                         "start %llu len %llu parent %llu root %llu\n",
8583                         rec->start, rec->max_size, parent, tback->root);
8584         }
8585 fail:
8586         btrfs_release_path(path);
8587         return ret;
8588 }
8589
8590 static struct extent_entry *find_entry(struct list_head *entries,
8591                                        u64 bytenr, u64 bytes)
8592 {
8593         struct extent_entry *entry = NULL;
8594
8595         list_for_each_entry(entry, entries, list) {
8596                 if (entry->bytenr == bytenr && entry->bytes == bytes)
8597                         return entry;
8598         }
8599
8600         return NULL;
8601 }
8602
8603 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8604 {
8605         struct extent_entry *entry, *best = NULL, *prev = NULL;
8606
8607         list_for_each_entry(entry, entries, list) {
8608                 /*
8609                  * If there are as many broken entries as entries then we know
8610                  * not to trust this particular entry.
8611                  */
8612                 if (entry->broken == entry->count)
8613                         continue;
8614
8615                 /*
8616                  * Special case, when there are only two entries and 'best' is
8617                  * the first one
8618                  */
8619                 if (!prev) {
8620                         best = entry;
8621                         prev = entry;
8622                         continue;
8623                 }
8624
8625                 /*
8626                  * If our current entry == best then we can't be sure our best
8627                  * is really the best, so we need to keep searching.
8628                  */
8629                 if (best && best->count == entry->count) {
8630                         prev = entry;
8631                         best = NULL;
8632                         continue;
8633                 }
8634
8635                 /* Prev == entry, not good enough, have to keep searching */
8636                 if (!prev->broken && prev->count == entry->count)
8637                         continue;
8638
8639                 if (!best)
8640                         best = (prev->count > entry->count) ? prev : entry;
8641                 else if (best->count < entry->count)
8642                         best = entry;
8643                 prev = entry;
8644         }
8645
8646         return best;
8647 }
8648
8649 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8650                       struct data_backref *dback, struct extent_entry *entry)
8651 {
8652         struct btrfs_trans_handle *trans;
8653         struct btrfs_root *root;
8654         struct btrfs_file_extent_item *fi;
8655         struct extent_buffer *leaf;
8656         struct btrfs_key key;
8657         u64 bytenr, bytes;
8658         int ret, err;
8659
8660         key.objectid = dback->root;
8661         key.type = BTRFS_ROOT_ITEM_KEY;
8662         key.offset = (u64)-1;
8663         root = btrfs_read_fs_root(info, &key);
8664         if (IS_ERR(root)) {
8665                 fprintf(stderr, "Couldn't find root for our ref\n");
8666                 return -EINVAL;
8667         }
8668
8669         /*
8670          * The backref points to the original offset of the extent if it was
8671          * split, so we need to search down to the offset we have and then walk
8672          * forward until we find the backref we're looking for.
8673          */
8674         key.objectid = dback->owner;
8675         key.type = BTRFS_EXTENT_DATA_KEY;
8676         key.offset = dback->offset;
8677         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8678         if (ret < 0) {
8679                 fprintf(stderr, "Error looking up ref %d\n", ret);
8680                 return ret;
8681         }
8682
8683         while (1) {
8684                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8685                         ret = btrfs_next_leaf(root, path);
8686                         if (ret) {
8687                                 fprintf(stderr, "Couldn't find our ref, next\n");
8688                                 return -EINVAL;
8689                         }
8690                 }
8691                 leaf = path->nodes[0];
8692                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8693                 if (key.objectid != dback->owner ||
8694                     key.type != BTRFS_EXTENT_DATA_KEY) {
8695                         fprintf(stderr, "Couldn't find our ref, search\n");
8696                         return -EINVAL;
8697                 }
8698                 fi = btrfs_item_ptr(leaf, path->slots[0],
8699                                     struct btrfs_file_extent_item);
8700                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8701                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8702
8703                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8704                         break;
8705                 path->slots[0]++;
8706         }
8707
8708         btrfs_release_path(path);
8709
8710         trans = btrfs_start_transaction(root, 1);
8711         if (IS_ERR(trans))
8712                 return PTR_ERR(trans);
8713
8714         /*
8715          * Ok we have the key of the file extent we want to fix, now we can cow
8716          * down to the thing and fix it.
8717          */
8718         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8719         if (ret < 0) {
8720                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8721                         key.objectid, key.type, key.offset, ret);
8722                 goto out;
8723         }
8724         if (ret > 0) {
8725                 fprintf(stderr, "Well that's odd, we just found this key "
8726                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8727                         key.offset);
8728                 ret = -EINVAL;
8729                 goto out;
8730         }
8731         leaf = path->nodes[0];
8732         fi = btrfs_item_ptr(leaf, path->slots[0],
8733                             struct btrfs_file_extent_item);
8734
8735         if (btrfs_file_extent_compression(leaf, fi) &&
8736             dback->disk_bytenr != entry->bytenr) {
8737                 fprintf(stderr, "Ref doesn't match the record start and is "
8738                         "compressed, please take a btrfs-image of this file "
8739                         "system and send it to a btrfs developer so they can "
8740                         "complete this functionality for bytenr %Lu\n",
8741                         dback->disk_bytenr);
8742                 ret = -EINVAL;
8743                 goto out;
8744         }
8745
8746         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8747                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8748         } else if (dback->disk_bytenr > entry->bytenr) {
8749                 u64 off_diff, offset;
8750
8751                 off_diff = dback->disk_bytenr - entry->bytenr;
8752                 offset = btrfs_file_extent_offset(leaf, fi);
8753                 if (dback->disk_bytenr + offset +
8754                     btrfs_file_extent_num_bytes(leaf, fi) >
8755                     entry->bytenr + entry->bytes) {
8756                         fprintf(stderr, "Ref is past the entry end, please "
8757                                 "take a btrfs-image of this file system and "
8758                                 "send it to a btrfs developer, ref %Lu\n",
8759                                 dback->disk_bytenr);
8760                         ret = -EINVAL;
8761                         goto out;
8762                 }
8763                 offset += off_diff;
8764                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8765                 btrfs_set_file_extent_offset(leaf, fi, offset);
8766         } else if (dback->disk_bytenr < entry->bytenr) {
8767                 u64 offset;
8768
8769                 offset = btrfs_file_extent_offset(leaf, fi);
8770                 if (dback->disk_bytenr + offset < entry->bytenr) {
8771                         fprintf(stderr, "Ref is before the entry start, please"
8772                                 " take a btrfs-image of this file system and "
8773                                 "send it to a btrfs developer, ref %Lu\n",
8774                                 dback->disk_bytenr);
8775                         ret = -EINVAL;
8776                         goto out;
8777                 }
8778
8779                 offset += dback->disk_bytenr;
8780                 offset -= entry->bytenr;
8781                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8782                 btrfs_set_file_extent_offset(leaf, fi, offset);
8783         }
8784
8785         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8786
8787         /*
8788          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8789          * only do this if we aren't using compression, otherwise it's a
8790          * trickier case.
8791          */
8792         if (!btrfs_file_extent_compression(leaf, fi))
8793                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8794         else
8795                 printf("ram bytes may be wrong?\n");
8796         btrfs_mark_buffer_dirty(leaf);
8797 out:
8798         err = btrfs_commit_transaction(trans, root);
8799         btrfs_release_path(path);
8800         return ret ? ret : err;
8801 }
8802
8803 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8804                            struct extent_record *rec)
8805 {
8806         struct extent_backref *back, *tmp;
8807         struct data_backref *dback;
8808         struct extent_entry *entry, *best = NULL;
8809         LIST_HEAD(entries);
8810         int nr_entries = 0;
8811         int broken_entries = 0;
8812         int ret = 0;
8813         short mismatch = 0;
8814
8815         /*
8816          * Metadata is easy and the backrefs should always agree on bytenr and
8817          * size, if not we've got bigger issues.
8818          */
8819         if (rec->metadata)
8820                 return 0;
8821
8822         rbtree_postorder_for_each_entry_safe(back, tmp,
8823                                              &rec->backref_tree, node) {
8824                 if (back->full_backref || !back->is_data)
8825                         continue;
8826
8827                 dback = to_data_backref(back);
8828
8829                 /*
8830                  * We only pay attention to backrefs that we found a real
8831                  * backref for.
8832                  */
8833                 if (dback->found_ref == 0)
8834                         continue;
8835
8836                 /*
8837                  * For now we only catch when the bytes don't match, not the
8838                  * bytenr.  We can easily do this at the same time, but I want
8839                  * to have a fs image to test on before we just add repair
8840                  * functionality willy-nilly so we know we won't screw up the
8841                  * repair.
8842                  */
8843
8844                 entry = find_entry(&entries, dback->disk_bytenr,
8845                                    dback->bytes);
8846                 if (!entry) {
8847                         entry = malloc(sizeof(struct extent_entry));
8848                         if (!entry) {
8849                                 ret = -ENOMEM;
8850                                 goto out;
8851                         }
8852                         memset(entry, 0, sizeof(*entry));
8853                         entry->bytenr = dback->disk_bytenr;
8854                         entry->bytes = dback->bytes;
8855                         list_add_tail(&entry->list, &entries);
8856                         nr_entries++;
8857                 }
8858
8859                 /*
8860                  * If we only have on entry we may think the entries agree when
8861                  * in reality they don't so we have to do some extra checking.
8862                  */
8863                 if (dback->disk_bytenr != rec->start ||
8864                     dback->bytes != rec->nr || back->broken)
8865                         mismatch = 1;
8866
8867                 if (back->broken) {
8868                         entry->broken++;
8869                         broken_entries++;
8870                 }
8871
8872                 entry->count++;
8873         }
8874
8875         /* Yay all the backrefs agree, carry on good sir */
8876         if (nr_entries <= 1 && !mismatch)
8877                 goto out;
8878
8879         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8880                 "%Lu\n", rec->start);
8881
8882         /*
8883          * First we want to see if the backrefs can agree amongst themselves who
8884          * is right, so figure out which one of the entries has the highest
8885          * count.
8886          */
8887         best = find_most_right_entry(&entries);
8888
8889         /*
8890          * Ok so we may have an even split between what the backrefs think, so
8891          * this is where we use the extent ref to see what it thinks.
8892          */
8893         if (!best) {
8894                 entry = find_entry(&entries, rec->start, rec->nr);
8895                 if (!entry && (!broken_entries || !rec->found_rec)) {
8896                         fprintf(stderr, "Backrefs don't agree with each other "
8897                                 "and extent record doesn't agree with anybody,"
8898                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8899                                 rec->start, rec->nr);
8900                         ret = -EINVAL;
8901                         goto out;
8902                 } else if (!entry) {
8903                         /*
8904                          * Ok our backrefs were broken, we'll assume this is the
8905                          * correct value and add an entry for this range.
8906                          */
8907                         entry = malloc(sizeof(struct extent_entry));
8908                         if (!entry) {
8909                                 ret = -ENOMEM;
8910                                 goto out;
8911                         }
8912                         memset(entry, 0, sizeof(*entry));
8913                         entry->bytenr = rec->start;
8914                         entry->bytes = rec->nr;
8915                         list_add_tail(&entry->list, &entries);
8916                         nr_entries++;
8917                 }
8918                 entry->count++;
8919                 best = find_most_right_entry(&entries);
8920                 if (!best) {
8921                         fprintf(stderr, "Backrefs and extent record evenly "
8922                                 "split on who is right, this is going to "
8923                                 "require user input to fix bytenr %Lu bytes "
8924                                 "%Lu\n", rec->start, rec->nr);
8925                         ret = -EINVAL;
8926                         goto out;
8927                 }
8928         }
8929
8930         /*
8931          * I don't think this can happen currently as we'll abort() if we catch
8932          * this case higher up, but in case somebody removes that we still can't
8933          * deal with it properly here yet, so just bail out of that's the case.
8934          */
8935         if (best->bytenr != rec->start) {
8936                 fprintf(stderr, "Extent start and backref starts don't match, "
8937                         "please use btrfs-image on this file system and send "
8938                         "it to a btrfs developer so they can make fsck fix "
8939                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8940                         rec->start, rec->nr);
8941                 ret = -EINVAL;
8942                 goto out;
8943         }
8944
8945         /*
8946          * Ok great we all agreed on an extent record, let's go find the real
8947          * references and fix up the ones that don't match.
8948          */
8949         rbtree_postorder_for_each_entry_safe(back, tmp,
8950                                              &rec->backref_tree, node) {
8951                 if (back->full_backref || !back->is_data)
8952                         continue;
8953
8954                 dback = to_data_backref(back);
8955
8956                 /*
8957                  * Still ignoring backrefs that don't have a real ref attached
8958                  * to them.
8959                  */
8960                 if (dback->found_ref == 0)
8961                         continue;
8962
8963                 if (dback->bytes == best->bytes &&
8964                     dback->disk_bytenr == best->bytenr)
8965                         continue;
8966
8967                 ret = repair_ref(info, path, dback, best);
8968                 if (ret)
8969                         goto out;
8970         }
8971
8972         /*
8973          * Ok we messed with the actual refs, which means we need to drop our
8974          * entire cache and go back and rescan.  I know this is a huge pain and
8975          * adds a lot of extra work, but it's the only way to be safe.  Once all
8976          * the backrefs agree we may not need to do anything to the extent
8977          * record itself.
8978          */
8979         ret = -EAGAIN;
8980 out:
8981         while (!list_empty(&entries)) {
8982                 entry = list_entry(entries.next, struct extent_entry, list);
8983                 list_del_init(&entry->list);
8984                 free(entry);
8985         }
8986         return ret;
8987 }
8988
8989 static int process_duplicates(struct cache_tree *extent_cache,
8990                               struct extent_record *rec)
8991 {
8992         struct extent_record *good, *tmp;
8993         struct cache_extent *cache;
8994         int ret;
8995
8996         /*
8997          * If we found a extent record for this extent then return, or if we
8998          * have more than one duplicate we are likely going to need to delete
8999          * something.
9000          */
9001         if (rec->found_rec || rec->num_duplicates > 1)
9002                 return 0;
9003
9004         /* Shouldn't happen but just in case */
9005         BUG_ON(!rec->num_duplicates);
9006
9007         /*
9008          * So this happens if we end up with a backref that doesn't match the
9009          * actual extent entry.  So either the backref is bad or the extent
9010          * entry is bad.  Either way we want to have the extent_record actually
9011          * reflect what we found in the extent_tree, so we need to take the
9012          * duplicate out and use that as the extent_record since the only way we
9013          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9014          */
9015         remove_cache_extent(extent_cache, &rec->cache);
9016
9017         good = to_extent_record(rec->dups.next);
9018         list_del_init(&good->list);
9019         INIT_LIST_HEAD(&good->backrefs);
9020         INIT_LIST_HEAD(&good->dups);
9021         good->cache.start = good->start;
9022         good->cache.size = good->nr;
9023         good->content_checked = 0;
9024         good->owner_ref_checked = 0;
9025         good->num_duplicates = 0;
9026         good->refs = rec->refs;
9027         list_splice_init(&rec->backrefs, &good->backrefs);
9028         while (1) {
9029                 cache = lookup_cache_extent(extent_cache, good->start,
9030                                             good->nr);
9031                 if (!cache)
9032                         break;
9033                 tmp = container_of(cache, struct extent_record, cache);
9034
9035                 /*
9036                  * If we find another overlapping extent and it's found_rec is
9037                  * set then it's a duplicate and we need to try and delete
9038                  * something.
9039                  */
9040                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9041                         if (list_empty(&good->list))
9042                                 list_add_tail(&good->list,
9043                                               &duplicate_extents);
9044                         good->num_duplicates += tmp->num_duplicates + 1;
9045                         list_splice_init(&tmp->dups, &good->dups);
9046                         list_del_init(&tmp->list);
9047                         list_add_tail(&tmp->list, &good->dups);
9048                         remove_cache_extent(extent_cache, &tmp->cache);
9049                         continue;
9050                 }
9051
9052                 /*
9053                  * Ok we have another non extent item backed extent rec, so lets
9054                  * just add it to this extent and carry on like we did above.
9055                  */
9056                 good->refs += tmp->refs;
9057                 list_splice_init(&tmp->backrefs, &good->backrefs);
9058                 remove_cache_extent(extent_cache, &tmp->cache);
9059                 free(tmp);
9060         }
9061         ret = insert_cache_extent(extent_cache, &good->cache);
9062         BUG_ON(ret);
9063         free(rec);
9064         return good->num_duplicates ? 0 : 1;
9065 }
9066
9067 static int delete_duplicate_records(struct btrfs_root *root,
9068                                     struct extent_record *rec)
9069 {
9070         struct btrfs_trans_handle *trans;
9071         LIST_HEAD(delete_list);
9072         struct btrfs_path path;
9073         struct extent_record *tmp, *good, *n;
9074         int nr_del = 0;
9075         int ret = 0, err;
9076         struct btrfs_key key;
9077
9078         btrfs_init_path(&path);
9079
9080         good = rec;
9081         /* Find the record that covers all of the duplicates. */
9082         list_for_each_entry(tmp, &rec->dups, list) {
9083                 if (good->start < tmp->start)
9084                         continue;
9085                 if (good->nr > tmp->nr)
9086                         continue;
9087
9088                 if (tmp->start + tmp->nr < good->start + good->nr) {
9089                         fprintf(stderr, "Ok we have overlapping extents that "
9090                                 "aren't completely covered by each other, this "
9091                                 "is going to require more careful thought.  "
9092                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9093                                 tmp->start, tmp->nr, good->start, good->nr);
9094                         abort();
9095                 }
9096                 good = tmp;
9097         }
9098
9099         if (good != rec)
9100                 list_add_tail(&rec->list, &delete_list);
9101
9102         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9103                 if (tmp == good)
9104                         continue;
9105                 list_move_tail(&tmp->list, &delete_list);
9106         }
9107
9108         root = root->fs_info->extent_root;
9109         trans = btrfs_start_transaction(root, 1);
9110         if (IS_ERR(trans)) {
9111                 ret = PTR_ERR(trans);
9112                 goto out;
9113         }
9114
9115         list_for_each_entry(tmp, &delete_list, list) {
9116                 if (tmp->found_rec == 0)
9117                         continue;
9118                 key.objectid = tmp->start;
9119                 key.type = BTRFS_EXTENT_ITEM_KEY;
9120                 key.offset = tmp->nr;
9121
9122                 /* Shouldn't happen but just in case */
9123                 if (tmp->metadata) {
9124                         fprintf(stderr, "Well this shouldn't happen, extent "
9125                                 "record overlaps but is metadata? "
9126                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9127                         abort();
9128                 }
9129
9130                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9131                 if (ret) {
9132                         if (ret > 0)
9133                                 ret = -EINVAL;
9134                         break;
9135                 }
9136                 ret = btrfs_del_item(trans, root, &path);
9137                 if (ret)
9138                         break;
9139                 btrfs_release_path(&path);
9140                 nr_del++;
9141         }
9142         err = btrfs_commit_transaction(trans, root);
9143         if (err && !ret)
9144                 ret = err;
9145 out:
9146         while (!list_empty(&delete_list)) {
9147                 tmp = to_extent_record(delete_list.next);
9148                 list_del_init(&tmp->list);
9149                 if (tmp == rec)
9150                         continue;
9151                 free(tmp);
9152         }
9153
9154         while (!list_empty(&rec->dups)) {
9155                 tmp = to_extent_record(rec->dups.next);
9156                 list_del_init(&tmp->list);
9157                 free(tmp);
9158         }
9159
9160         btrfs_release_path(&path);
9161
9162         if (!ret && !nr_del)
9163                 rec->num_duplicates = 0;
9164
9165         return ret ? ret : nr_del;
9166 }
9167
9168 static int find_possible_backrefs(struct btrfs_fs_info *info,
9169                                   struct btrfs_path *path,
9170                                   struct cache_tree *extent_cache,
9171                                   struct extent_record *rec)
9172 {
9173         struct btrfs_root *root;
9174         struct extent_backref *back, *tmp;
9175         struct data_backref *dback;
9176         struct cache_extent *cache;
9177         struct btrfs_file_extent_item *fi;
9178         struct btrfs_key key;
9179         u64 bytenr, bytes;
9180         int ret;
9181
9182         rbtree_postorder_for_each_entry_safe(back, tmp,
9183                                              &rec->backref_tree, node) {
9184                 /* Don't care about full backrefs (poor unloved backrefs) */
9185                 if (back->full_backref || !back->is_data)
9186                         continue;
9187
9188                 dback = to_data_backref(back);
9189
9190                 /* We found this one, we don't need to do a lookup */
9191                 if (dback->found_ref)
9192                         continue;
9193
9194                 key.objectid = dback->root;
9195                 key.type = BTRFS_ROOT_ITEM_KEY;
9196                 key.offset = (u64)-1;
9197
9198                 root = btrfs_read_fs_root(info, &key);
9199
9200                 /* No root, definitely a bad ref, skip */
9201                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9202                         continue;
9203                 /* Other err, exit */
9204                 if (IS_ERR(root))
9205                         return PTR_ERR(root);
9206
9207                 key.objectid = dback->owner;
9208                 key.type = BTRFS_EXTENT_DATA_KEY;
9209                 key.offset = dback->offset;
9210                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9211                 if (ret) {
9212                         btrfs_release_path(path);
9213                         if (ret < 0)
9214                                 return ret;
9215                         /* Didn't find it, we can carry on */
9216                         ret = 0;
9217                         continue;
9218                 }
9219
9220                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9221                                     struct btrfs_file_extent_item);
9222                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9223                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9224                 btrfs_release_path(path);
9225                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9226                 if (cache) {
9227                         struct extent_record *tmp;
9228                         tmp = container_of(cache, struct extent_record, cache);
9229
9230                         /*
9231                          * If we found an extent record for the bytenr for this
9232                          * particular backref then we can't add it to our
9233                          * current extent record.  We only want to add backrefs
9234                          * that don't have a corresponding extent item in the
9235                          * extent tree since they likely belong to this record
9236                          * and we need to fix it if it doesn't match bytenrs.
9237                          */
9238                         if  (tmp->found_rec)
9239                                 continue;
9240                 }
9241
9242                 dback->found_ref += 1;
9243                 dback->disk_bytenr = bytenr;
9244                 dback->bytes = bytes;
9245
9246                 /*
9247                  * Set this so the verify backref code knows not to trust the
9248                  * values in this backref.
9249                  */
9250                 back->broken = 1;
9251         }
9252
9253         return 0;
9254 }
9255
9256 /*
9257  * Record orphan data ref into corresponding root.
9258  *
9259  * Return 0 if the extent item contains data ref and recorded.
9260  * Return 1 if the extent item contains no useful data ref
9261  *   On that case, it may contains only shared_dataref or metadata backref
9262  *   or the file extent exists(this should be handled by the extent bytenr
9263  *   recovery routine)
9264  * Return <0 if something goes wrong.
9265  */
9266 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9267                                       struct extent_record *rec)
9268 {
9269         struct btrfs_key key;
9270         struct btrfs_root *dest_root;
9271         struct extent_backref *back, *tmp;
9272         struct data_backref *dback;
9273         struct orphan_data_extent *orphan;
9274         struct btrfs_path path;
9275         int recorded_data_ref = 0;
9276         int ret = 0;
9277
9278         if (rec->metadata)
9279                 return 1;
9280         btrfs_init_path(&path);
9281         rbtree_postorder_for_each_entry_safe(back, tmp,
9282                                              &rec->backref_tree, node) {
9283                 if (back->full_backref || !back->is_data ||
9284                     !back->found_extent_tree)
9285                         continue;
9286                 dback = to_data_backref(back);
9287                 if (dback->found_ref)
9288                         continue;
9289                 key.objectid = dback->root;
9290                 key.type = BTRFS_ROOT_ITEM_KEY;
9291                 key.offset = (u64)-1;
9292
9293                 dest_root = btrfs_read_fs_root(fs_info, &key);
9294
9295                 /* For non-exist root we just skip it */
9296                 if (IS_ERR(dest_root) || !dest_root)
9297                         continue;
9298
9299                 key.objectid = dback->owner;
9300                 key.type = BTRFS_EXTENT_DATA_KEY;
9301                 key.offset = dback->offset;
9302
9303                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9304                 btrfs_release_path(&path);
9305                 /*
9306                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9307                  * we need to record it for inode/file extent rebuild.
9308                  * For ret > 0, we record it only for file extent rebuild.
9309                  * For ret == 0, the file extent exists but only bytenr
9310                  * mismatch, let the original bytenr fix routine to handle,
9311                  * don't record it.
9312                  */
9313                 if (ret == 0)
9314                         continue;
9315                 ret = 0;
9316                 orphan = malloc(sizeof(*orphan));
9317                 if (!orphan) {
9318                         ret = -ENOMEM;
9319                         goto out;
9320                 }
9321                 INIT_LIST_HEAD(&orphan->list);
9322                 orphan->root = dback->root;
9323                 orphan->objectid = dback->owner;
9324                 orphan->offset = dback->offset;
9325                 orphan->disk_bytenr = rec->cache.start;
9326                 orphan->disk_len = rec->cache.size;
9327                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9328                 recorded_data_ref = 1;
9329         }
9330 out:
9331         btrfs_release_path(&path);
9332         if (!ret)
9333                 return !recorded_data_ref;
9334         else
9335                 return ret;
9336 }
9337
9338 /*
9339  * when an incorrect extent item is found, this will delete
9340  * all of the existing entries for it and recreate them
9341  * based on what the tree scan found.
9342  */
9343 static int fixup_extent_refs(struct btrfs_fs_info *info,
9344                              struct cache_tree *extent_cache,
9345                              struct extent_record *rec)
9346 {
9347         struct btrfs_trans_handle *trans = NULL;
9348         int ret;
9349         struct btrfs_path path;
9350         struct cache_extent *cache;
9351         struct extent_backref *back, *tmp;
9352         int allocated = 0;
9353         u64 flags = 0;
9354
9355         if (rec->flag_block_full_backref)
9356                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9357
9358         btrfs_init_path(&path);
9359         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9360                 /*
9361                  * Sometimes the backrefs themselves are so broken they don't
9362                  * get attached to any meaningful rec, so first go back and
9363                  * check any of our backrefs that we couldn't find and throw
9364                  * them into the list if we find the backref so that
9365                  * verify_backrefs can figure out what to do.
9366                  */
9367                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9368                 if (ret < 0)
9369                         goto out;
9370         }
9371
9372         /* step one, make sure all of the backrefs agree */
9373         ret = verify_backrefs(info, &path, rec);
9374         if (ret < 0)
9375                 goto out;
9376
9377         trans = btrfs_start_transaction(info->extent_root, 1);
9378         if (IS_ERR(trans)) {
9379                 ret = PTR_ERR(trans);
9380                 goto out;
9381         }
9382
9383         /* step two, delete all the existing records */
9384         ret = delete_extent_records(trans, info->extent_root, &path,
9385                                     rec->start);
9386
9387         if (ret < 0)
9388                 goto out;
9389
9390         /* was this block corrupt?  If so, don't add references to it */
9391         cache = lookup_cache_extent(info->corrupt_blocks,
9392                                     rec->start, rec->max_size);
9393         if (cache) {
9394                 ret = 0;
9395                 goto out;
9396         }
9397
9398         /* step three, recreate all the refs we did find */
9399         rbtree_postorder_for_each_entry_safe(back, tmp,
9400                                              &rec->backref_tree, node) {
9401                 /*
9402                  * if we didn't find any references, don't create a
9403                  * new extent record
9404                  */
9405                 if (!back->found_ref)
9406                         continue;
9407
9408                 rec->bad_full_backref = 0;
9409                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9410                 allocated = 1;
9411
9412                 if (ret)
9413                         goto out;
9414         }
9415 out:
9416         if (trans) {
9417                 int err = btrfs_commit_transaction(trans, info->extent_root);
9418                 if (!ret)
9419                         ret = err;
9420         }
9421
9422         if (!ret)
9423                 fprintf(stderr, "Repaired extent references for %llu\n",
9424                                 (unsigned long long)rec->start);
9425
9426         btrfs_release_path(&path);
9427         return ret;
9428 }
9429
9430 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9431                               struct extent_record *rec)
9432 {
9433         struct btrfs_trans_handle *trans;
9434         struct btrfs_root *root = fs_info->extent_root;
9435         struct btrfs_path path;
9436         struct btrfs_extent_item *ei;
9437         struct btrfs_key key;
9438         u64 flags;
9439         int ret = 0;
9440
9441         key.objectid = rec->start;
9442         if (rec->metadata) {
9443                 key.type = BTRFS_METADATA_ITEM_KEY;
9444                 key.offset = rec->info_level;
9445         } else {
9446                 key.type = BTRFS_EXTENT_ITEM_KEY;
9447                 key.offset = rec->max_size;
9448         }
9449
9450         trans = btrfs_start_transaction(root, 0);
9451         if (IS_ERR(trans))
9452                 return PTR_ERR(trans);
9453
9454         btrfs_init_path(&path);
9455         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9456         if (ret < 0) {
9457                 btrfs_release_path(&path);
9458                 btrfs_commit_transaction(trans, root);
9459                 return ret;
9460         } else if (ret) {
9461                 fprintf(stderr, "Didn't find extent for %llu\n",
9462                         (unsigned long long)rec->start);
9463                 btrfs_release_path(&path);
9464                 btrfs_commit_transaction(trans, root);
9465                 return -ENOENT;
9466         }
9467
9468         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9469                             struct btrfs_extent_item);
9470         flags = btrfs_extent_flags(path.nodes[0], ei);
9471         if (rec->flag_block_full_backref) {
9472                 fprintf(stderr, "setting full backref on %llu\n",
9473                         (unsigned long long)key.objectid);
9474                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9475         } else {
9476                 fprintf(stderr, "clearing full backref on %llu\n",
9477                         (unsigned long long)key.objectid);
9478                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9479         }
9480         btrfs_set_extent_flags(path.nodes[0], ei, flags);
9481         btrfs_mark_buffer_dirty(path.nodes[0]);
9482         btrfs_release_path(&path);
9483         ret = btrfs_commit_transaction(trans, root);
9484         if (!ret)
9485                 fprintf(stderr, "Repaired extent flags for %llu\n",
9486                                 (unsigned long long)rec->start);
9487
9488         return ret;
9489 }
9490
9491 /* right now we only prune from the extent allocation tree */
9492 static int prune_one_block(struct btrfs_trans_handle *trans,
9493                            struct btrfs_fs_info *info,
9494                            struct btrfs_corrupt_block *corrupt)
9495 {
9496         int ret;
9497         struct btrfs_path path;
9498         struct extent_buffer *eb;
9499         u64 found;
9500         int slot;
9501         int nritems;
9502         int level = corrupt->level + 1;
9503
9504         btrfs_init_path(&path);
9505 again:
9506         /* we want to stop at the parent to our busted block */
9507         path.lowest_level = level;
9508
9509         ret = btrfs_search_slot(trans, info->extent_root,
9510                                 &corrupt->key, &path, -1, 1);
9511
9512         if (ret < 0)
9513                 goto out;
9514
9515         eb = path.nodes[level];
9516         if (!eb) {
9517                 ret = -ENOENT;
9518                 goto out;
9519         }
9520
9521         /*
9522          * hopefully the search gave us the block we want to prune,
9523          * lets try that first
9524          */
9525         slot = path.slots[level];
9526         found =  btrfs_node_blockptr(eb, slot);
9527         if (found == corrupt->cache.start)
9528                 goto del_ptr;
9529
9530         nritems = btrfs_header_nritems(eb);
9531
9532         /* the search failed, lets scan this node and hope we find it */
9533         for (slot = 0; slot < nritems; slot++) {
9534                 found =  btrfs_node_blockptr(eb, slot);
9535                 if (found == corrupt->cache.start)
9536                         goto del_ptr;
9537         }
9538         /*
9539          * we couldn't find the bad block.  TODO, search all the nodes for pointers
9540          * to this block
9541          */
9542         if (eb == info->extent_root->node) {
9543                 ret = -ENOENT;
9544                 goto out;
9545         } else {
9546                 level++;
9547                 btrfs_release_path(&path);
9548                 goto again;
9549         }
9550
9551 del_ptr:
9552         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9553         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9554
9555 out:
9556         btrfs_release_path(&path);
9557         return ret;
9558 }
9559
9560 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9561 {
9562         struct btrfs_trans_handle *trans = NULL;
9563         struct cache_extent *cache;
9564         struct btrfs_corrupt_block *corrupt;
9565
9566         while (1) {
9567                 cache = search_cache_extent(info->corrupt_blocks, 0);
9568                 if (!cache)
9569                         break;
9570                 if (!trans) {
9571                         trans = btrfs_start_transaction(info->extent_root, 1);
9572                         if (IS_ERR(trans))
9573                                 return PTR_ERR(trans);
9574                 }
9575                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9576                 prune_one_block(trans, info, corrupt);
9577                 remove_cache_extent(info->corrupt_blocks, cache);
9578         }
9579         if (trans)
9580                 return btrfs_commit_transaction(trans, info->extent_root);
9581         return 0;
9582 }
9583
9584 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9585 {
9586         struct btrfs_block_group_cache *cache;
9587         u64 start, end;
9588         int ret;
9589
9590         while (1) {
9591                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9592                                             &start, &end, EXTENT_DIRTY);
9593                 if (ret)
9594                         break;
9595                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9596         }
9597
9598         start = 0;
9599         while (1) {
9600                 cache = btrfs_lookup_first_block_group(fs_info, start);
9601                 if (!cache)
9602                         break;
9603                 if (cache->cached)
9604                         cache->cached = 0;
9605                 start = cache->key.objectid + cache->key.offset;
9606         }
9607 }
9608
9609 static int check_extent_refs(struct btrfs_root *root,
9610                              struct cache_tree *extent_cache)
9611 {
9612         struct extent_record *rec;
9613         struct cache_extent *cache;
9614         int ret = 0;
9615         int had_dups = 0;
9616
9617         if (repair) {
9618                 /*
9619                  * if we're doing a repair, we have to make sure
9620                  * we don't allocate from the problem extents.
9621                  * In the worst case, this will be all the
9622                  * extents in the FS
9623                  */
9624                 cache = search_cache_extent(extent_cache, 0);
9625                 while(cache) {
9626                         rec = container_of(cache, struct extent_record, cache);
9627                         set_extent_dirty(root->fs_info->excluded_extents,
9628                                          rec->start,
9629                                          rec->start + rec->max_size - 1);
9630                         cache = next_cache_extent(cache);
9631                 }
9632
9633                 /* pin down all the corrupted blocks too */
9634                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9635                 while(cache) {
9636                         set_extent_dirty(root->fs_info->excluded_extents,
9637                                          cache->start,
9638                                          cache->start + cache->size - 1);
9639                         cache = next_cache_extent(cache);
9640                 }
9641                 prune_corrupt_blocks(root->fs_info);
9642                 reset_cached_block_groups(root->fs_info);
9643         }
9644
9645         reset_cached_block_groups(root->fs_info);
9646
9647         /*
9648          * We need to delete any duplicate entries we find first otherwise we
9649          * could mess up the extent tree when we have backrefs that actually
9650          * belong to a different extent item and not the weird duplicate one.
9651          */
9652         while (repair && !list_empty(&duplicate_extents)) {
9653                 rec = to_extent_record(duplicate_extents.next);
9654                 list_del_init(&rec->list);
9655
9656                 /* Sometimes we can find a backref before we find an actual
9657                  * extent, so we need to process it a little bit to see if there
9658                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9659                  * if this is a backref screwup.  If we need to delete stuff
9660                  * process_duplicates() will return 0, otherwise it will return
9661                  * 1 and we
9662                  */
9663                 if (process_duplicates(extent_cache, rec))
9664                         continue;
9665                 ret = delete_duplicate_records(root, rec);
9666                 if (ret < 0)
9667                         return ret;
9668                 /*
9669                  * delete_duplicate_records will return the number of entries
9670                  * deleted, so if it's greater than 0 then we know we actually
9671                  * did something and we need to remove.
9672                  */
9673                 if (ret)
9674                         had_dups = 1;
9675         }
9676
9677         if (had_dups)
9678                 return -EAGAIN;
9679
9680         while(1) {
9681                 int cur_err = 0;
9682                 int fix = 0;
9683
9684                 cache = search_cache_extent(extent_cache, 0);
9685                 if (!cache)
9686                         break;
9687                 rec = container_of(cache, struct extent_record, cache);
9688                 if (rec->num_duplicates) {
9689                         fprintf(stderr, "extent item %llu has multiple extent "
9690                                 "items\n", (unsigned long long)rec->start);
9691                         cur_err = 1;
9692                 }
9693
9694                 if (rec->refs != rec->extent_item_refs) {
9695                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
9696                                 (unsigned long long)rec->start,
9697                                 (unsigned long long)rec->nr);
9698                         fprintf(stderr, "extent item %llu, found %llu\n",
9699                                 (unsigned long long)rec->extent_item_refs,
9700                                 (unsigned long long)rec->refs);
9701                         ret = record_orphan_data_extents(root->fs_info, rec);
9702                         if (ret < 0)
9703                                 goto repair_abort;
9704                         fix = ret;
9705                         cur_err = 1;
9706                 }
9707                 if (all_backpointers_checked(rec, 1)) {
9708                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9709                                 (unsigned long long)rec->start,
9710                                 (unsigned long long)rec->nr);
9711                         fix = 1;
9712                         cur_err = 1;
9713                 }
9714                 if (!rec->owner_ref_checked) {
9715                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9716                                 (unsigned long long)rec->start,
9717                                 (unsigned long long)rec->nr);
9718                         fix = 1;
9719                         cur_err = 1;
9720                 }
9721
9722                 if (repair && fix) {
9723                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9724                         if (ret)
9725                                 goto repair_abort;
9726                 }
9727
9728
9729                 if (rec->bad_full_backref) {
9730                         fprintf(stderr, "bad full backref, on [%llu]\n",
9731                                 (unsigned long long)rec->start);
9732                         if (repair) {
9733                                 ret = fixup_extent_flags(root->fs_info, rec);
9734                                 if (ret)
9735                                         goto repair_abort;
9736                                 fix = 1;
9737                         }
9738                         cur_err = 1;
9739                 }
9740                 /*
9741                  * Although it's not a extent ref's problem, we reuse this
9742                  * routine for error reporting.
9743                  * No repair function yet.
9744                  */
9745                 if (rec->crossing_stripes) {
9746                         fprintf(stderr,
9747                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9748                                 rec->start, rec->start + rec->max_size);
9749                         cur_err = 1;
9750                 }
9751
9752                 if (rec->wrong_chunk_type) {
9753                         fprintf(stderr,
9754                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9755                                 rec->start, rec->start + rec->max_size);
9756                         cur_err = 1;
9757                 }
9758
9759                 remove_cache_extent(extent_cache, cache);
9760                 free_all_extent_backrefs(rec);
9761                 if (!init_extent_tree && repair && (!cur_err || fix))
9762                         clear_extent_dirty(root->fs_info->excluded_extents,
9763                                            rec->start,
9764                                            rec->start + rec->max_size - 1);
9765                 free(rec);
9766         }
9767 repair_abort:
9768         if (repair) {
9769                 if (ret && ret != -EAGAIN) {
9770                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9771                         exit(1);
9772                 } else if (!ret) {
9773                         struct btrfs_trans_handle *trans;
9774
9775                         root = root->fs_info->extent_root;
9776                         trans = btrfs_start_transaction(root, 1);
9777                         if (IS_ERR(trans)) {
9778                                 ret = PTR_ERR(trans);
9779                                 goto repair_abort;
9780                         }
9781
9782                         ret = btrfs_fix_block_accounting(trans, root);
9783                         if (ret)
9784                                 goto repair_abort;
9785                         ret = btrfs_commit_transaction(trans, root);
9786                         if (ret)
9787                                 goto repair_abort;
9788                 }
9789                 return ret;
9790         }
9791         return 0;
9792 }
9793
9794 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9795 {
9796         u64 stripe_size;
9797
9798         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9799                 stripe_size = length;
9800                 stripe_size /= num_stripes;
9801         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9802                 stripe_size = length * 2;
9803                 stripe_size /= num_stripes;
9804         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9805                 stripe_size = length;
9806                 stripe_size /= (num_stripes - 1);
9807         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9808                 stripe_size = length;
9809                 stripe_size /= (num_stripes - 2);
9810         } else {
9811                 stripe_size = length;
9812         }
9813         return stripe_size;
9814 }
9815
9816 /*
9817  * Check the chunk with its block group/dev list ref:
9818  * Return 0 if all refs seems valid.
9819  * Return 1 if part of refs seems valid, need later check for rebuild ref
9820  * like missing block group and needs to search extent tree to rebuild them.
9821  * Return -1 if essential refs are missing and unable to rebuild.
9822  */
9823 static int check_chunk_refs(struct chunk_record *chunk_rec,
9824                             struct block_group_tree *block_group_cache,
9825                             struct device_extent_tree *dev_extent_cache,
9826                             int silent)
9827 {
9828         struct cache_extent *block_group_item;
9829         struct block_group_record *block_group_rec;
9830         struct cache_extent *dev_extent_item;
9831         struct device_extent_record *dev_extent_rec;
9832         u64 devid;
9833         u64 offset;
9834         u64 length;
9835         int metadump_v2 = 0;
9836         int i;
9837         int ret = 0;
9838
9839         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9840                                                chunk_rec->offset,
9841                                                chunk_rec->length);
9842         if (block_group_item) {
9843                 block_group_rec = container_of(block_group_item,
9844                                                struct block_group_record,
9845                                                cache);
9846                 if (chunk_rec->length != block_group_rec->offset ||
9847                     chunk_rec->offset != block_group_rec->objectid ||
9848                     (!metadump_v2 &&
9849                      chunk_rec->type_flags != block_group_rec->flags)) {
9850                         if (!silent)
9851                                 fprintf(stderr,
9852                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9853                                         chunk_rec->objectid,
9854                                         chunk_rec->type,
9855                                         chunk_rec->offset,
9856                                         chunk_rec->length,
9857                                         chunk_rec->offset,
9858                                         chunk_rec->type_flags,
9859                                         block_group_rec->objectid,
9860                                         block_group_rec->type,
9861                                         block_group_rec->offset,
9862                                         block_group_rec->offset,
9863                                         block_group_rec->objectid,
9864                                         block_group_rec->flags);
9865                         ret = -1;
9866                 } else {
9867                         list_del_init(&block_group_rec->list);
9868                         chunk_rec->bg_rec = block_group_rec;
9869                 }
9870         } else {
9871                 if (!silent)
9872                         fprintf(stderr,
9873                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9874                                 chunk_rec->objectid,
9875                                 chunk_rec->type,
9876                                 chunk_rec->offset,
9877                                 chunk_rec->length,
9878                                 chunk_rec->offset,
9879                                 chunk_rec->type_flags);
9880                 ret = 1;
9881         }
9882
9883         if (metadump_v2)
9884                 return ret;
9885
9886         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9887                                     chunk_rec->num_stripes);
9888         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9889                 devid = chunk_rec->stripes[i].devid;
9890                 offset = chunk_rec->stripes[i].offset;
9891                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9892                                                        devid, offset, length);
9893                 if (dev_extent_item) {
9894                         dev_extent_rec = container_of(dev_extent_item,
9895                                                 struct device_extent_record,
9896                                                 cache);
9897                         if (dev_extent_rec->objectid != devid ||
9898                             dev_extent_rec->offset != offset ||
9899                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9900                             dev_extent_rec->length != length) {
9901                                 if (!silent)
9902                                         fprintf(stderr,
9903                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9904                                                 chunk_rec->objectid,
9905                                                 chunk_rec->type,
9906                                                 chunk_rec->offset,
9907                                                 chunk_rec->stripes[i].devid,
9908                                                 chunk_rec->stripes[i].offset,
9909                                                 dev_extent_rec->objectid,
9910                                                 dev_extent_rec->offset,
9911                                                 dev_extent_rec->length);
9912                                 ret = -1;
9913                         } else {
9914                                 list_move(&dev_extent_rec->chunk_list,
9915                                           &chunk_rec->dextents);
9916                         }
9917                 } else {
9918                         if (!silent)
9919                                 fprintf(stderr,
9920                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9921                                         chunk_rec->objectid,
9922                                         chunk_rec->type,
9923                                         chunk_rec->offset,
9924                                         chunk_rec->stripes[i].devid,
9925                                         chunk_rec->stripes[i].offset);
9926                         ret = -1;
9927                 }
9928         }
9929         return ret;
9930 }
9931
9932 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9933 int check_chunks(struct cache_tree *chunk_cache,
9934                  struct block_group_tree *block_group_cache,
9935                  struct device_extent_tree *dev_extent_cache,
9936                  struct list_head *good, struct list_head *bad,
9937                  struct list_head *rebuild, int silent)
9938 {
9939         struct cache_extent *chunk_item;
9940         struct chunk_record *chunk_rec;
9941         struct block_group_record *bg_rec;
9942         struct device_extent_record *dext_rec;
9943         int err;
9944         int ret = 0;
9945
9946         chunk_item = first_cache_extent(chunk_cache);
9947         while (chunk_item) {
9948                 chunk_rec = container_of(chunk_item, struct chunk_record,
9949                                          cache);
9950                 err = check_chunk_refs(chunk_rec, block_group_cache,
9951                                        dev_extent_cache, silent);
9952                 if (err < 0)
9953                         ret = err;
9954                 if (err == 0 && good)
9955                         list_add_tail(&chunk_rec->list, good);
9956                 if (err > 0 && rebuild)
9957                         list_add_tail(&chunk_rec->list, rebuild);
9958                 if (err < 0 && bad)
9959                         list_add_tail(&chunk_rec->list, bad);
9960                 chunk_item = next_cache_extent(chunk_item);
9961         }
9962
9963         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9964                 if (!silent)
9965                         fprintf(stderr,
9966                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9967                                 bg_rec->objectid,
9968                                 bg_rec->offset,
9969                                 bg_rec->flags);
9970                 if (!ret)
9971                         ret = 1;
9972         }
9973
9974         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9975                             chunk_list) {
9976                 if (!silent)
9977                         fprintf(stderr,
9978                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9979                                 dext_rec->objectid,
9980                                 dext_rec->offset,
9981                                 dext_rec->length);
9982                 if (!ret)
9983                         ret = 1;
9984         }
9985         return ret;
9986 }
9987
9988
9989 static int check_device_used(struct device_record *dev_rec,
9990                              struct device_extent_tree *dext_cache)
9991 {
9992         struct cache_extent *cache;
9993         struct device_extent_record *dev_extent_rec;
9994         u64 total_byte = 0;
9995
9996         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9997         while (cache) {
9998                 dev_extent_rec = container_of(cache,
9999                                               struct device_extent_record,
10000                                               cache);
10001                 if (dev_extent_rec->objectid != dev_rec->devid)
10002                         break;
10003
10004                 list_del_init(&dev_extent_rec->device_list);
10005                 total_byte += dev_extent_rec->length;
10006                 cache = next_cache_extent(cache);
10007         }
10008
10009         if (total_byte != dev_rec->byte_used) {
10010                 fprintf(stderr,
10011                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10012                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10013                         dev_rec->type, dev_rec->offset);
10014                 return -1;
10015         } else {
10016                 return 0;
10017         }
10018 }
10019
10020 /* check btrfs_dev_item -> btrfs_dev_extent */
10021 static int check_devices(struct rb_root *dev_cache,
10022                          struct device_extent_tree *dev_extent_cache)
10023 {
10024         struct rb_node *dev_node;
10025         struct device_record *dev_rec;
10026         struct device_extent_record *dext_rec;
10027         int err;
10028         int ret = 0;
10029
10030         dev_node = rb_first(dev_cache);
10031         while (dev_node) {
10032                 dev_rec = container_of(dev_node, struct device_record, node);
10033                 err = check_device_used(dev_rec, dev_extent_cache);
10034                 if (err)
10035                         ret = err;
10036
10037                 dev_node = rb_next(dev_node);
10038         }
10039         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10040                             device_list) {
10041                 fprintf(stderr,
10042                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10043                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10044                 if (!ret)
10045                         ret = 1;
10046         }
10047         return ret;
10048 }
10049
10050 static int add_root_item_to_list(struct list_head *head,
10051                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10052                                   u8 level, u8 drop_level,
10053                                   struct btrfs_key *drop_key)
10054 {
10055
10056         struct root_item_record *ri_rec;
10057         ri_rec = malloc(sizeof(*ri_rec));
10058         if (!ri_rec)
10059                 return -ENOMEM;
10060         ri_rec->bytenr = bytenr;
10061         ri_rec->objectid = objectid;
10062         ri_rec->level = level;
10063         ri_rec->drop_level = drop_level;
10064         ri_rec->last_snapshot = last_snapshot;
10065         if (drop_key)
10066                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10067         list_add_tail(&ri_rec->list, head);
10068
10069         return 0;
10070 }
10071
10072 static void free_root_item_list(struct list_head *list)
10073 {
10074         struct root_item_record *ri_rec;
10075
10076         while (!list_empty(list)) {
10077                 ri_rec = list_first_entry(list, struct root_item_record,
10078                                           list);
10079                 list_del_init(&ri_rec->list);
10080                 free(ri_rec);
10081         }
10082 }
10083
10084 static int deal_root_from_list(struct list_head *list,
10085                                struct btrfs_root *root,
10086                                struct block_info *bits,
10087                                int bits_nr,
10088                                struct cache_tree *pending,
10089                                struct cache_tree *seen,
10090                                struct cache_tree *reada,
10091                                struct cache_tree *nodes,
10092                                struct cache_tree *extent_cache,
10093                                struct cache_tree *chunk_cache,
10094                                struct rb_root *dev_cache,
10095                                struct block_group_tree *block_group_cache,
10096                                struct device_extent_tree *dev_extent_cache)
10097 {
10098         int ret = 0;
10099         u64 last;
10100
10101         while (!list_empty(list)) {
10102                 struct root_item_record *rec;
10103                 struct extent_buffer *buf;
10104                 rec = list_entry(list->next,
10105                                  struct root_item_record, list);
10106                 last = 0;
10107                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10108                 if (!extent_buffer_uptodate(buf)) {
10109                         free_extent_buffer(buf);
10110                         ret = -EIO;
10111                         break;
10112                 }
10113                 ret = add_root_to_pending(buf, extent_cache, pending,
10114                                     seen, nodes, rec->objectid);
10115                 if (ret < 0)
10116                         break;
10117                 /*
10118                  * To rebuild extent tree, we need deal with snapshot
10119                  * one by one, otherwise we deal with node firstly which
10120                  * can maximize readahead.
10121                  */
10122                 while (1) {
10123                         ret = run_next_block(root, bits, bits_nr, &last,
10124                                              pending, seen, reada, nodes,
10125                                              extent_cache, chunk_cache,
10126                                              dev_cache, block_group_cache,
10127                                              dev_extent_cache, rec);
10128                         if (ret != 0)
10129                                 break;
10130                 }
10131                 free_extent_buffer(buf);
10132                 list_del(&rec->list);
10133                 free(rec);
10134                 if (ret < 0)
10135                         break;
10136         }
10137         while (ret >= 0) {
10138                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10139                                      reada, nodes, extent_cache, chunk_cache,
10140                                      dev_cache, block_group_cache,
10141                                      dev_extent_cache, NULL);
10142                 if (ret != 0) {
10143                         if (ret > 0)
10144                                 ret = 0;
10145                         break;
10146                 }
10147         }
10148         return ret;
10149 }
10150
10151 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10152 {
10153         struct rb_root dev_cache;
10154         struct cache_tree chunk_cache;
10155         struct block_group_tree block_group_cache;
10156         struct device_extent_tree dev_extent_cache;
10157         struct cache_tree extent_cache;
10158         struct cache_tree seen;
10159         struct cache_tree pending;
10160         struct cache_tree reada;
10161         struct cache_tree nodes;
10162         struct extent_io_tree excluded_extents;
10163         struct cache_tree corrupt_blocks;
10164         struct btrfs_path path;
10165         struct btrfs_key key;
10166         struct btrfs_key found_key;
10167         int ret, err = 0;
10168         struct block_info *bits;
10169         int bits_nr;
10170         struct extent_buffer *leaf;
10171         int slot;
10172         struct btrfs_root_item ri;
10173         struct list_head dropping_trees;
10174         struct list_head normal_trees;
10175         struct btrfs_root *root1;
10176         struct btrfs_root *root;
10177         u64 objectid;
10178         u8 level;
10179
10180         root = fs_info->fs_root;
10181         dev_cache = RB_ROOT;
10182         cache_tree_init(&chunk_cache);
10183         block_group_tree_init(&block_group_cache);
10184         device_extent_tree_init(&dev_extent_cache);
10185
10186         cache_tree_init(&extent_cache);
10187         cache_tree_init(&seen);
10188         cache_tree_init(&pending);
10189         cache_tree_init(&nodes);
10190         cache_tree_init(&reada);
10191         cache_tree_init(&corrupt_blocks);
10192         extent_io_tree_init(&excluded_extents);
10193         INIT_LIST_HEAD(&dropping_trees);
10194         INIT_LIST_HEAD(&normal_trees);
10195
10196         if (repair) {
10197                 fs_info->excluded_extents = &excluded_extents;
10198                 fs_info->fsck_extent_cache = &extent_cache;
10199                 fs_info->free_extent_hook = free_extent_hook;
10200                 fs_info->corrupt_blocks = &corrupt_blocks;
10201         }
10202
10203         bits_nr = 1024;
10204         bits = malloc(bits_nr * sizeof(struct block_info));
10205         if (!bits) {
10206                 perror("malloc");
10207                 exit(1);
10208         }
10209
10210         if (ctx.progress_enabled) {
10211                 ctx.tp = TASK_EXTENTS;
10212                 task_start(ctx.info);
10213         }
10214
10215 again:
10216         root1 = fs_info->tree_root;
10217         level = btrfs_header_level(root1->node);
10218         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10219                                     root1->node->start, 0, level, 0, NULL);
10220         if (ret < 0)
10221                 goto out;
10222         root1 = fs_info->chunk_root;
10223         level = btrfs_header_level(root1->node);
10224         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10225                                     root1->node->start, 0, level, 0, NULL);
10226         if (ret < 0)
10227                 goto out;
10228         btrfs_init_path(&path);
10229         key.offset = 0;
10230         key.objectid = 0;
10231         key.type = BTRFS_ROOT_ITEM_KEY;
10232         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10233         if (ret < 0)
10234                 goto out;
10235         while(1) {
10236                 leaf = path.nodes[0];
10237                 slot = path.slots[0];
10238                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10239                         ret = btrfs_next_leaf(root, &path);
10240                         if (ret != 0)
10241                                 break;
10242                         leaf = path.nodes[0];
10243                         slot = path.slots[0];
10244                 }
10245                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10246                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10247                         unsigned long offset;
10248                         u64 last_snapshot;
10249
10250                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10251                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10252                         last_snapshot = btrfs_root_last_snapshot(&ri);
10253                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10254                                 level = btrfs_root_level(&ri);
10255                                 ret = add_root_item_to_list(&normal_trees,
10256                                                 found_key.objectid,
10257                                                 btrfs_root_bytenr(&ri),
10258                                                 last_snapshot, level,
10259                                                 0, NULL);
10260                                 if (ret < 0)
10261                                         goto out;
10262                         } else {
10263                                 level = btrfs_root_level(&ri);
10264                                 objectid = found_key.objectid;
10265                                 btrfs_disk_key_to_cpu(&found_key,
10266                                                       &ri.drop_progress);
10267                                 ret = add_root_item_to_list(&dropping_trees,
10268                                                 objectid,
10269                                                 btrfs_root_bytenr(&ri),
10270                                                 last_snapshot, level,
10271                                                 ri.drop_level, &found_key);
10272                                 if (ret < 0)
10273                                         goto out;
10274                         }
10275                 }
10276                 path.slots[0]++;
10277         }
10278         btrfs_release_path(&path);
10279
10280         /*
10281          * check_block can return -EAGAIN if it fixes something, please keep
10282          * this in mind when dealing with return values from these functions, if
10283          * we get -EAGAIN we want to fall through and restart the loop.
10284          */
10285         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10286                                   &seen, &reada, &nodes, &extent_cache,
10287                                   &chunk_cache, &dev_cache, &block_group_cache,
10288                                   &dev_extent_cache);
10289         if (ret < 0) {
10290                 if (ret == -EAGAIN)
10291                         goto loop;
10292                 goto out;
10293         }
10294         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10295                                   &pending, &seen, &reada, &nodes,
10296                                   &extent_cache, &chunk_cache, &dev_cache,
10297                                   &block_group_cache, &dev_extent_cache);
10298         if (ret < 0) {
10299                 if (ret == -EAGAIN)
10300                         goto loop;
10301                 goto out;
10302         }
10303
10304         ret = check_chunks(&chunk_cache, &block_group_cache,
10305                            &dev_extent_cache, NULL, NULL, NULL, 0);
10306         if (ret) {
10307                 if (ret == -EAGAIN)
10308                         goto loop;
10309                 err = ret;
10310         }
10311
10312         ret = check_extent_refs(root, &extent_cache);
10313         if (ret < 0) {
10314                 if (ret == -EAGAIN)
10315                         goto loop;
10316                 goto out;
10317         }
10318
10319         ret = check_devices(&dev_cache, &dev_extent_cache);
10320         if (ret && err)
10321                 ret = err;
10322
10323 out:
10324         task_stop(ctx.info);
10325         if (repair) {
10326                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10327                 extent_io_tree_cleanup(&excluded_extents);
10328                 fs_info->fsck_extent_cache = NULL;
10329                 fs_info->free_extent_hook = NULL;
10330                 fs_info->corrupt_blocks = NULL;
10331                 fs_info->excluded_extents = NULL;
10332         }
10333         free(bits);
10334         free_chunk_cache_tree(&chunk_cache);
10335         free_device_cache_tree(&dev_cache);
10336         free_block_group_tree(&block_group_cache);
10337         free_device_extent_tree(&dev_extent_cache);
10338         free_extent_cache_tree(&seen);
10339         free_extent_cache_tree(&pending);
10340         free_extent_cache_tree(&reada);
10341         free_extent_cache_tree(&nodes);
10342         free_root_item_list(&normal_trees);
10343         free_root_item_list(&dropping_trees);
10344         return ret;
10345 loop:
10346         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10347         free_extent_cache_tree(&seen);
10348         free_extent_cache_tree(&pending);
10349         free_extent_cache_tree(&reada);
10350         free_extent_cache_tree(&nodes);
10351         free_chunk_cache_tree(&chunk_cache);
10352         free_block_group_tree(&block_group_cache);
10353         free_device_cache_tree(&dev_cache);
10354         free_device_extent_tree(&dev_extent_cache);
10355         free_extent_record_cache(&extent_cache);
10356         free_root_item_list(&normal_trees);
10357         free_root_item_list(&dropping_trees);
10358         extent_io_tree_cleanup(&excluded_extents);
10359         goto again;
10360 }
10361
10362 /*
10363  * Check backrefs of a tree block given by @bytenr or @eb.
10364  *
10365  * @root:       the root containing the @bytenr or @eb
10366  * @eb:         tree block extent buffer, can be NULL
10367  * @bytenr:     bytenr of the tree block to search
10368  * @level:      tree level of the tree block
10369  * @owner:      owner of the tree block
10370  *
10371  * Return >0 for any error found and output error message
10372  * Return 0 for no error found
10373  */
10374 static int check_tree_block_ref(struct btrfs_root *root,
10375                                 struct extent_buffer *eb, u64 bytenr,
10376                                 int level, u64 owner)
10377 {
10378         struct btrfs_key key;
10379         struct btrfs_root *extent_root = root->fs_info->extent_root;
10380         struct btrfs_path path;
10381         struct btrfs_extent_item *ei;
10382         struct btrfs_extent_inline_ref *iref;
10383         struct extent_buffer *leaf;
10384         unsigned long end;
10385         unsigned long ptr;
10386         int slot;
10387         int skinny_level;
10388         int type;
10389         u32 nodesize = root->fs_info->nodesize;
10390         u32 item_size;
10391         u64 offset;
10392         int tree_reloc_root = 0;
10393         int found_ref = 0;
10394         int err = 0;
10395         int ret;
10396
10397         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10398             btrfs_header_bytenr(root->node) == bytenr)
10399                 tree_reloc_root = 1;
10400
10401         btrfs_init_path(&path);
10402         key.objectid = bytenr;
10403         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10404                 key.type = BTRFS_METADATA_ITEM_KEY;
10405         else
10406                 key.type = BTRFS_EXTENT_ITEM_KEY;
10407         key.offset = (u64)-1;
10408
10409         /* Search for the backref in extent tree */
10410         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10411         if (ret < 0) {
10412                 err |= BACKREF_MISSING;
10413                 goto out;
10414         }
10415         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10416         if (ret) {
10417                 err |= BACKREF_MISSING;
10418                 goto out;
10419         }
10420
10421         leaf = path.nodes[0];
10422         slot = path.slots[0];
10423         btrfs_item_key_to_cpu(leaf, &key, slot);
10424
10425         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10426
10427         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10428                 skinny_level = (int)key.offset;
10429                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10430         } else {
10431                 struct btrfs_tree_block_info *info;
10432
10433                 info = (struct btrfs_tree_block_info *)(ei + 1);
10434                 skinny_level = btrfs_tree_block_level(leaf, info);
10435                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10436         }
10437
10438         if (eb) {
10439                 u64 header_gen;
10440                 u64 extent_gen;
10441
10442                 if (!(btrfs_extent_flags(leaf, ei) &
10443                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10444                         error(
10445                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10446                                 key.objectid, nodesize,
10447                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10448                         err = BACKREF_MISMATCH;
10449                 }
10450                 header_gen = btrfs_header_generation(eb);
10451                 extent_gen = btrfs_extent_generation(leaf, ei);
10452                 if (header_gen != extent_gen) {
10453                         error(
10454         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10455                                 key.objectid, nodesize, header_gen,
10456                                 extent_gen);
10457                         err = BACKREF_MISMATCH;
10458                 }
10459                 if (level != skinny_level) {
10460                         error(
10461                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10462                                 key.objectid, nodesize, level, skinny_level);
10463                         err = BACKREF_MISMATCH;
10464                 }
10465                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10466                         error(
10467                         "extent[%llu %u] is referred by other roots than %llu",
10468                                 key.objectid, nodesize, root->objectid);
10469                         err = BACKREF_MISMATCH;
10470                 }
10471         }
10472
10473         /*
10474          * Iterate the extent/metadata item to find the exact backref
10475          */
10476         item_size = btrfs_item_size_nr(leaf, slot);
10477         ptr = (unsigned long)iref;
10478         end = (unsigned long)ei + item_size;
10479         while (ptr < end) {
10480                 iref = (struct btrfs_extent_inline_ref *)ptr;
10481                 type = btrfs_extent_inline_ref_type(leaf, iref);
10482                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10483
10484                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10485                         (offset == root->objectid || offset == owner)) {
10486                         found_ref = 1;
10487                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10488                         /*
10489                          * Backref of tree reloc root points to itself, no need
10490                          * to check backref any more.
10491                          */
10492                         if (tree_reloc_root)
10493                                 found_ref = 1;
10494                         else
10495                         /* Check if the backref points to valid referencer */
10496                                 found_ref = !check_tree_block_ref(root, NULL,
10497                                                 offset, level + 1, owner);
10498                 }
10499
10500                 if (found_ref)
10501                         break;
10502                 ptr += btrfs_extent_inline_ref_size(type);
10503         }
10504
10505         /*
10506          * Inlined extent item doesn't have what we need, check
10507          * TREE_BLOCK_REF_KEY
10508          */
10509         if (!found_ref) {
10510                 btrfs_release_path(&path);
10511                 key.objectid = bytenr;
10512                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10513                 key.offset = root->objectid;
10514
10515                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10516                 if (!ret)
10517                         found_ref = 1;
10518         }
10519         if (!found_ref)
10520                 err |= BACKREF_MISSING;
10521 out:
10522         btrfs_release_path(&path);
10523         if (eb && (err & BACKREF_MISSING))
10524                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10525                         bytenr, nodesize, owner, level);
10526         return err;
10527 }
10528
10529 /*
10530  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10531  *
10532  * Return >0 any error found and output error message
10533  * Return 0 for no error found
10534  */
10535 static int check_extent_data_item(struct btrfs_root *root,
10536                                   struct extent_buffer *eb, int slot)
10537 {
10538         struct btrfs_file_extent_item *fi;
10539         struct btrfs_path path;
10540         struct btrfs_root *extent_root = root->fs_info->extent_root;
10541         struct btrfs_key fi_key;
10542         struct btrfs_key dbref_key;
10543         struct extent_buffer *leaf;
10544         struct btrfs_extent_item *ei;
10545         struct btrfs_extent_inline_ref *iref;
10546         struct btrfs_extent_data_ref *dref;
10547         u64 owner;
10548         u64 disk_bytenr;
10549         u64 disk_num_bytes;
10550         u64 extent_num_bytes;
10551         u64 extent_flags;
10552         u32 item_size;
10553         unsigned long end;
10554         unsigned long ptr;
10555         int type;
10556         u64 ref_root;
10557         int found_dbackref = 0;
10558         int err = 0;
10559         int ret;
10560
10561         btrfs_item_key_to_cpu(eb, &fi_key, slot);
10562         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10563
10564         /* Nothing to check for hole and inline data extents */
10565         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10566             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10567                 return 0;
10568
10569         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10570         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10571         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10572
10573         /* Check unaligned disk_num_bytes and num_bytes */
10574         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10575                 error(
10576 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10577                         fi_key.objectid, fi_key.offset, disk_num_bytes,
10578                         root->fs_info->sectorsize);
10579                 err |= BYTES_UNALIGNED;
10580         } else {
10581                 data_bytes_allocated += disk_num_bytes;
10582         }
10583         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10584                 error(
10585 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10586                         fi_key.objectid, fi_key.offset, extent_num_bytes,
10587                         root->fs_info->sectorsize);
10588                 err |= BYTES_UNALIGNED;
10589         } else {
10590                 data_bytes_referenced += extent_num_bytes;
10591         }
10592         owner = btrfs_header_owner(eb);
10593
10594         /* Check the extent item of the file extent in extent tree */
10595         btrfs_init_path(&path);
10596         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10597         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10598         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10599
10600         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10601         if (ret)
10602                 goto out;
10603
10604         leaf = path.nodes[0];
10605         slot = path.slots[0];
10606         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10607
10608         extent_flags = btrfs_extent_flags(leaf, ei);
10609
10610         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10611                 error(
10612                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10613                     disk_bytenr, disk_num_bytes,
10614                     BTRFS_EXTENT_FLAG_DATA);
10615                 err |= BACKREF_MISMATCH;
10616         }
10617
10618         /* Check data backref inside that extent item */
10619         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10620         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10621         ptr = (unsigned long)iref;
10622         end = (unsigned long)ei + item_size;
10623         while (ptr < end) {
10624                 iref = (struct btrfs_extent_inline_ref *)ptr;
10625                 type = btrfs_extent_inline_ref_type(leaf, iref);
10626                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10627
10628                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10629                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
10630                         if (ref_root == owner || ref_root == root->objectid)
10631                                 found_dbackref = 1;
10632                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10633                         found_dbackref = !check_tree_block_ref(root, NULL,
10634                                 btrfs_extent_inline_ref_offset(leaf, iref),
10635                                 0, owner);
10636                 }
10637
10638                 if (found_dbackref)
10639                         break;
10640                 ptr += btrfs_extent_inline_ref_size(type);
10641         }
10642
10643         if (!found_dbackref) {
10644                 btrfs_release_path(&path);
10645
10646                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10647                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10648                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10649                 dbref_key.offset = hash_extent_data_ref(root->objectid,
10650                                 fi_key.objectid, fi_key.offset);
10651
10652                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10653                                         &dbref_key, &path, 0, 0);
10654                 if (!ret) {
10655                         found_dbackref = 1;
10656                         goto out;
10657                 }
10658
10659                 btrfs_release_path(&path);
10660
10661                 /*
10662                  * Neither inlined nor EXTENT_DATA_REF found, try
10663                  * SHARED_DATA_REF as last chance.
10664                  */
10665                 dbref_key.objectid = disk_bytenr;
10666                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10667                 dbref_key.offset = eb->start;
10668
10669                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10670                                         &dbref_key, &path, 0, 0);
10671                 if (!ret) {
10672                         found_dbackref = 1;
10673                         goto out;
10674                 }
10675         }
10676
10677 out:
10678         if (!found_dbackref)
10679                 err |= BACKREF_MISSING;
10680         btrfs_release_path(&path);
10681         if (err & BACKREF_MISSING) {
10682                 error("data extent[%llu %llu] backref lost",
10683                       disk_bytenr, disk_num_bytes);
10684         }
10685         return err;
10686 }
10687
10688 /*
10689  * Get real tree block level for the case like shared block
10690  * Return >= 0 as tree level
10691  * Return <0 for error
10692  */
10693 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10694 {
10695         struct extent_buffer *eb;
10696         struct btrfs_path path;
10697         struct btrfs_key key;
10698         struct btrfs_extent_item *ei;
10699         u64 flags;
10700         u64 transid;
10701         u8 backref_level;
10702         u8 header_level;
10703         int ret;
10704
10705         /* Search extent tree for extent generation and level */
10706         key.objectid = bytenr;
10707         key.type = BTRFS_METADATA_ITEM_KEY;
10708         key.offset = (u64)-1;
10709
10710         btrfs_init_path(&path);
10711         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10712         if (ret < 0)
10713                 goto release_out;
10714         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10715         if (ret < 0)
10716                 goto release_out;
10717         if (ret > 0) {
10718                 ret = -ENOENT;
10719                 goto release_out;
10720         }
10721
10722         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10723         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10724                             struct btrfs_extent_item);
10725         flags = btrfs_extent_flags(path.nodes[0], ei);
10726         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10727                 ret = -ENOENT;
10728                 goto release_out;
10729         }
10730
10731         /* Get transid for later read_tree_block() check */
10732         transid = btrfs_extent_generation(path.nodes[0], ei);
10733
10734         /* Get backref level as one source */
10735         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10736                 backref_level = key.offset;
10737         } else {
10738                 struct btrfs_tree_block_info *info;
10739
10740                 info = (struct btrfs_tree_block_info *)(ei + 1);
10741                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10742         }
10743         btrfs_release_path(&path);
10744
10745         /* Get level from tree block as an alternative source */
10746         eb = read_tree_block(fs_info, bytenr, transid);
10747         if (!extent_buffer_uptodate(eb)) {
10748                 free_extent_buffer(eb);
10749                 return -EIO;
10750         }
10751         header_level = btrfs_header_level(eb);
10752         free_extent_buffer(eb);
10753
10754         if (header_level != backref_level)
10755                 return -EIO;
10756         return header_level;
10757
10758 release_out:
10759         btrfs_release_path(&path);
10760         return ret;
10761 }
10762
10763 /*
10764  * Check if a tree block backref is valid (points to a valid tree block)
10765  * if level == -1, level will be resolved
10766  * Return >0 for any error found and print error message
10767  */
10768 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10769                                     u64 bytenr, int level)
10770 {
10771         struct btrfs_root *root;
10772         struct btrfs_key key;
10773         struct btrfs_path path;
10774         struct extent_buffer *eb;
10775         struct extent_buffer *node;
10776         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10777         int err = 0;
10778         int ret;
10779
10780         /* Query level for level == -1 special case */
10781         if (level == -1)
10782                 level = query_tree_block_level(fs_info, bytenr);
10783         if (level < 0) {
10784                 err |= REFERENCER_MISSING;
10785                 goto out;
10786         }
10787
10788         key.objectid = root_id;
10789         key.type = BTRFS_ROOT_ITEM_KEY;
10790         key.offset = (u64)-1;
10791
10792         root = btrfs_read_fs_root(fs_info, &key);
10793         if (IS_ERR(root)) {
10794                 err |= REFERENCER_MISSING;
10795                 goto out;
10796         }
10797
10798         /* Read out the tree block to get item/node key */
10799         eb = read_tree_block(fs_info, bytenr, 0);
10800         if (!extent_buffer_uptodate(eb)) {
10801                 err |= REFERENCER_MISSING;
10802                 free_extent_buffer(eb);
10803                 goto out;
10804         }
10805
10806         /* Empty tree, no need to check key */
10807         if (!btrfs_header_nritems(eb) && !level) {
10808                 free_extent_buffer(eb);
10809                 goto out;
10810         }
10811
10812         if (level)
10813                 btrfs_node_key_to_cpu(eb, &key, 0);
10814         else
10815                 btrfs_item_key_to_cpu(eb, &key, 0);
10816
10817         free_extent_buffer(eb);
10818
10819         btrfs_init_path(&path);
10820         path.lowest_level = level;
10821         /* Search with the first key, to ensure we can reach it */
10822         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10823         if (ret < 0) {
10824                 err |= REFERENCER_MISSING;
10825                 goto release_out;
10826         }
10827
10828         node = path.nodes[level];
10829         if (btrfs_header_bytenr(node) != bytenr) {
10830                 error(
10831         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10832                         bytenr, nodesize, bytenr,
10833                         btrfs_header_bytenr(node));
10834                 err |= REFERENCER_MISMATCH;
10835         }
10836         if (btrfs_header_level(node) != level) {
10837                 error(
10838         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10839                         bytenr, nodesize, level,
10840                         btrfs_header_level(node));
10841                 err |= REFERENCER_MISMATCH;
10842         }
10843
10844 release_out:
10845         btrfs_release_path(&path);
10846 out:
10847         if (err & REFERENCER_MISSING) {
10848                 if (level < 0)
10849                         error("extent [%llu %d] lost referencer (owner: %llu)",
10850                                 bytenr, nodesize, root_id);
10851                 else
10852                         error(
10853                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10854                                 bytenr, nodesize, root_id, level);
10855         }
10856
10857         return err;
10858 }
10859
10860 /*
10861  * Check if tree block @eb is tree reloc root.
10862  * Return 0 if it's not or any problem happens
10863  * Return 1 if it's a tree reloc root
10864  */
10865 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10866                                  struct extent_buffer *eb)
10867 {
10868         struct btrfs_root *tree_reloc_root;
10869         struct btrfs_key key;
10870         u64 bytenr = btrfs_header_bytenr(eb);
10871         u64 owner = btrfs_header_owner(eb);
10872         int ret = 0;
10873
10874         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10875         key.offset = owner;
10876         key.type = BTRFS_ROOT_ITEM_KEY;
10877
10878         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10879         if (IS_ERR(tree_reloc_root))
10880                 return 0;
10881
10882         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10883                 ret = 1;
10884         btrfs_free_fs_root(tree_reloc_root);
10885         return ret;
10886 }
10887
10888 /*
10889  * Check referencer for shared block backref
10890  * If level == -1, this function will resolve the level.
10891  */
10892 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10893                                      u64 parent, u64 bytenr, int level)
10894 {
10895         struct extent_buffer *eb;
10896         u32 nr;
10897         int found_parent = 0;
10898         int i;
10899
10900         eb = read_tree_block(fs_info, parent, 0);
10901         if (!extent_buffer_uptodate(eb))
10902                 goto out;
10903
10904         if (level == -1)
10905                 level = query_tree_block_level(fs_info, bytenr);
10906         if (level < 0)
10907                 goto out;
10908
10909         /* It's possible it's a tree reloc root */
10910         if (parent == bytenr) {
10911                 if (is_tree_reloc_root(fs_info, eb))
10912                         found_parent = 1;
10913                 goto out;
10914         }
10915
10916         if (level + 1 != btrfs_header_level(eb))
10917                 goto out;
10918
10919         nr = btrfs_header_nritems(eb);
10920         for (i = 0; i < nr; i++) {
10921                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10922                         found_parent = 1;
10923                         break;
10924                 }
10925         }
10926 out:
10927         free_extent_buffer(eb);
10928         if (!found_parent) {
10929                 error(
10930         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10931                         bytenr, fs_info->nodesize, parent, level);
10932                 return REFERENCER_MISSING;
10933         }
10934         return 0;
10935 }
10936
10937 /*
10938  * Check referencer for normal (inlined) data ref
10939  * If len == 0, it will be resolved by searching in extent tree
10940  */
10941 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10942                                      u64 root_id, u64 objectid, u64 offset,
10943                                      u64 bytenr, u64 len, u32 count)
10944 {
10945         struct btrfs_root *root;
10946         struct btrfs_root *extent_root = fs_info->extent_root;
10947         struct btrfs_key key;
10948         struct btrfs_path path;
10949         struct extent_buffer *leaf;
10950         struct btrfs_file_extent_item *fi;
10951         u32 found_count = 0;
10952         int slot;
10953         int ret = 0;
10954
10955         if (!len) {
10956                 key.objectid = bytenr;
10957                 key.type = BTRFS_EXTENT_ITEM_KEY;
10958                 key.offset = (u64)-1;
10959
10960                 btrfs_init_path(&path);
10961                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10962                 if (ret < 0)
10963                         goto out;
10964                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10965                 if (ret)
10966                         goto out;
10967                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10968                 if (key.objectid != bytenr ||
10969                     key.type != BTRFS_EXTENT_ITEM_KEY)
10970                         goto out;
10971                 len = key.offset;
10972                 btrfs_release_path(&path);
10973         }
10974         key.objectid = root_id;
10975         key.type = BTRFS_ROOT_ITEM_KEY;
10976         key.offset = (u64)-1;
10977         btrfs_init_path(&path);
10978
10979         root = btrfs_read_fs_root(fs_info, &key);
10980         if (IS_ERR(root))
10981                 goto out;
10982
10983         key.objectid = objectid;
10984         key.type = BTRFS_EXTENT_DATA_KEY;
10985         /*
10986          * It can be nasty as data backref offset is
10987          * file offset - file extent offset, which is smaller or
10988          * equal to original backref offset.  The only special case is
10989          * overflow.  So we need to special check and do further search.
10990          */
10991         key.offset = offset & (1ULL << 63) ? 0 : offset;
10992
10993         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10994         if (ret < 0)
10995                 goto out;
10996
10997         /*
10998          * Search afterwards to get correct one
10999          * NOTE: As we must do a comprehensive check on the data backref to
11000          * make sure the dref count also matches, we must iterate all file
11001          * extents for that inode.
11002          */
11003         while (1) {
11004                 leaf = path.nodes[0];
11005                 slot = path.slots[0];
11006
11007                 if (slot >= btrfs_header_nritems(leaf))
11008                         goto next;
11009                 btrfs_item_key_to_cpu(leaf, &key, slot);
11010                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11011                         break;
11012                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11013                 /*
11014                  * Except normal disk bytenr and disk num bytes, we still
11015                  * need to do extra check on dbackref offset as
11016                  * dbackref offset = file_offset - file_extent_offset
11017                  */
11018                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11019                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11020                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11021                     offset)
11022                         found_count++;
11023
11024 next:
11025                 ret = btrfs_next_item(root, &path);
11026                 if (ret)
11027                         break;
11028         }
11029 out:
11030         btrfs_release_path(&path);
11031         if (found_count != count) {
11032                 error(
11033 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11034                         bytenr, len, root_id, objectid, offset, count, found_count);
11035                 return REFERENCER_MISSING;
11036         }
11037         return 0;
11038 }
11039
11040 /*
11041  * Check if the referencer of a shared data backref exists
11042  */
11043 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11044                                      u64 parent, u64 bytenr)
11045 {
11046         struct extent_buffer *eb;
11047         struct btrfs_key key;
11048         struct btrfs_file_extent_item *fi;
11049         u32 nr;
11050         int found_parent = 0;
11051         int i;
11052
11053         eb = read_tree_block(fs_info, parent, 0);
11054         if (!extent_buffer_uptodate(eb))
11055                 goto out;
11056
11057         nr = btrfs_header_nritems(eb);
11058         for (i = 0; i < nr; i++) {
11059                 btrfs_item_key_to_cpu(eb, &key, i);
11060                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11061                         continue;
11062
11063                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11064                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11065                         continue;
11066
11067                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11068                         found_parent = 1;
11069                         break;
11070                 }
11071         }
11072
11073 out:
11074         free_extent_buffer(eb);
11075         if (!found_parent) {
11076                 error("shared extent %llu referencer lost (parent: %llu)",
11077                         bytenr, parent);
11078                 return REFERENCER_MISSING;
11079         }
11080         return 0;
11081 }
11082
11083 /*
11084  * This function will check a given extent item, including its backref and
11085  * itself (like crossing stripe boundary and type)
11086  *
11087  * Since we don't use extent_record anymore, introduce new error bit
11088  */
11089 static int check_extent_item(struct btrfs_fs_info *fs_info,
11090                              struct extent_buffer *eb, int slot)
11091 {
11092         struct btrfs_extent_item *ei;
11093         struct btrfs_extent_inline_ref *iref;
11094         struct btrfs_extent_data_ref *dref;
11095         unsigned long end;
11096         unsigned long ptr;
11097         int type;
11098         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11099         u32 item_size = btrfs_item_size_nr(eb, slot);
11100         u64 flags;
11101         u64 offset;
11102         int metadata = 0;
11103         int level;
11104         struct btrfs_key key;
11105         int ret;
11106         int err = 0;
11107
11108         btrfs_item_key_to_cpu(eb, &key, slot);
11109         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11110                 bytes_used += key.offset;
11111         else
11112                 bytes_used += nodesize;
11113
11114         if (item_size < sizeof(*ei)) {
11115                 /*
11116                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11117                  * old thing when on disk format is still un-determined.
11118                  * No need to care about it anymore
11119                  */
11120                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11121                 return -ENOTTY;
11122         }
11123
11124         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11125         flags = btrfs_extent_flags(eb, ei);
11126
11127         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11128                 metadata = 1;
11129         if (metadata && check_crossing_stripes(global_info, key.objectid,
11130                                                eb->len)) {
11131                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11132                       key.objectid, key.objectid + nodesize);
11133                 err |= CROSSING_STRIPE_BOUNDARY;
11134         }
11135
11136         ptr = (unsigned long)(ei + 1);
11137
11138         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11139                 /* Old EXTENT_ITEM metadata */
11140                 struct btrfs_tree_block_info *info;
11141
11142                 info = (struct btrfs_tree_block_info *)ptr;
11143                 level = btrfs_tree_block_level(eb, info);
11144                 ptr += sizeof(struct btrfs_tree_block_info);
11145         } else {
11146                 /* New METADATA_ITEM */
11147                 level = key.offset;
11148         }
11149         end = (unsigned long)ei + item_size;
11150
11151 next:
11152         /* Reached extent item end normally */
11153         if (ptr == end)
11154                 goto out;
11155
11156         /* Beyond extent item end, wrong item size */
11157         if (ptr > end) {
11158                 err |= ITEM_SIZE_MISMATCH;
11159                 error("extent item at bytenr %llu slot %d has wrong size",
11160                         eb->start, slot);
11161                 goto out;
11162         }
11163
11164         /* Now check every backref in this extent item */
11165         iref = (struct btrfs_extent_inline_ref *)ptr;
11166         type = btrfs_extent_inline_ref_type(eb, iref);
11167         offset = btrfs_extent_inline_ref_offset(eb, iref);
11168         switch (type) {
11169         case BTRFS_TREE_BLOCK_REF_KEY:
11170                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11171                                                level);
11172                 err |= ret;
11173                 break;
11174         case BTRFS_SHARED_BLOCK_REF_KEY:
11175                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11176                                                  level);
11177                 err |= ret;
11178                 break;
11179         case BTRFS_EXTENT_DATA_REF_KEY:
11180                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11181                 ret = check_extent_data_backref(fs_info,
11182                                 btrfs_extent_data_ref_root(eb, dref),
11183                                 btrfs_extent_data_ref_objectid(eb, dref),
11184                                 btrfs_extent_data_ref_offset(eb, dref),
11185                                 key.objectid, key.offset,
11186                                 btrfs_extent_data_ref_count(eb, dref));
11187                 err |= ret;
11188                 break;
11189         case BTRFS_SHARED_DATA_REF_KEY:
11190                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11191                 err |= ret;
11192                 break;
11193         default:
11194                 error("extent[%llu %d %llu] has unknown ref type: %d",
11195                         key.objectid, key.type, key.offset, type);
11196                 err |= UNKNOWN_TYPE;
11197                 goto out;
11198         }
11199
11200         ptr += btrfs_extent_inline_ref_size(type);
11201         goto next;
11202
11203 out:
11204         return err;
11205 }
11206
11207 /*
11208  * Check if a dev extent item is referred correctly by its chunk
11209  */
11210 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11211                                  struct extent_buffer *eb, int slot)
11212 {
11213         struct btrfs_root *chunk_root = fs_info->chunk_root;
11214         struct btrfs_dev_extent *ptr;
11215         struct btrfs_path path;
11216         struct btrfs_key chunk_key;
11217         struct btrfs_key devext_key;
11218         struct btrfs_chunk *chunk;
11219         struct extent_buffer *l;
11220         int num_stripes;
11221         u64 length;
11222         int i;
11223         int found_chunk = 0;
11224         int ret;
11225
11226         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11227         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11228         length = btrfs_dev_extent_length(eb, ptr);
11229
11230         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11231         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11232         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11233
11234         btrfs_init_path(&path);
11235         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11236         if (ret)
11237                 goto out;
11238
11239         l = path.nodes[0];
11240         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11241         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11242                                       chunk_key.offset);
11243         if (ret < 0)
11244                 goto out;
11245
11246         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11247                 goto out;
11248
11249         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11250         for (i = 0; i < num_stripes; i++) {
11251                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11252                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11253
11254                 if (devid == devext_key.objectid &&
11255                     offset == devext_key.offset) {
11256                         found_chunk = 1;
11257                         break;
11258                 }
11259         }
11260 out:
11261         btrfs_release_path(&path);
11262         if (!found_chunk) {
11263                 error(
11264                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11265                         devext_key.objectid, devext_key.offset, length);
11266                 return REFERENCER_MISSING;
11267         }
11268         return 0;
11269 }
11270
11271 /*
11272  * Check if the used space is correct with the dev item
11273  */
11274 static int check_dev_item(struct btrfs_fs_info *fs_info,
11275                           struct extent_buffer *eb, int slot)
11276 {
11277         struct btrfs_root *dev_root = fs_info->dev_root;
11278         struct btrfs_dev_item *dev_item;
11279         struct btrfs_path path;
11280         struct btrfs_key key;
11281         struct btrfs_dev_extent *ptr;
11282         u64 dev_id;
11283         u64 used;
11284         u64 total = 0;
11285         int ret;
11286
11287         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11288         dev_id = btrfs_device_id(eb, dev_item);
11289         used = btrfs_device_bytes_used(eb, dev_item);
11290
11291         key.objectid = dev_id;
11292         key.type = BTRFS_DEV_EXTENT_KEY;
11293         key.offset = 0;
11294
11295         btrfs_init_path(&path);
11296         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11297         if (ret < 0) {
11298                 btrfs_item_key_to_cpu(eb, &key, slot);
11299                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11300                         key.objectid, key.type, key.offset);
11301                 btrfs_release_path(&path);
11302                 return REFERENCER_MISSING;
11303         }
11304
11305         /* Iterate dev_extents to calculate the used space of a device */
11306         while (1) {
11307                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11308                         goto next;
11309
11310                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11311                 if (key.objectid > dev_id)
11312                         break;
11313                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11314                         goto next;
11315
11316                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11317                                      struct btrfs_dev_extent);
11318                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11319 next:
11320                 ret = btrfs_next_item(dev_root, &path);
11321                 if (ret)
11322                         break;
11323         }
11324         btrfs_release_path(&path);
11325
11326         if (used != total) {
11327                 btrfs_item_key_to_cpu(eb, &key, slot);
11328                 error(
11329 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11330                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11331                         BTRFS_DEV_EXTENT_KEY, dev_id);
11332                 return ACCOUNTING_MISMATCH;
11333         }
11334         return 0;
11335 }
11336
11337 /*
11338  * Check a block group item with its referener (chunk) and its used space
11339  * with extent/metadata item
11340  */
11341 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11342                                   struct extent_buffer *eb, int slot)
11343 {
11344         struct btrfs_root *extent_root = fs_info->extent_root;
11345         struct btrfs_root *chunk_root = fs_info->chunk_root;
11346         struct btrfs_block_group_item *bi;
11347         struct btrfs_block_group_item bg_item;
11348         struct btrfs_path path;
11349         struct btrfs_key bg_key;
11350         struct btrfs_key chunk_key;
11351         struct btrfs_key extent_key;
11352         struct btrfs_chunk *chunk;
11353         struct extent_buffer *leaf;
11354         struct btrfs_extent_item *ei;
11355         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11356         u64 flags;
11357         u64 bg_flags;
11358         u64 used;
11359         u64 total = 0;
11360         int ret;
11361         int err = 0;
11362
11363         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11364         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11365         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11366         used = btrfs_block_group_used(&bg_item);
11367         bg_flags = btrfs_block_group_flags(&bg_item);
11368
11369         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11370         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11371         chunk_key.offset = bg_key.objectid;
11372
11373         btrfs_init_path(&path);
11374         /* Search for the referencer chunk */
11375         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11376         if (ret) {
11377                 error(
11378                 "block group[%llu %llu] did not find the related chunk item",
11379                         bg_key.objectid, bg_key.offset);
11380                 err |= REFERENCER_MISSING;
11381         } else {
11382                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11383                                         struct btrfs_chunk);
11384                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11385                                                 bg_key.offset) {
11386                         error(
11387         "block group[%llu %llu] related chunk item length does not match",
11388                                 bg_key.objectid, bg_key.offset);
11389                         err |= REFERENCER_MISMATCH;
11390                 }
11391         }
11392         btrfs_release_path(&path);
11393
11394         /* Search from the block group bytenr */
11395         extent_key.objectid = bg_key.objectid;
11396         extent_key.type = 0;
11397         extent_key.offset = 0;
11398
11399         btrfs_init_path(&path);
11400         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11401         if (ret < 0)
11402                 goto out;
11403
11404         /* Iterate extent tree to account used space */
11405         while (1) {
11406                 leaf = path.nodes[0];
11407
11408                 /* Search slot can point to the last item beyond leaf nritems */
11409                 if (path.slots[0] >= btrfs_header_nritems(leaf))
11410                         goto next;
11411
11412                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11413                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11414                         break;
11415
11416                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11417                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11418                         goto next;
11419                 if (extent_key.objectid < bg_key.objectid)
11420                         goto next;
11421
11422                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11423                         total += nodesize;
11424                 else
11425                         total += extent_key.offset;
11426
11427                 ei = btrfs_item_ptr(leaf, path.slots[0],
11428                                     struct btrfs_extent_item);
11429                 flags = btrfs_extent_flags(leaf, ei);
11430                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11431                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11432                                 error(
11433                         "bad extent[%llu, %llu) type mismatch with chunk",
11434                                         extent_key.objectid,
11435                                         extent_key.objectid + extent_key.offset);
11436                                 err |= CHUNK_TYPE_MISMATCH;
11437                         }
11438                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11439                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11440                                     BTRFS_BLOCK_GROUP_METADATA))) {
11441                                 error(
11442                         "bad extent[%llu, %llu) type mismatch with chunk",
11443                                         extent_key.objectid,
11444                                         extent_key.objectid + nodesize);
11445                                 err |= CHUNK_TYPE_MISMATCH;
11446                         }
11447                 }
11448 next:
11449                 ret = btrfs_next_item(extent_root, &path);
11450                 if (ret)
11451                         break;
11452         }
11453
11454 out:
11455         btrfs_release_path(&path);
11456
11457         if (total != used) {
11458                 error(
11459                 "block group[%llu %llu] used %llu but extent items used %llu",
11460                         bg_key.objectid, bg_key.offset, used, total);
11461                 err |= ACCOUNTING_MISMATCH;
11462         }
11463         return err;
11464 }
11465
11466 /*
11467  * Check a chunk item.
11468  * Including checking all referred dev_extents and block group
11469  */
11470 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11471                             struct extent_buffer *eb, int slot)
11472 {
11473         struct btrfs_root *extent_root = fs_info->extent_root;
11474         struct btrfs_root *dev_root = fs_info->dev_root;
11475         struct btrfs_path path;
11476         struct btrfs_key chunk_key;
11477         struct btrfs_key bg_key;
11478         struct btrfs_key devext_key;
11479         struct btrfs_chunk *chunk;
11480         struct extent_buffer *leaf;
11481         struct btrfs_block_group_item *bi;
11482         struct btrfs_block_group_item bg_item;
11483         struct btrfs_dev_extent *ptr;
11484         u64 length;
11485         u64 chunk_end;
11486         u64 stripe_len;
11487         u64 type;
11488         int num_stripes;
11489         u64 offset;
11490         u64 objectid;
11491         int i;
11492         int ret;
11493         int err = 0;
11494
11495         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11496         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11497         length = btrfs_chunk_length(eb, chunk);
11498         chunk_end = chunk_key.offset + length;
11499         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11500                                       chunk_key.offset);
11501         if (ret < 0) {
11502                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11503                         chunk_end);
11504                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11505                 goto out;
11506         }
11507         type = btrfs_chunk_type(eb, chunk);
11508
11509         bg_key.objectid = chunk_key.offset;
11510         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11511         bg_key.offset = length;
11512
11513         btrfs_init_path(&path);
11514         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11515         if (ret) {
11516                 error(
11517                 "chunk[%llu %llu) did not find the related block group item",
11518                         chunk_key.offset, chunk_end);
11519                 err |= REFERENCER_MISSING;
11520         } else{
11521                 leaf = path.nodes[0];
11522                 bi = btrfs_item_ptr(leaf, path.slots[0],
11523                                     struct btrfs_block_group_item);
11524                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11525                                    sizeof(bg_item));
11526                 if (btrfs_block_group_flags(&bg_item) != type) {
11527                         error(
11528 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11529                                 chunk_key.offset, chunk_end, type,
11530                                 btrfs_block_group_flags(&bg_item));
11531                         err |= REFERENCER_MISSING;
11532                 }
11533         }
11534
11535         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11536         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11537         for (i = 0; i < num_stripes; i++) {
11538                 btrfs_release_path(&path);
11539                 btrfs_init_path(&path);
11540                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11541                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11542                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11543
11544                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11545                                         0, 0);
11546                 if (ret)
11547                         goto not_match_dev;
11548
11549                 leaf = path.nodes[0];
11550                 ptr = btrfs_item_ptr(leaf, path.slots[0],
11551                                      struct btrfs_dev_extent);
11552                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11553                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11554                 if (objectid != chunk_key.objectid ||
11555                     offset != chunk_key.offset ||
11556                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11557                         goto not_match_dev;
11558                 continue;
11559 not_match_dev:
11560                 err |= BACKREF_MISSING;
11561                 error(
11562                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11563                         chunk_key.objectid, chunk_end, i);
11564                 continue;
11565         }
11566         btrfs_release_path(&path);
11567 out:
11568         return err;
11569 }
11570
11571 /*
11572  * Main entry function to check known items and update related accounting info
11573  */
11574 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11575 {
11576         struct btrfs_fs_info *fs_info = root->fs_info;
11577         struct btrfs_key key;
11578         int slot = 0;
11579         int type;
11580         struct btrfs_extent_data_ref *dref;
11581         int ret;
11582         int err = 0;
11583
11584 next:
11585         btrfs_item_key_to_cpu(eb, &key, slot);
11586         type = key.type;
11587
11588         switch (type) {
11589         case BTRFS_EXTENT_DATA_KEY:
11590                 ret = check_extent_data_item(root, eb, slot);
11591                 err |= ret;
11592                 break;
11593         case BTRFS_BLOCK_GROUP_ITEM_KEY:
11594                 ret = check_block_group_item(fs_info, eb, slot);
11595                 err |= ret;
11596                 break;
11597         case BTRFS_DEV_ITEM_KEY:
11598                 ret = check_dev_item(fs_info, eb, slot);
11599                 err |= ret;
11600                 break;
11601         case BTRFS_CHUNK_ITEM_KEY:
11602                 ret = check_chunk_item(fs_info, eb, slot);
11603                 err |= ret;
11604                 break;
11605         case BTRFS_DEV_EXTENT_KEY:
11606                 ret = check_dev_extent_item(fs_info, eb, slot);
11607                 err |= ret;
11608                 break;
11609         case BTRFS_EXTENT_ITEM_KEY:
11610         case BTRFS_METADATA_ITEM_KEY:
11611                 ret = check_extent_item(fs_info, eb, slot);
11612                 err |= ret;
11613                 break;
11614         case BTRFS_EXTENT_CSUM_KEY:
11615                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11616                 break;
11617         case BTRFS_TREE_BLOCK_REF_KEY:
11618                 ret = check_tree_block_backref(fs_info, key.offset,
11619                                                key.objectid, -1);
11620                 err |= ret;
11621                 break;
11622         case BTRFS_EXTENT_DATA_REF_KEY:
11623                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11624                 ret = check_extent_data_backref(fs_info,
11625                                 btrfs_extent_data_ref_root(eb, dref),
11626                                 btrfs_extent_data_ref_objectid(eb, dref),
11627                                 btrfs_extent_data_ref_offset(eb, dref),
11628                                 key.objectid, 0,
11629                                 btrfs_extent_data_ref_count(eb, dref));
11630                 err |= ret;
11631                 break;
11632         case BTRFS_SHARED_BLOCK_REF_KEY:
11633                 ret = check_shared_block_backref(fs_info, key.offset,
11634                                                  key.objectid, -1);
11635                 err |= ret;
11636                 break;
11637         case BTRFS_SHARED_DATA_REF_KEY:
11638                 ret = check_shared_data_backref(fs_info, key.offset,
11639                                                 key.objectid);
11640                 err |= ret;
11641                 break;
11642         default:
11643                 break;
11644         }
11645
11646         if (++slot < btrfs_header_nritems(eb))
11647                 goto next;
11648
11649         return err;
11650 }
11651
11652 /*
11653  * Helper function for later fs/subvol tree check.  To determine if a tree
11654  * block should be checked.
11655  * This function will ensure only the direct referencer with lowest rootid to
11656  * check a fs/subvolume tree block.
11657  *
11658  * Backref check at extent tree would detect errors like missing subvolume
11659  * tree, so we can do aggressive check to reduce duplicated checks.
11660  */
11661 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11662 {
11663         struct btrfs_root *extent_root = root->fs_info->extent_root;
11664         struct btrfs_key key;
11665         struct btrfs_path path;
11666         struct extent_buffer *leaf;
11667         int slot;
11668         struct btrfs_extent_item *ei;
11669         unsigned long ptr;
11670         unsigned long end;
11671         int type;
11672         u32 item_size;
11673         u64 offset;
11674         struct btrfs_extent_inline_ref *iref;
11675         int ret;
11676
11677         btrfs_init_path(&path);
11678         key.objectid = btrfs_header_bytenr(eb);
11679         key.type = BTRFS_METADATA_ITEM_KEY;
11680         key.offset = (u64)-1;
11681
11682         /*
11683          * Any failure in backref resolving means we can't determine
11684          * whom the tree block belongs to.
11685          * So in that case, we need to check that tree block
11686          */
11687         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11688         if (ret < 0)
11689                 goto need_check;
11690
11691         ret = btrfs_previous_extent_item(extent_root, &path,
11692                                          btrfs_header_bytenr(eb));
11693         if (ret)
11694                 goto need_check;
11695
11696         leaf = path.nodes[0];
11697         slot = path.slots[0];
11698         btrfs_item_key_to_cpu(leaf, &key, slot);
11699         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11700
11701         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11702                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11703         } else {
11704                 struct btrfs_tree_block_info *info;
11705
11706                 info = (struct btrfs_tree_block_info *)(ei + 1);
11707                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11708         }
11709
11710         item_size = btrfs_item_size_nr(leaf, slot);
11711         ptr = (unsigned long)iref;
11712         end = (unsigned long)ei + item_size;
11713         while (ptr < end) {
11714                 iref = (struct btrfs_extent_inline_ref *)ptr;
11715                 type = btrfs_extent_inline_ref_type(leaf, iref);
11716                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11717
11718                 /*
11719                  * We only check the tree block if current root is
11720                  * the lowest referencer of it.
11721                  */
11722                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11723                     offset < root->objectid) {
11724                         btrfs_release_path(&path);
11725                         return 0;
11726                 }
11727
11728                 ptr += btrfs_extent_inline_ref_size(type);
11729         }
11730         /*
11731          * Normally we should also check keyed tree block ref, but that may be
11732          * very time consuming.  Inlined ref should already make us skip a lot
11733          * of refs now.  So skip search keyed tree block ref.
11734          */
11735
11736 need_check:
11737         btrfs_release_path(&path);
11738         return 1;
11739 }
11740
11741 /*
11742  * Traversal function for tree block. We will do:
11743  * 1) Skip shared fs/subvolume tree blocks
11744  * 2) Update related bytes accounting
11745  * 3) Pre-order traversal
11746  */
11747 static int traverse_tree_block(struct btrfs_root *root,
11748                                 struct extent_buffer *node)
11749 {
11750         struct extent_buffer *eb;
11751         struct btrfs_key key;
11752         struct btrfs_key drop_key;
11753         int level;
11754         u64 nr;
11755         int i;
11756         int err = 0;
11757         int ret;
11758
11759         /*
11760          * Skip shared fs/subvolume tree block, in that case they will
11761          * be checked by referencer with lowest rootid
11762          */
11763         if (is_fstree(root->objectid) && !should_check(root, node))
11764                 return 0;
11765
11766         /* Update bytes accounting */
11767         total_btree_bytes += node->len;
11768         if (fs_root_objectid(btrfs_header_owner(node)))
11769                 total_fs_tree_bytes += node->len;
11770         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11771                 total_extent_tree_bytes += node->len;
11772
11773         /* pre-order tranversal, check itself first */
11774         level = btrfs_header_level(node);
11775         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11776                                    btrfs_header_level(node),
11777                                    btrfs_header_owner(node));
11778         err |= ret;
11779         if (err)
11780                 error(
11781         "check %s failed root %llu bytenr %llu level %d, force continue check",
11782                         level ? "node":"leaf", root->objectid,
11783                         btrfs_header_bytenr(node), btrfs_header_level(node));
11784
11785         if (!level) {
11786                 btree_space_waste += btrfs_leaf_free_space(root, node);
11787                 ret = check_leaf_items(root, node);
11788                 err |= ret;
11789                 return err;
11790         }
11791
11792         nr = btrfs_header_nritems(node);
11793         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11794         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11795                 sizeof(struct btrfs_key_ptr);
11796
11797         /* Then check all its children */
11798         for (i = 0; i < nr; i++) {
11799                 u64 blocknr = btrfs_node_blockptr(node, i);
11800
11801                 btrfs_node_key_to_cpu(node, &key, i);
11802                 if (level == root->root_item.drop_level &&
11803                     is_dropped_key(&key, &drop_key))
11804                         continue;
11805
11806                 /*
11807                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11808                  * to call the function itself.
11809                  */
11810                 eb = read_tree_block(root->fs_info, blocknr, 0);
11811                 if (extent_buffer_uptodate(eb)) {
11812                         ret = traverse_tree_block(root, eb);
11813                         err |= ret;
11814                 }
11815                 free_extent_buffer(eb);
11816         }
11817
11818         return err;
11819 }
11820
11821 /*
11822  * Low memory usage version check_chunks_and_extents.
11823  */
11824 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11825 {
11826         struct btrfs_path path;
11827         struct btrfs_key key;
11828         struct btrfs_root *root1;
11829         struct btrfs_root *root;
11830         struct btrfs_root *cur_root;
11831         int err = 0;
11832         int ret;
11833
11834         root = fs_info->fs_root;
11835
11836         root1 = root->fs_info->chunk_root;
11837         ret = traverse_tree_block(root1, root1->node);
11838         err |= ret;
11839
11840         root1 = root->fs_info->tree_root;
11841         ret = traverse_tree_block(root1, root1->node);
11842         err |= ret;
11843
11844         btrfs_init_path(&path);
11845         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11846         key.offset = 0;
11847         key.type = BTRFS_ROOT_ITEM_KEY;
11848
11849         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11850         if (ret) {
11851                 error("cannot find extent treet in tree_root");
11852                 goto out;
11853         }
11854
11855         while (1) {
11856                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11857                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11858                         goto next;
11859                 key.offset = (u64)-1;
11860
11861                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11862                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11863                                         &key);
11864                 else
11865                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
11866                 if (IS_ERR(cur_root) || !cur_root) {
11867                         error("failed to read tree: %lld", key.objectid);
11868                         goto next;
11869                 }
11870
11871                 ret = traverse_tree_block(cur_root, cur_root->node);
11872                 err |= ret;
11873
11874                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11875                         btrfs_free_fs_root(cur_root);
11876 next:
11877                 ret = btrfs_next_item(root1, &path);
11878                 if (ret)
11879                         goto out;
11880         }
11881
11882 out:
11883         btrfs_release_path(&path);
11884         return err;
11885 }
11886
11887 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11888 {
11889         int ret;
11890
11891         if (!ctx.progress_enabled)
11892                 fprintf(stderr, "checking extents\n");
11893         if (check_mode == CHECK_MODE_LOWMEM)
11894                 ret = check_chunks_and_extents_v2(fs_info);
11895         else
11896                 ret = check_chunks_and_extents(fs_info);
11897
11898         return ret;
11899 }
11900
11901 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11902                            struct btrfs_root *root, int overwrite)
11903 {
11904         struct extent_buffer *c;
11905         struct extent_buffer *old = root->node;
11906         int level;
11907         int ret;
11908         struct btrfs_disk_key disk_key = {0,0,0};
11909
11910         level = 0;
11911
11912         if (overwrite) {
11913                 c = old;
11914                 extent_buffer_get(c);
11915                 goto init;
11916         }
11917         c = btrfs_alloc_free_block(trans, root,
11918                                    root->fs_info->nodesize,
11919                                    root->root_key.objectid,
11920                                    &disk_key, level, 0, 0);
11921         if (IS_ERR(c)) {
11922                 c = old;
11923                 extent_buffer_get(c);
11924                 overwrite = 1;
11925         }
11926 init:
11927         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11928         btrfs_set_header_level(c, level);
11929         btrfs_set_header_bytenr(c, c->start);
11930         btrfs_set_header_generation(c, trans->transid);
11931         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11932         btrfs_set_header_owner(c, root->root_key.objectid);
11933
11934         write_extent_buffer(c, root->fs_info->fsid,
11935                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11936
11937         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11938                             btrfs_header_chunk_tree_uuid(c),
11939                             BTRFS_UUID_SIZE);
11940
11941         btrfs_mark_buffer_dirty(c);
11942         /*
11943          * this case can happen in the following case:
11944          *
11945          * 1.overwrite previous root.
11946          *
11947          * 2.reinit reloc data root, this is because we skip pin
11948          * down reloc data tree before which means we can allocate
11949          * same block bytenr here.
11950          */
11951         if (old->start == c->start) {
11952                 btrfs_set_root_generation(&root->root_item,
11953                                           trans->transid);
11954                 root->root_item.level = btrfs_header_level(root->node);
11955                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11956                                         &root->root_key, &root->root_item);
11957                 if (ret) {
11958                         free_extent_buffer(c);
11959                         return ret;
11960                 }
11961         }
11962         free_extent_buffer(old);
11963         root->node = c;
11964         add_root_to_dirty_list(root);
11965         return 0;
11966 }
11967
11968 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11969                                 struct extent_buffer *eb, int tree_root)
11970 {
11971         struct extent_buffer *tmp;
11972         struct btrfs_root_item *ri;
11973         struct btrfs_key key;
11974         u64 bytenr;
11975         int level = btrfs_header_level(eb);
11976         int nritems;
11977         int ret;
11978         int i;
11979
11980         /*
11981          * If we have pinned this block before, don't pin it again.
11982          * This can not only avoid forever loop with broken filesystem
11983          * but also give us some speedups.
11984          */
11985         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11986                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11987                 return 0;
11988
11989         btrfs_pin_extent(fs_info, eb->start, eb->len);
11990
11991         nritems = btrfs_header_nritems(eb);
11992         for (i = 0; i < nritems; i++) {
11993                 if (level == 0) {
11994                         btrfs_item_key_to_cpu(eb, &key, i);
11995                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11996                                 continue;
11997                         /* Skip the extent root and reloc roots */
11998                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11999                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12000                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12001                                 continue;
12002                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12003                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12004
12005                         /*
12006                          * If at any point we start needing the real root we
12007                          * will have to build a stump root for the root we are
12008                          * in, but for now this doesn't actually use the root so
12009                          * just pass in extent_root.
12010                          */
12011                         tmp = read_tree_block(fs_info, bytenr, 0);
12012                         if (!extent_buffer_uptodate(tmp)) {
12013                                 fprintf(stderr, "Error reading root block\n");
12014                                 return -EIO;
12015                         }
12016                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12017                         free_extent_buffer(tmp);
12018                         if (ret)
12019                                 return ret;
12020                 } else {
12021                         bytenr = btrfs_node_blockptr(eb, i);
12022
12023                         /* If we aren't the tree root don't read the block */
12024                         if (level == 1 && !tree_root) {
12025                                 btrfs_pin_extent(fs_info, bytenr,
12026                                                 fs_info->nodesize);
12027                                 continue;
12028                         }
12029
12030                         tmp = read_tree_block(fs_info, bytenr, 0);
12031                         if (!extent_buffer_uptodate(tmp)) {
12032                                 fprintf(stderr, "Error reading tree block\n");
12033                                 return -EIO;
12034                         }
12035                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12036                         free_extent_buffer(tmp);
12037                         if (ret)
12038                                 return ret;
12039                 }
12040         }
12041
12042         return 0;
12043 }
12044
12045 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12046 {
12047         int ret;
12048
12049         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12050         if (ret)
12051                 return ret;
12052
12053         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12054 }
12055
12056 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12057 {
12058         struct btrfs_block_group_cache *cache;
12059         struct btrfs_path path;
12060         struct extent_buffer *leaf;
12061         struct btrfs_chunk *chunk;
12062         struct btrfs_key key;
12063         int ret;
12064         u64 start;
12065
12066         btrfs_init_path(&path);
12067         key.objectid = 0;
12068         key.type = BTRFS_CHUNK_ITEM_KEY;
12069         key.offset = 0;
12070         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12071         if (ret < 0) {
12072                 btrfs_release_path(&path);
12073                 return ret;
12074         }
12075
12076         /*
12077          * We do this in case the block groups were screwed up and had alloc
12078          * bits that aren't actually set on the chunks.  This happens with
12079          * restored images every time and could happen in real life I guess.
12080          */
12081         fs_info->avail_data_alloc_bits = 0;
12082         fs_info->avail_metadata_alloc_bits = 0;
12083         fs_info->avail_system_alloc_bits = 0;
12084
12085         /* First we need to create the in-memory block groups */
12086         while (1) {
12087                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12088                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12089                         if (ret < 0) {
12090                                 btrfs_release_path(&path);
12091                                 return ret;
12092                         }
12093                         if (ret) {
12094                                 ret = 0;
12095                                 break;
12096                         }
12097                 }
12098                 leaf = path.nodes[0];
12099                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12100                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12101                         path.slots[0]++;
12102                         continue;
12103                 }
12104
12105                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12106                 btrfs_add_block_group(fs_info, 0,
12107                                       btrfs_chunk_type(leaf, chunk),
12108                                       key.objectid, key.offset,
12109                                       btrfs_chunk_length(leaf, chunk));
12110                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12111                                  key.offset + btrfs_chunk_length(leaf, chunk));
12112                 path.slots[0]++;
12113         }
12114         start = 0;
12115         while (1) {
12116                 cache = btrfs_lookup_first_block_group(fs_info, start);
12117                 if (!cache)
12118                         break;
12119                 cache->cached = 1;
12120                 start = cache->key.objectid + cache->key.offset;
12121         }
12122
12123         btrfs_release_path(&path);
12124         return 0;
12125 }
12126
12127 static int reset_balance(struct btrfs_trans_handle *trans,
12128                          struct btrfs_fs_info *fs_info)
12129 {
12130         struct btrfs_root *root = fs_info->tree_root;
12131         struct btrfs_path path;
12132         struct extent_buffer *leaf;
12133         struct btrfs_key key;
12134         int del_slot, del_nr = 0;
12135         int ret;
12136         int found = 0;
12137
12138         btrfs_init_path(&path);
12139         key.objectid = BTRFS_BALANCE_OBJECTID;
12140         key.type = BTRFS_BALANCE_ITEM_KEY;
12141         key.offset = 0;
12142         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12143         if (ret) {
12144                 if (ret > 0)
12145                         ret = 0;
12146                 if (!ret)
12147                         goto reinit_data_reloc;
12148                 else
12149                         goto out;
12150         }
12151
12152         ret = btrfs_del_item(trans, root, &path);
12153         if (ret)
12154                 goto out;
12155         btrfs_release_path(&path);
12156
12157         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12158         key.type = BTRFS_ROOT_ITEM_KEY;
12159         key.offset = 0;
12160         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12161         if (ret < 0)
12162                 goto out;
12163         while (1) {
12164                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12165                         if (!found)
12166                                 break;
12167
12168                         if (del_nr) {
12169                                 ret = btrfs_del_items(trans, root, &path,
12170                                                       del_slot, del_nr);
12171                                 del_nr = 0;
12172                                 if (ret)
12173                                         goto out;
12174                         }
12175                         key.offset++;
12176                         btrfs_release_path(&path);
12177
12178                         found = 0;
12179                         ret = btrfs_search_slot(trans, root, &key, &path,
12180                                                 -1, 1);
12181                         if (ret < 0)
12182                                 goto out;
12183                         continue;
12184                 }
12185                 found = 1;
12186                 leaf = path.nodes[0];
12187                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12188                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12189                         break;
12190                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12191                         path.slots[0]++;
12192                         continue;
12193                 }
12194                 if (!del_nr) {
12195                         del_slot = path.slots[0];
12196                         del_nr = 1;
12197                 } else {
12198                         del_nr++;
12199                 }
12200                 path.slots[0]++;
12201         }
12202
12203         if (del_nr) {
12204                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12205                 if (ret)
12206                         goto out;
12207         }
12208         btrfs_release_path(&path);
12209
12210 reinit_data_reloc:
12211         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12212         key.type = BTRFS_ROOT_ITEM_KEY;
12213         key.offset = (u64)-1;
12214         root = btrfs_read_fs_root(fs_info, &key);
12215         if (IS_ERR(root)) {
12216                 fprintf(stderr, "Error reading data reloc tree\n");
12217                 ret = PTR_ERR(root);
12218                 goto out;
12219         }
12220         record_root_in_trans(trans, root);
12221         ret = btrfs_fsck_reinit_root(trans, root, 0);
12222         if (ret)
12223                 goto out;
12224         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12225 out:
12226         btrfs_release_path(&path);
12227         return ret;
12228 }
12229
12230 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12231                               struct btrfs_fs_info *fs_info)
12232 {
12233         u64 start = 0;
12234         int ret;
12235
12236         /*
12237          * The only reason we don't do this is because right now we're just
12238          * walking the trees we find and pinning down their bytes, we don't look
12239          * at any of the leaves.  In order to do mixed groups we'd have to check
12240          * the leaves of any fs roots and pin down the bytes for any file
12241          * extents we find.  Not hard but why do it if we don't have to?
12242          */
12243         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12244                 fprintf(stderr, "We don't support re-initing the extent tree "
12245                         "for mixed block groups yet, please notify a btrfs "
12246                         "developer you want to do this so they can add this "
12247                         "functionality.\n");
12248                 return -EINVAL;
12249         }
12250
12251         /*
12252          * first we need to walk all of the trees except the extent tree and pin
12253          * down the bytes that are in use so we don't overwrite any existing
12254          * metadata.
12255          */
12256         ret = pin_metadata_blocks(fs_info);
12257         if (ret) {
12258                 fprintf(stderr, "error pinning down used bytes\n");
12259                 return ret;
12260         }
12261
12262         /*
12263          * Need to drop all the block groups since we're going to recreate all
12264          * of them again.
12265          */
12266         btrfs_free_block_groups(fs_info);
12267         ret = reset_block_groups(fs_info);
12268         if (ret) {
12269                 fprintf(stderr, "error resetting the block groups\n");
12270                 return ret;
12271         }
12272
12273         /* Ok we can allocate now, reinit the extent root */
12274         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12275         if (ret) {
12276                 fprintf(stderr, "extent root initialization failed\n");
12277                 /*
12278                  * When the transaction code is updated we should end the
12279                  * transaction, but for now progs only knows about commit so
12280                  * just return an error.
12281                  */
12282                 return ret;
12283         }
12284
12285         /*
12286          * Now we have all the in-memory block groups setup so we can make
12287          * allocations properly, and the metadata we care about is safe since we
12288          * pinned all of it above.
12289          */
12290         while (1) {
12291                 struct btrfs_block_group_cache *cache;
12292
12293                 cache = btrfs_lookup_first_block_group(fs_info, start);
12294                 if (!cache)
12295                         break;
12296                 start = cache->key.objectid + cache->key.offset;
12297                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12298                                         &cache->key, &cache->item,
12299                                         sizeof(cache->item));
12300                 if (ret) {
12301                         fprintf(stderr, "Error adding block group\n");
12302                         return ret;
12303                 }
12304                 btrfs_extent_post_op(trans, fs_info->extent_root);
12305         }
12306
12307         ret = reset_balance(trans, fs_info);
12308         if (ret)
12309                 fprintf(stderr, "error resetting the pending balance\n");
12310
12311         return ret;
12312 }
12313
12314 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12315 {
12316         struct btrfs_path path;
12317         struct btrfs_trans_handle *trans;
12318         struct btrfs_key key;
12319         int ret;
12320
12321         printf("Recowing metadata block %llu\n", eb->start);
12322         key.objectid = btrfs_header_owner(eb);
12323         key.type = BTRFS_ROOT_ITEM_KEY;
12324         key.offset = (u64)-1;
12325
12326         root = btrfs_read_fs_root(root->fs_info, &key);
12327         if (IS_ERR(root)) {
12328                 fprintf(stderr, "Couldn't find owner root %llu\n",
12329                         key.objectid);
12330                 return PTR_ERR(root);
12331         }
12332
12333         trans = btrfs_start_transaction(root, 1);
12334         if (IS_ERR(trans))
12335                 return PTR_ERR(trans);
12336
12337         btrfs_init_path(&path);
12338         path.lowest_level = btrfs_header_level(eb);
12339         if (path.lowest_level)
12340                 btrfs_node_key_to_cpu(eb, &key, 0);
12341         else
12342                 btrfs_item_key_to_cpu(eb, &key, 0);
12343
12344         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12345         btrfs_commit_transaction(trans, root);
12346         btrfs_release_path(&path);
12347         return ret;
12348 }
12349
12350 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12351 {
12352         struct btrfs_path path;
12353         struct btrfs_trans_handle *trans;
12354         struct btrfs_key key;
12355         int ret;
12356
12357         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12358                bad->key.type, bad->key.offset);
12359         key.objectid = bad->root_id;
12360         key.type = BTRFS_ROOT_ITEM_KEY;
12361         key.offset = (u64)-1;
12362
12363         root = btrfs_read_fs_root(root->fs_info, &key);
12364         if (IS_ERR(root)) {
12365                 fprintf(stderr, "Couldn't find owner root %llu\n",
12366                         key.objectid);
12367                 return PTR_ERR(root);
12368         }
12369
12370         trans = btrfs_start_transaction(root, 1);
12371         if (IS_ERR(trans))
12372                 return PTR_ERR(trans);
12373
12374         btrfs_init_path(&path);
12375         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12376         if (ret) {
12377                 if (ret > 0)
12378                         ret = 0;
12379                 goto out;
12380         }
12381         ret = btrfs_del_item(trans, root, &path);
12382 out:
12383         btrfs_commit_transaction(trans, root);
12384         btrfs_release_path(&path);
12385         return ret;
12386 }
12387
12388 static int zero_log_tree(struct btrfs_root *root)
12389 {
12390         struct btrfs_trans_handle *trans;
12391         int ret;
12392
12393         trans = btrfs_start_transaction(root, 1);
12394         if (IS_ERR(trans)) {
12395                 ret = PTR_ERR(trans);
12396                 return ret;
12397         }
12398         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12399         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12400         ret = btrfs_commit_transaction(trans, root);
12401         return ret;
12402 }
12403
12404 static int populate_csum(struct btrfs_trans_handle *trans,
12405                          struct btrfs_root *csum_root, char *buf, u64 start,
12406                          u64 len)
12407 {
12408         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12409         u64 offset = 0;
12410         u64 sectorsize;
12411         int ret = 0;
12412
12413         while (offset < len) {
12414                 sectorsize = fs_info->sectorsize;
12415                 ret = read_extent_data(fs_info, buf, start + offset,
12416                                        &sectorsize, 0);
12417                 if (ret)
12418                         break;
12419                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12420                                             start + offset, buf, sectorsize);
12421                 if (ret)
12422                         break;
12423                 offset += sectorsize;
12424         }
12425         return ret;
12426 }
12427
12428 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12429                                       struct btrfs_root *csum_root,
12430                                       struct btrfs_root *cur_root)
12431 {
12432         struct btrfs_path path;
12433         struct btrfs_key key;
12434         struct extent_buffer *node;
12435         struct btrfs_file_extent_item *fi;
12436         char *buf = NULL;
12437         u64 start = 0;
12438         u64 len = 0;
12439         int slot = 0;
12440         int ret = 0;
12441
12442         buf = malloc(cur_root->fs_info->sectorsize);
12443         if (!buf)
12444                 return -ENOMEM;
12445
12446         btrfs_init_path(&path);
12447         key.objectid = 0;
12448         key.offset = 0;
12449         key.type = 0;
12450         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12451         if (ret < 0)
12452                 goto out;
12453         /* Iterate all regular file extents and fill its csum */
12454         while (1) {
12455                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12456
12457                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12458                         goto next;
12459                 node = path.nodes[0];
12460                 slot = path.slots[0];
12461                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12462                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12463                         goto next;
12464                 start = btrfs_file_extent_disk_bytenr(node, fi);
12465                 len = btrfs_file_extent_disk_num_bytes(node, fi);
12466
12467                 ret = populate_csum(trans, csum_root, buf, start, len);
12468                 if (ret == -EEXIST)
12469                         ret = 0;
12470                 if (ret < 0)
12471                         goto out;
12472 next:
12473                 /*
12474                  * TODO: if next leaf is corrupted, jump to nearest next valid
12475                  * leaf.
12476                  */
12477                 ret = btrfs_next_item(cur_root, &path);
12478                 if (ret < 0)
12479                         goto out;
12480                 if (ret > 0) {
12481                         ret = 0;
12482                         goto out;
12483                 }
12484         }
12485
12486 out:
12487         btrfs_release_path(&path);
12488         free(buf);
12489         return ret;
12490 }
12491
12492 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12493                                   struct btrfs_root *csum_root)
12494 {
12495         struct btrfs_fs_info *fs_info = csum_root->fs_info;
12496         struct btrfs_path path;
12497         struct btrfs_root *tree_root = fs_info->tree_root;
12498         struct btrfs_root *cur_root;
12499         struct extent_buffer *node;
12500         struct btrfs_key key;
12501         int slot = 0;
12502         int ret = 0;
12503
12504         btrfs_init_path(&path);
12505         key.objectid = BTRFS_FS_TREE_OBJECTID;
12506         key.offset = 0;
12507         key.type = BTRFS_ROOT_ITEM_KEY;
12508         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12509         if (ret < 0)
12510                 goto out;
12511         if (ret > 0) {
12512                 ret = -ENOENT;
12513                 goto out;
12514         }
12515
12516         while (1) {
12517                 node = path.nodes[0];
12518                 slot = path.slots[0];
12519                 btrfs_item_key_to_cpu(node, &key, slot);
12520                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12521                         goto out;
12522                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12523                         goto next;
12524                 if (!is_fstree(key.objectid))
12525                         goto next;
12526                 key.offset = (u64)-1;
12527
12528                 cur_root = btrfs_read_fs_root(fs_info, &key);
12529                 if (IS_ERR(cur_root) || !cur_root) {
12530                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12531                                 key.objectid);
12532                         goto out;
12533                 }
12534                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12535                                 cur_root);
12536                 if (ret < 0)
12537                         goto out;
12538 next:
12539                 ret = btrfs_next_item(tree_root, &path);
12540                 if (ret > 0) {
12541                         ret = 0;
12542                         goto out;
12543                 }
12544                 if (ret < 0)
12545                         goto out;
12546         }
12547
12548 out:
12549         btrfs_release_path(&path);
12550         return ret;
12551 }
12552
12553 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12554                                       struct btrfs_root *csum_root)
12555 {
12556         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12557         struct btrfs_path path;
12558         struct btrfs_extent_item *ei;
12559         struct extent_buffer *leaf;
12560         char *buf;
12561         struct btrfs_key key;
12562         int ret;
12563
12564         btrfs_init_path(&path);
12565         key.objectid = 0;
12566         key.type = BTRFS_EXTENT_ITEM_KEY;
12567         key.offset = 0;
12568         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12569         if (ret < 0) {
12570                 btrfs_release_path(&path);
12571                 return ret;
12572         }
12573
12574         buf = malloc(csum_root->fs_info->sectorsize);
12575         if (!buf) {
12576                 btrfs_release_path(&path);
12577                 return -ENOMEM;
12578         }
12579
12580         while (1) {
12581                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12582                         ret = btrfs_next_leaf(extent_root, &path);
12583                         if (ret < 0)
12584                                 break;
12585                         if (ret) {
12586                                 ret = 0;
12587                                 break;
12588                         }
12589                 }
12590                 leaf = path.nodes[0];
12591
12592                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12593                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12594                         path.slots[0]++;
12595                         continue;
12596                 }
12597
12598                 ei = btrfs_item_ptr(leaf, path.slots[0],
12599                                     struct btrfs_extent_item);
12600                 if (!(btrfs_extent_flags(leaf, ei) &
12601                       BTRFS_EXTENT_FLAG_DATA)) {
12602                         path.slots[0]++;
12603                         continue;
12604                 }
12605
12606                 ret = populate_csum(trans, csum_root, buf, key.objectid,
12607                                     key.offset);
12608                 if (ret)
12609                         break;
12610                 path.slots[0]++;
12611         }
12612
12613         btrfs_release_path(&path);
12614         free(buf);
12615         return ret;
12616 }
12617
/*
 * Recalculate the data checksums and store them in the csum tree.
 *
 * An extent tree init wipes out all the extent info, so in that case the
 * extent tree cannot be relied upon and the fs/subvolume trees have to be
 * used instead.
 *
 * @search_fs_tree: non-zero to walk the fs/subvolume trees, zero to walk the
 *                  extent tree.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
12634
12635 static void free_roots_info_cache(void)
12636 {
12637         if (!roots_info_cache)
12638                 return;
12639
12640         while (!cache_tree_empty(roots_info_cache)) {
12641                 struct cache_extent *entry;
12642                 struct root_item_info *rii;
12643
12644                 entry = first_cache_extent(roots_info_cache);
12645                 if (!entry)
12646                         break;
12647                 remove_cache_extent(roots_info_cache, entry);
12648                 rii = container_of(entry, struct root_item_info, cache_extent);
12649                 free(rii);
12650         }
12651
12652         free(roots_info_cache);
12653         roots_info_cache = NULL;
12654 }
12655
12656 static int build_roots_info_cache(struct btrfs_fs_info *info)
12657 {
12658         int ret = 0;
12659         struct btrfs_key key;
12660         struct extent_buffer *leaf;
12661         struct btrfs_path path;
12662
12663         if (!roots_info_cache) {
12664                 roots_info_cache = malloc(sizeof(*roots_info_cache));
12665                 if (!roots_info_cache)
12666                         return -ENOMEM;
12667                 cache_tree_init(roots_info_cache);
12668         }
12669
12670         btrfs_init_path(&path);
12671         key.objectid = 0;
12672         key.type = BTRFS_EXTENT_ITEM_KEY;
12673         key.offset = 0;
12674         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12675         if (ret < 0)
12676                 goto out;
12677         leaf = path.nodes[0];
12678
12679         while (1) {
12680                 struct btrfs_key found_key;
12681                 struct btrfs_extent_item *ei;
12682                 struct btrfs_extent_inline_ref *iref;
12683                 int slot = path.slots[0];
12684                 int type;
12685                 u64 flags;
12686                 u64 root_id;
12687                 u8 level;
12688                 struct cache_extent *entry;
12689                 struct root_item_info *rii;
12690
12691                 if (slot >= btrfs_header_nritems(leaf)) {
12692                         ret = btrfs_next_leaf(info->extent_root, &path);
12693                         if (ret < 0) {
12694                                 break;
12695                         } else if (ret) {
12696                                 ret = 0;
12697                                 break;
12698                         }
12699                         leaf = path.nodes[0];
12700                         slot = path.slots[0];
12701                 }
12702
12703                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12704
12705                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12706                     found_key.type != BTRFS_METADATA_ITEM_KEY)
12707                         goto next;
12708
12709                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12710                 flags = btrfs_extent_flags(leaf, ei);
12711
12712                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12713                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12714                         goto next;
12715
12716                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12717                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12718                         level = found_key.offset;
12719                 } else {
12720                         struct btrfs_tree_block_info *binfo;
12721
12722                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
12723                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12724                         level = btrfs_tree_block_level(leaf, binfo);
12725                 }
12726
12727                 /*
12728                  * For a root extent, it must be of the following type and the
12729                  * first (and only one) iref in the item.
12730                  */
12731                 type = btrfs_extent_inline_ref_type(leaf, iref);
12732                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12733                         goto next;
12734
12735                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12736                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12737                 if (!entry) {
12738                         rii = malloc(sizeof(struct root_item_info));
12739                         if (!rii) {
12740                                 ret = -ENOMEM;
12741                                 goto out;
12742                         }
12743                         rii->cache_extent.start = root_id;
12744                         rii->cache_extent.size = 1;
12745                         rii->level = (u8)-1;
12746                         entry = &rii->cache_extent;
12747                         ret = insert_cache_extent(roots_info_cache, entry);
12748                         ASSERT(ret == 0);
12749                 } else {
12750                         rii = container_of(entry, struct root_item_info,
12751                                            cache_extent);
12752                 }
12753
12754                 ASSERT(rii->cache_extent.start == root_id);
12755                 ASSERT(rii->cache_extent.size == 1);
12756
12757                 if (level > rii->level || rii->level == (u8)-1) {
12758                         rii->level = level;
12759                         rii->bytenr = found_key.objectid;
12760                         rii->gen = btrfs_extent_generation(leaf, ei);
12761                         rii->node_count = 1;
12762                 } else if (level == rii->level) {
12763                         rii->node_count++;
12764                 }
12765 next:
12766                 path.slots[0]++;
12767         }
12768
12769 out:
12770         btrfs_release_path(&path);
12771
12772         return ret;
12773 }
12774
12775 static int maybe_repair_root_item(struct btrfs_path *path,
12776                                   const struct btrfs_key *root_key,
12777                                   const int read_only_mode)
12778 {
12779         const u64 root_id = root_key->objectid;
12780         struct cache_extent *entry;
12781         struct root_item_info *rii;
12782         struct btrfs_root_item ri;
12783         unsigned long offset;
12784
12785         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12786         if (!entry) {
12787                 fprintf(stderr,
12788                         "Error: could not find extent items for root %llu\n",
12789                         root_key->objectid);
12790                 return -ENOENT;
12791         }
12792
12793         rii = container_of(entry, struct root_item_info, cache_extent);
12794         ASSERT(rii->cache_extent.start == root_id);
12795         ASSERT(rii->cache_extent.size == 1);
12796
12797         if (rii->node_count != 1) {
12798                 fprintf(stderr,
12799                         "Error: could not find btree root extent for root %llu\n",
12800                         root_id);
12801                 return -ENOENT;
12802         }
12803
12804         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12805         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12806
12807         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12808             btrfs_root_level(&ri) != rii->level ||
12809             btrfs_root_generation(&ri) != rii->gen) {
12810
12811                 /*
12812                  * If we're in repair mode but our caller told us to not update
12813                  * the root item, i.e. just check if it needs to be updated, don't
12814                  * print this message, since the caller will call us again shortly
12815                  * for the same root item without read only mode (the caller will
12816                  * open a transaction first).
12817                  */
12818                 if (!(read_only_mode && repair))
12819                         fprintf(stderr,
12820                                 "%sroot item for root %llu,"
12821                                 " current bytenr %llu, current gen %llu, current level %u,"
12822                                 " new bytenr %llu, new gen %llu, new level %u\n",
12823                                 (read_only_mode ? "" : "fixing "),
12824                                 root_id,
12825                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12826                                 btrfs_root_level(&ri),
12827                                 rii->bytenr, rii->gen, rii->level);
12828
12829                 if (btrfs_root_generation(&ri) > rii->gen) {
12830                         fprintf(stderr,
12831                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12832                                 root_id, btrfs_root_generation(&ri), rii->gen);
12833                         return -EINVAL;
12834                 }
12835
12836                 if (!read_only_mode) {
12837                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12838                         btrfs_set_root_level(&ri, rii->level);
12839                         btrfs_set_root_generation(&ri, rii->gen);
12840                         write_extent_buffer(path->nodes[0], &ri,
12841                                             offset, sizeof(ri));
12842                 }
12843
12844                 return 1;
12845         }
12846
12847         return 0;
12848 }
12849
12850 /*
12851  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12852  * caused read-only snapshots to be corrupted if they were created at a moment
12853  * when the source subvolume/snapshot had orphan items. The issue was that the
12854  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12855  * node instead of the post orphan cleanup root node.
12856  * So this function, and its callees, just detects and fixes those cases. Even
12857  * though the regression was for read-only snapshots, this function applies to
12858  * any snapshot/subvolume root.
12859  * This must be run before any other repair code - not doing it so, makes other
12860  * repair code delete or modify backrefs in the extent tree for example, which
12861  * will result in an inconsistent fs after repairing the root items.
12862  */
12863 static int repair_root_items(struct btrfs_fs_info *info)
12864 {
12865         struct btrfs_path path;
12866         struct btrfs_key key;
12867         struct extent_buffer *leaf;
12868         struct btrfs_trans_handle *trans = NULL;
12869         int ret = 0;
12870         int bad_roots = 0;
12871         int need_trans = 0;
12872
12873         btrfs_init_path(&path);
12874
12875         ret = build_roots_info_cache(info);
12876         if (ret)
12877                 goto out;
12878
12879         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12880         key.type = BTRFS_ROOT_ITEM_KEY;
12881         key.offset = 0;
12882
12883 again:
12884         /*
12885          * Avoid opening and committing transactions if a leaf doesn't have
12886          * any root items that need to be fixed, so that we avoid rotating
12887          * backup roots unnecessarily.
12888          */
12889         if (need_trans) {
12890                 trans = btrfs_start_transaction(info->tree_root, 1);
12891                 if (IS_ERR(trans)) {
12892                         ret = PTR_ERR(trans);
12893                         goto out;
12894                 }
12895         }
12896
12897         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12898                                 0, trans ? 1 : 0);
12899         if (ret < 0)
12900                 goto out;
12901         leaf = path.nodes[0];
12902
12903         while (1) {
12904                 struct btrfs_key found_key;
12905
12906                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12907                         int no_more_keys = find_next_key(&path, &key);
12908
12909                         btrfs_release_path(&path);
12910                         if (trans) {
12911                                 ret = btrfs_commit_transaction(trans,
12912                                                                info->tree_root);
12913                                 trans = NULL;
12914                                 if (ret < 0)
12915                                         goto out;
12916                         }
12917                         need_trans = 0;
12918                         if (no_more_keys)
12919                                 break;
12920                         goto again;
12921                 }
12922
12923                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12924
12925                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12926                         goto next;
12927                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12928                         goto next;
12929
12930                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12931                 if (ret < 0)
12932                         goto out;
12933                 if (ret) {
12934                         if (!trans && repair) {
12935                                 need_trans = 1;
12936                                 key = found_key;
12937                                 btrfs_release_path(&path);
12938                                 goto again;
12939                         }
12940                         bad_roots++;
12941                 }
12942 next:
12943                 path.slots[0]++;
12944         }
12945         ret = 0;
12946 out:
12947         free_roots_info_cache();
12948         btrfs_release_path(&path);
12949         if (trans)
12950                 btrfs_commit_transaction(trans, info->tree_root);
12951         if (ret < 0)
12952                 return ret;
12953
12954         return bad_roots;
12955 }
12956
12957 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12958 {
12959         struct btrfs_trans_handle *trans;
12960         struct btrfs_block_group_cache *bg_cache;
12961         u64 current = 0;
12962         int ret = 0;
12963
12964         /* Clear all free space cache inodes and its extent data */
12965         while (1) {
12966                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12967                 if (!bg_cache)
12968                         break;
12969                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12970                 if (ret < 0)
12971                         return ret;
12972                 current = bg_cache->key.objectid + bg_cache->key.offset;
12973         }
12974
12975         /* Don't forget to set cache_generation to -1 */
12976         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12977         if (IS_ERR(trans)) {
12978                 error("failed to update super block cache generation");
12979                 return PTR_ERR(trans);
12980         }
12981         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12982         btrfs_commit_transaction(trans, fs_info->tree_root);
12983
12984         return ret;
12985 }
12986
12987 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12988                 int clear_version)
12989 {
12990         int ret = 0;
12991
12992         if (clear_version == 1) {
12993                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12994                         error(
12995                 "free space cache v2 detected, use --clear-space-cache v2");
12996                         ret = 1;
12997                         goto close_out;
12998                 }
12999                 printf("Clearing free space cache\n");
13000                 ret = clear_free_space_cache(fs_info);
13001                 if (ret) {
13002                         error("failed to clear free space cache");
13003                         ret = 1;
13004                 } else {
13005                         printf("Free space cache cleared\n");
13006                 }
13007         } else if (clear_version == 2) {
13008                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13009                         printf("no free space cache v2 to clear\n");
13010                         ret = 0;
13011                         goto close_out;
13012                 }
13013                 printf("Clear free space cache v2\n");
13014                 ret = btrfs_clear_free_space_tree(fs_info);
13015                 if (ret) {
13016                         error("failed to clear free space cache v2: %d", ret);
13017                         ret = 1;
13018                 } else {
13019                         printf("free space cache v2 cleared\n");
13020                 }
13021         }
13022 close_out:
13023         return ret;
13024 }
13025
/*
 * Help text for "btrfs check": a NULL-terminated list of lines that
 * cmd_check() passes to usage().
 * NOTE(review): the split into synopsis, short description, long description
 * and an empty separator before the option list looks like a layout usage()
 * relies on; confirm against the help code before reordering entries.
 */
const char * const cmd_check_usage[] = {
        /* synopsis and short description */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",

        /* long description */
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",

        /* separator */
        "",

        /* options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",

        /* terminator */
        NULL
};
13057
13058 int cmd_check(int argc, char **argv)
13059 {
13060         struct cache_tree root_cache;
13061         struct btrfs_root *root;
13062         struct btrfs_fs_info *info;
13063         u64 bytenr = 0;
13064         u64 subvolid = 0;
13065         u64 tree_root_bytenr = 0;
13066         u64 chunk_root_bytenr = 0;
13067         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13068         int ret = 0;
13069         int err = 0;
13070         u64 num;
13071         int init_csum_tree = 0;
13072         int readonly = 0;
13073         int clear_space_cache = 0;
13074         int qgroup_report = 0;
13075         int qgroups_repaired = 0;
13076         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13077         int force = 0;
13078
13079         while(1) {
13080                 int c;
13081                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13082                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13083                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13084                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13085                         GETOPT_VAL_FORCE };
13086                 static const struct option long_options[] = {
13087                         { "super", required_argument, NULL, 's' },
13088                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13089                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13090                         { "init-csum-tree", no_argument, NULL,
13091                                 GETOPT_VAL_INIT_CSUM },
13092                         { "init-extent-tree", no_argument, NULL,
13093                                 GETOPT_VAL_INIT_EXTENT },
13094                         { "check-data-csum", no_argument, NULL,
13095                                 GETOPT_VAL_CHECK_CSUM },
13096                         { "backup", no_argument, NULL, 'b' },
13097                         { "subvol-extents", required_argument, NULL, 'E' },
13098                         { "qgroup-report", no_argument, NULL, 'Q' },
13099                         { "tree-root", required_argument, NULL, 'r' },
13100                         { "chunk-root", required_argument, NULL,
13101                                 GETOPT_VAL_CHUNK_TREE },
13102                         { "progress", no_argument, NULL, 'p' },
13103                         { "mode", required_argument, NULL,
13104                                 GETOPT_VAL_MODE },
13105                         { "clear-space-cache", required_argument, NULL,
13106                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13107                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13108                         { NULL, 0, NULL, 0}
13109                 };
13110
13111                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13112                 if (c < 0)
13113                         break;
13114                 switch(c) {
13115                         case 'a': /* ignored */ break;
13116                         case 'b':
13117                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13118                                 break;
13119                         case 's':
13120                                 num = arg_strtou64(optarg);
13121                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13122                                         error(
13123                                         "super mirror should be less than %d",
13124                                                 BTRFS_SUPER_MIRROR_MAX);
13125                                         exit(1);
13126                                 }
13127                                 bytenr = btrfs_sb_offset(((int)num));
13128                                 printf("using SB copy %llu, bytenr %llu\n", num,
13129                                        (unsigned long long)bytenr);
13130                                 break;
13131                         case 'Q':
13132                                 qgroup_report = 1;
13133                                 break;
13134                         case 'E':
13135                                 subvolid = arg_strtou64(optarg);
13136                                 break;
13137                         case 'r':
13138                                 tree_root_bytenr = arg_strtou64(optarg);
13139                                 break;
13140                         case GETOPT_VAL_CHUNK_TREE:
13141                                 chunk_root_bytenr = arg_strtou64(optarg);
13142                                 break;
13143                         case 'p':
13144                                 ctx.progress_enabled = true;
13145                                 break;
13146                         case '?':
13147                         case 'h':
13148                                 usage(cmd_check_usage);
13149                         case GETOPT_VAL_REPAIR:
13150                                 printf("enabling repair mode\n");
13151                                 repair = 1;
13152                                 ctree_flags |= OPEN_CTREE_WRITES;
13153                                 break;
13154                         case GETOPT_VAL_READONLY:
13155                                 readonly = 1;
13156                                 break;
13157                         case GETOPT_VAL_INIT_CSUM:
13158                                 printf("Creating a new CRC tree\n");
13159                                 init_csum_tree = 1;
13160                                 repair = 1;
13161                                 ctree_flags |= OPEN_CTREE_WRITES;
13162                                 break;
13163                         case GETOPT_VAL_INIT_EXTENT:
13164                                 init_extent_tree = 1;
13165                                 ctree_flags |= (OPEN_CTREE_WRITES |
13166                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13167                                 repair = 1;
13168                                 break;
13169                         case GETOPT_VAL_CHECK_CSUM:
13170                                 check_data_csum = 1;
13171                                 break;
13172                         case GETOPT_VAL_MODE:
13173                                 check_mode = parse_check_mode(optarg);
13174                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13175                                         error("unknown mode: %s", optarg);
13176                                         exit(1);
13177                                 }
13178                                 break;
13179                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13180                                 if (strcmp(optarg, "v1") == 0) {
13181                                         clear_space_cache = 1;
13182                                 } else if (strcmp(optarg, "v2") == 0) {
13183                                         clear_space_cache = 2;
13184                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13185                                 } else {
13186                                         error(
13187                 "invalid argument to --clear-space-cache, must be v1 or v2");
13188                                         exit(1);
13189                                 }
13190                                 ctree_flags |= OPEN_CTREE_WRITES;
13191                                 break;
13192                         case GETOPT_VAL_FORCE:
13193                                 force = 1;
13194                                 break;
13195                 }
13196         }
13197
13198         if (check_argc_exact(argc - optind, 1))
13199                 usage(cmd_check_usage);
13200
13201         if (ctx.progress_enabled) {
13202                 ctx.tp = TASK_NOTHING;
13203                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13204         }
13205
13206         /* This check is the only reason for --readonly to exist */
13207         if (readonly && repair) {
13208                 error("repair options are not compatible with --readonly");
13209                 exit(1);
13210         }
13211
13212         /*
13213          * experimental and dangerous
13214          */
13215         if (repair && check_mode == CHECK_MODE_LOWMEM)
13216                 warning("low-memory mode repair support is only partial");
13217
13218         radix_tree_init();
13219         cache_tree_init(&root_cache);
13220
13221         ret = check_mounted(argv[optind]);
13222         if (!force) {
13223                 if (ret < 0) {
13224                         error("could not check mount status: %s",
13225                                         strerror(-ret));
13226                         err |= !!ret;
13227                         goto err_out;
13228                 } else if (ret) {
13229                         error(
13230 "%s is currently mounted, use --force if you really intend to check the filesystem",
13231                                 argv[optind]);
13232                         ret = -EBUSY;
13233                         err |= !!ret;
13234                         goto err_out;
13235                 }
13236         } else {
13237                 if (repair) {
13238                         error("repair and --force is not yet supported");
13239                         ret = 1;
13240                         err |= !!ret;
13241                         goto err_out;
13242                 }
13243                 if (ret < 0) {
13244                         warning(
13245 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13246                                 argv[optind]);
13247                 } else if (ret) {
13248                         warning(
13249                         "filesystem mounted, continuing because of --force");
13250                 }
13251                 /* A block device is mounted in exclusive mode by kernel */
13252                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13253         }
13254
13255         /* only allow partial opening under repair mode */
13256         if (repair)
13257                 ctree_flags |= OPEN_CTREE_PARTIAL;
13258
13259         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13260                                   chunk_root_bytenr, ctree_flags);
13261         if (!info) {
13262                 error("cannot open file system");
13263                 ret = -EIO;
13264                 err |= !!ret;
13265                 goto err_out;
13266         }
13267
13268         global_info = info;
13269         root = info->fs_root;
13270         uuid_unparse(info->super_copy->fsid, uuidbuf);
13271
13272         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13273
13274         /*
13275          * Check the bare minimum before starting anything else that could rely
13276          * on it, namely the tree roots, any local consistency checks
13277          */
13278         if (!extent_buffer_uptodate(info->tree_root->node) ||
13279             !extent_buffer_uptodate(info->dev_root->node) ||
13280             !extent_buffer_uptodate(info->chunk_root->node)) {
13281                 error("critical roots corrupted, unable to check the filesystem");
13282                 err |= !!ret;
13283                 ret = -EIO;
13284                 goto close_out;
13285         }
13286
13287         if (clear_space_cache) {
13288                 ret = do_clear_free_space_cache(info, clear_space_cache);
13289                 err |= !!ret;
13290                 goto close_out;
13291         }
13292
13293         /*
13294          * repair mode will force us to commit transaction which
13295          * will make us fail to load log tree when mounting.
13296          */
13297         if (repair && btrfs_super_log_root(info->super_copy)) {
13298                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13299                 if (!ret) {
13300                         ret = 1;
13301                         err |= !!ret;
13302                         goto close_out;
13303                 }
13304                 ret = zero_log_tree(root);
13305                 err |= !!ret;
13306                 if (ret) {
13307                         error("failed to zero log tree: %d", ret);
13308                         goto close_out;
13309                 }
13310         }
13311
13312         if (qgroup_report) {
13313                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13314                        uuidbuf);
13315                 ret = qgroup_verify_all(info);
13316                 err |= !!ret;
13317                 if (ret == 0)
13318                         report_qgroups(1);
13319                 goto close_out;
13320         }
13321         if (subvolid) {
13322                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13323                        subvolid, argv[optind], uuidbuf);
13324                 ret = print_extent_state(info, subvolid);
13325                 err |= !!ret;
13326                 goto close_out;
13327         }
13328
13329         if (init_extent_tree || init_csum_tree) {
13330                 struct btrfs_trans_handle *trans;
13331
13332                 trans = btrfs_start_transaction(info->extent_root, 0);
13333                 if (IS_ERR(trans)) {
13334                         error("error starting transaction");
13335                         ret = PTR_ERR(trans);
13336                         err |= !!ret;
13337                         goto close_out;
13338                 }
13339
13340                 if (init_extent_tree) {
13341                         printf("Creating a new extent tree\n");
13342                         ret = reinit_extent_tree(trans, info);
13343                         err |= !!ret;
13344                         if (ret)
13345                                 goto close_out;
13346                 }
13347
13348                 if (init_csum_tree) {
13349                         printf("Reinitialize checksum tree\n");
13350                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13351                         if (ret) {
13352                                 error("checksum tree initialization failed: %d",
13353                                                 ret);
13354                                 ret = -EIO;
13355                                 err |= !!ret;
13356                                 goto close_out;
13357                         }
13358
13359                         ret = fill_csum_tree(trans, info->csum_root,
13360                                              init_extent_tree);
13361                         err |= !!ret;
13362                         if (ret) {
13363                                 error("checksum tree refilling failed: %d", ret);
13364                                 return -EIO;
13365                         }
13366                 }
13367                 /*
13368                  * Ok now we commit and run the normal fsck, which will add
13369                  * extent entries for all of the items it finds.
13370                  */
13371                 ret = btrfs_commit_transaction(trans, info->extent_root);
13372                 err |= !!ret;
13373                 if (ret)
13374                         goto close_out;
13375         }
13376         if (!extent_buffer_uptodate(info->extent_root->node)) {
13377                 error("critical: extent_root, unable to check the filesystem");
13378                 ret = -EIO;
13379                 err |= !!ret;
13380                 goto close_out;
13381         }
13382         if (!extent_buffer_uptodate(info->csum_root->node)) {
13383                 error("critical: csum_root, unable to check the filesystem");
13384                 ret = -EIO;
13385                 err |= !!ret;
13386                 goto close_out;
13387         }
13388
13389         ret = do_check_chunks_and_extents(info);
13390         err |= !!ret;
13391         if (ret)
13392                 error(
13393                 "errors found in extent allocation tree or chunk allocation");
13394
13395         ret = repair_root_items(info);
13396         err |= !!ret;
13397         if (ret < 0) {
13398                 error("failed to repair root items: %s", strerror(-ret));
13399                 goto close_out;
13400         }
13401         if (repair) {
13402                 fprintf(stderr, "Fixed %d roots.\n", ret);
13403                 ret = 0;
13404         } else if (ret > 0) {
13405                 fprintf(stderr,
13406                        "Found %d roots with an outdated root item.\n",
13407                        ret);
13408                 fprintf(stderr,
13409                         "Please run a filesystem check with the option --repair to fix them.\n");
13410                 ret = 1;
13411                 err |= !!ret;
13412                 goto close_out;
13413         }
13414
13415         if (!ctx.progress_enabled) {
13416                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13417                         fprintf(stderr, "checking free space tree\n");
13418                 else
13419                         fprintf(stderr, "checking free space cache\n");
13420         }
13421         ret = check_space_cache(root);
13422         err |= !!ret;
13423         if (ret) {
13424                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13425                         error("errors found in free space tree");
13426                 else
13427                         error("errors found in free space cache");
13428                 goto out;
13429         }
13430
13431         /*
13432          * We used to have to have these hole extents in between our real
13433          * extents so if we don't have this flag set we need to make sure there
13434          * are no gaps in the file extents for inodes, otherwise we can just
13435          * ignore it when this happens.
13436          */
13437         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13438         ret = do_check_fs_roots(info, &root_cache);
13439         err |= !!ret;
13440         if (ret) {
13441                 error("errors found in fs roots");
13442                 goto out;
13443         }
13444
13445         fprintf(stderr, "checking csums\n");
13446         ret = check_csums(root);
13447         err |= !!ret;
13448         if (ret) {
13449                 error("errors found in csum tree");
13450                 goto out;
13451         }
13452
13453         fprintf(stderr, "checking root refs\n");
13454         /* For low memory mode, check_fs_roots_v2 handles root refs */
13455         if (check_mode != CHECK_MODE_LOWMEM) {
13456                 ret = check_root_refs(root, &root_cache);
13457                 err |= !!ret;
13458                 if (ret) {
13459                         error("errors found in root refs");
13460                         goto out;
13461                 }
13462         }
13463
13464         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13465                 struct extent_buffer *eb;
13466
13467                 eb = list_first_entry(&root->fs_info->recow_ebs,
13468                                       struct extent_buffer, recow);
13469                 list_del_init(&eb->recow);
13470                 ret = recow_extent_buffer(root, eb);
13471                 err |= !!ret;
13472                 if (ret) {
13473                         error("fails to fix transid errors");
13474                         break;
13475                 }
13476         }
13477
13478         while (!list_empty(&delete_items)) {
13479                 struct bad_item *bad;
13480
13481                 bad = list_first_entry(&delete_items, struct bad_item, list);
13482                 list_del_init(&bad->list);
13483                 if (repair) {
13484                         ret = delete_bad_item(root, bad);
13485                         err |= !!ret;
13486                 }
13487                 free(bad);
13488         }
13489
13490         if (info->quota_enabled) {
13491                 fprintf(stderr, "checking quota groups\n");
13492                 ret = qgroup_verify_all(info);
13493                 err |= !!ret;
13494                 if (ret) {
13495                         error("failed to check quota groups");
13496                         goto out;
13497                 }
13498                 report_qgroups(0);
13499                 ret = repair_qgroups(info, &qgroups_repaired);
13500                 err |= !!ret;
13501                 if (err) {
13502                         error("failed to repair quota groups");
13503                         goto out;
13504                 }
13505                 ret = 0;
13506         }
13507
13508         if (!list_empty(&root->fs_info->recow_ebs)) {
13509                 error("transid errors in file system");
13510                 ret = 1;
13511                 err |= !!ret;
13512         }
13513 out:
13514         printf("found %llu bytes used, ",
13515                (unsigned long long)bytes_used);
13516         if (err)
13517                 printf("error(s) found\n");
13518         else
13519                 printf("no error found\n");
13520         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13521         printf("total tree bytes: %llu\n",
13522                (unsigned long long)total_btree_bytes);
13523         printf("total fs tree bytes: %llu\n",
13524                (unsigned long long)total_fs_tree_bytes);
13525         printf("total extent tree bytes: %llu\n",
13526                (unsigned long long)total_extent_tree_bytes);
13527         printf("btree space waste bytes: %llu\n",
13528                (unsigned long long)btree_space_waste);
13529         printf("file data blocks allocated: %llu\n referenced %llu\n",
13530                 (unsigned long long)data_bytes_allocated,
13531                 (unsigned long long)data_bytes_referenced);
13532
13533         free_qgroup_counts();
13534         free_root_recs_tree(&root_cache);
13535 close_out:
13536         close_ctree(root);
13537 err_out:
13538         if (ctx.progress_enabled)
13539                 task_deinit(ctx.info);
13540
13541         return err;
13542 }