btrfs-progs: check: repair inode nlink in lowmem
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases that the progress indicator can name.  The value is used as an
 * index into the label table of print_status_check().
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel without a label; has to be the last element. */
	TASK_NOTHING,
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM,
	/* Returned by parse_check_mode() for an unrecognized name. */
	CHECK_MODE_UNKNOWN,
	/* Alias, not a separate mode. */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL,
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct rb_node node;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/*
 * Error bits.  Every macro is a distinct single-bit mask; bit 0 is not
 * assigned.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
#define DIR_INDEX_MISSING	(1 << 18)	/* INODE_INDEX not found */
#define DIR_INDEX_MISMATCH	(1 << 19)	/* INODE_INDEX found but not match */
#define DIR_COUNT_AGAIN		(1 << 20)	/* DIR isize should be recalculated */
136
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
138 {
139         return container_of(back, struct data_backref, node);
140 }
141
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
143 {
144         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146         struct data_backref *back1 = to_data_backref(ext1);
147         struct data_backref *back2 = to_data_backref(ext2);
148
149         WARN_ON(!ext1->is_data);
150         WARN_ON(!ext2->is_data);
151
152         /* parent and root are a union, so this covers both */
153         if (back1->parent > back2->parent)
154                 return 1;
155         if (back1->parent < back2->parent)
156                 return -1;
157
158         /* This is a full backref and the parents match. */
159         if (back1->node.full_backref)
160                 return 0;
161
162         if (back1->owner > back2->owner)
163                 return 1;
164         if (back1->owner < back2->owner)
165                 return -1;
166
167         if (back1->offset > back2->offset)
168                 return 1;
169         if (back1->offset < back2->offset)
170                 return -1;
171
172         if (back1->found_ref && back2->found_ref) {
173                 if (back1->disk_bytenr > back2->disk_bytenr)
174                         return 1;
175                 if (back1->disk_bytenr < back2->disk_bytenr)
176                         return -1;
177
178                 if (back1->bytes > back2->bytes)
179                         return 1;
180                 if (back1->bytes < back2->bytes)
181                         return -1;
182         }
183
184         return 0;
185 }
186
187 /*
188  * Much like data_backref, just removed the undetermined members
189  * and change it to use list_head.
190  * During extent scan, it is stored in root->orphan_data_extent.
191  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
192  */
193 struct orphan_data_extent {
194         struct list_head list;
195         u64 root;
196         u64 objectid;
197         u64 offset;
198         u64 disk_bytenr;
199         u64 disk_len;
200 };
201
202 struct tree_backref {
203         struct extent_backref node;
204         union {
205                 u64 parent;
206                 u64 root;
207         };
208 };
209
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
211 {
212         return container_of(back, struct tree_backref, node);
213 }
214
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
216 {
217         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219         struct tree_backref *back1 = to_tree_backref(ext1);
220         struct tree_backref *back2 = to_tree_backref(ext2);
221
222         WARN_ON(ext1->is_data);
223         WARN_ON(ext2->is_data);
224
225         /* parent and root are a union, so this covers both */
226         if (back1->parent > back2->parent)
227                 return 1;
228         if (back1->parent < back2->parent)
229                 return -1;
230
231         return 0;
232 }
233
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
235 {
236         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
238
239         if (ext1->is_data > ext2->is_data)
240                 return 1;
241
242         if (ext1->is_data < ext2->is_data)
243                 return -1;
244
245         if (ext1->full_backref > ext2->full_backref)
246                 return 1;
247         if (ext1->full_backref < ext2->full_backref)
248                 return -1;
249
250         if (ext1->is_data)
251                 return compare_data_backref(node1, node2);
252         else
253                 return compare_tree_backref(node1, node2);
254 }
255
/*
 * Explicit "not set yet" value for extent_record::flag_block_full_backref,
 * which is two bits wide so that it can hold this value.
 */
enum {
	FLAG_UNSET = 2,
};
258
259 struct extent_record {
260         struct list_head backrefs;
261         struct list_head dups;
262         struct rb_root backref_tree;
263         struct list_head list;
264         struct cache_extent cache;
265         struct btrfs_disk_key parent_key;
266         u64 start;
267         u64 max_size;
268         u64 nr;
269         u64 refs;
270         u64 extent_item_refs;
271         u64 generation;
272         u64 parent_generation;
273         u64 info_objectid;
274         u32 num_duplicates;
275         u8 info_level;
276         unsigned int flag_block_full_backref:2;
277         unsigned int found_rec:1;
278         unsigned int content_checked:1;
279         unsigned int owner_ref_checked:1;
280         unsigned int is_root:1;
281         unsigned int metadata:1;
282         unsigned int bad_full_backref:1;
283         unsigned int crossing_stripes:1;
284         unsigned int wrong_chunk_type:1;
285 };
286
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
288 {
289         return container_of(entry, struct extent_record, list);
290 }
291
292 struct inode_backref {
293         struct list_head list;
294         unsigned int found_dir_item:1;
295         unsigned int found_dir_index:1;
296         unsigned int found_inode_ref:1;
297         u8 filetype;
298         u8 ref_type;
299         int errors;
300         u64 dir;
301         u64 index;
302         u16 namelen;
303         char name[0];
304 };
305
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
307 {
308         return list_entry(entry, struct inode_backref, list);
309 }
310
311 struct root_item_record {
312         struct list_head list;
313         u64 objectid;
314         u64 bytenr;
315         u64 last_snapshot;
316         u8 level;
317         u8 drop_level;
318         struct btrfs_key drop_key;
319 };
320
/* Single-bit error masks for backrefs, decoded by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
334
335 struct file_extent_hole {
336         struct rb_node node;
337         u64 start;
338         u64 len;
339 };
340
341 struct inode_record {
342         struct list_head backrefs;
343         unsigned int checked:1;
344         unsigned int merging:1;
345         unsigned int found_inode_item:1;
346         unsigned int found_dir_item:1;
347         unsigned int found_file_extent:1;
348         unsigned int found_csum_item:1;
349         unsigned int some_csum_missing:1;
350         unsigned int nodatasum:1;
351         int errors;
352
353         u64 ino;
354         u32 nlink;
355         u32 imode;
356         u64 isize;
357         u64 nbytes;
358
359         u32 found_link;
360         u64 found_size;
361         u64 extent_start;
362         u64 extent_end;
363         struct rb_root holes;
364         struct list_head orphan_extents;
365
366         u32 refs;
367 };
368
/* Single-bit error masks for inode_record::errors. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
384
385 struct root_backref {
386         struct list_head list;
387         unsigned int found_dir_item:1;
388         unsigned int found_dir_index:1;
389         unsigned int found_back_ref:1;
390         unsigned int found_forward_ref:1;
391         unsigned int reachable:1;
392         int errors;
393         u64 ref_root;
394         u64 dir;
395         u64 index;
396         u16 namelen;
397         char name[0];
398 };
399
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
401 {
402         return list_entry(entry, struct root_backref, list);
403 }
404
405 struct root_record {
406         struct list_head backrefs;
407         struct cache_extent cache;
408         unsigned int found_root_item:1;
409         u64 objectid;
410         u32 found_ref;
411 };
412
413 struct ptr_node {
414         struct cache_extent cache;
415         void *data;
416 };
417
418 struct shared_node {
419         struct cache_extent cache;
420         struct cache_tree root_cache;
421         struct cache_tree inode_cache;
422         struct inode_record *current;
423         u32 refs;
424 };
425
426 struct block_info {
427         u64 start;
428         u32 size;
429 };
430
431 struct walk_control {
432         struct cache_tree shared;
433         struct shared_node *nodes[BTRFS_MAX_LEVEL];
434         int active_node;
435         int root_level;
436 };
437
438 struct bad_item {
439         struct btrfs_key key;
440         u64 root_id;
441         struct list_head list;
442 };
443
444 struct extent_entry {
445         u64 bytenr;
446         u64 bytes;
447         int count;
448         int broken;
449         struct list_head list;
450 };
451
452 struct root_item_info {
453         /* level of the root */
454         u8 level;
455         /* number of nodes at this level, must be 1 for a root */
456         int node_count;
457         u64 bytenr;
458         u64 gen;
459         struct cache_extent cache_extent;
460 };
461
/*
 * Error bits for the low memory mode check.
 *
 * No caller inspects individual bits yet; they are only used internally to
 * classify errors.  Each macro must own a distinct bit, otherwise two
 * unrelated errors become indistinguishable once they are OR-ed together.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * For kernel scrub workaround.  This used to be (1 << 4), which aliased
 * REFERENCER_MISMATCH; it now has a bit of its own.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9)
478
479 static void *print_status_check(void *p)
480 {
481         struct task_ctx *priv = p;
482         const char work_indicator[] = { '.', 'o', 'O', 'o' };
483         uint32_t count = 0;
484         static char *task_position_string[] = {
485                 "checking extents",
486                 "checking free space cache",
487                 "checking fs roots",
488         };
489
490         task_period_start(priv->info, 1000 /* 1s */);
491
492         if (priv->tp == TASK_NOTHING)
493                 return NULL;
494
495         while (1) {
496                 printf("%s [%c]\r", task_position_string[priv->tp],
497                                 work_indicator[count % 4]);
498                 count++;
499                 fflush(stdout);
500                 task_period_wait(priv->info);
501         }
502         return NULL;
503 }
504
/*
 * Companion of print_status_check(): ends the status line with a newline
 * and flushes stdout.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
512
513 static enum btrfs_check_mode parse_check_mode(const char *str)
514 {
515         if (strcmp(str, "lowmem") == 0)
516                 return CHECK_MODE_LOWMEM;
517         if (strcmp(str, "orig") == 0)
518                 return CHECK_MODE_ORIGINAL;
519         if (strcmp(str, "original") == 0)
520                 return CHECK_MODE_ORIGINAL;
521
522         return CHECK_MODE_UNKNOWN;
523 }
524
525 /* Compatible function to allow reuse of old codes */
526 static u64 first_extent_gap(struct rb_root *holes)
527 {
528         struct file_extent_hole *hole;
529
530         if (RB_EMPTY_ROOT(holes))
531                 return (u64)-1;
532
533         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
534         return hole->start;
535 }
536
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
538 {
539         struct file_extent_hole *hole1;
540         struct file_extent_hole *hole2;
541
542         hole1 = rb_entry(node1, struct file_extent_hole, node);
543         hole2 = rb_entry(node2, struct file_extent_hole, node);
544
545         if (hole1->start > hole2->start)
546                 return -1;
547         if (hole1->start < hole2->start)
548                 return 1;
549         /* Now hole1->start == hole2->start */
550         if (hole1->len >= hole2->len)
551                 /*
552                  * Hole 1 will be merge center
553                  * Same hole will be merged later
554                  */
555                 return -1;
556         /* Hole 2 will be merge center */
557         return 1;
558 }
559
560 /*
561  * Add a hole to the record
562  *
563  * This will do hole merge for copy_file_extent_holes(),
564  * which will ensure there won't be continuous holes.
565  */
566 static int add_file_extent_hole(struct rb_root *holes,
567                                 u64 start, u64 len)
568 {
569         struct file_extent_hole *hole;
570         struct file_extent_hole *prev = NULL;
571         struct file_extent_hole *next = NULL;
572
573         hole = malloc(sizeof(*hole));
574         if (!hole)
575                 return -ENOMEM;
576         hole->start = start;
577         hole->len = len;
578         /* Since compare will not return 0, no -EEXIST will happen */
579         rb_insert(holes, &hole->node, compare_hole);
580
581         /* simple merge with previous hole */
582         if (rb_prev(&hole->node))
583                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
584                                 node);
585         if (prev && prev->start + prev->len >= hole->start) {
586                 hole->len = hole->start + hole->len - prev->start;
587                 hole->start = prev->start;
588                 rb_erase(&prev->node, holes);
589                 free(prev);
590                 prev = NULL;
591         }
592
593         /* iterate merge with next holes */
594         while (1) {
595                 if (!rb_next(&hole->node))
596                         break;
597                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
598                                         node);
599                 if (hole->start + hole->len >= next->start) {
600                         if (hole->start + hole->len <= next->start + next->len)
601                                 hole->len = next->start + next->len -
602                                             hole->start;
603                         rb_erase(&next->node, holes);
604                         free(next);
605                         next = NULL;
606                 } else
607                         break;
608         }
609         return 0;
610 }
611
612 static int compare_hole_range(struct rb_node *node, void *data)
613 {
614         struct file_extent_hole *hole;
615         u64 start;
616
617         hole = (struct file_extent_hole *)data;
618         start = hole->start;
619
620         hole = rb_entry(node, struct file_extent_hole, node);
621         if (start < hole->start)
622                 return -1;
623         if (start >= hole->start && start < hole->start + hole->len)
624                 return 0;
625         return 1;
626 }
627
628 /*
629  * Delete a hole in the record
630  *
631  * This will do the hole split and is much restrict than add.
632  */
633 static int del_file_extent_hole(struct rb_root *holes,
634                                 u64 start, u64 len)
635 {
636         struct file_extent_hole *hole;
637         struct file_extent_hole tmp;
638         u64 prev_start = 0;
639         u64 prev_len = 0;
640         u64 next_start = 0;
641         u64 next_len = 0;
642         struct rb_node *node;
643         int have_prev = 0;
644         int have_next = 0;
645         int ret = 0;
646
647         tmp.start = start;
648         tmp.len = len;
649         node = rb_search(holes, &tmp, compare_hole_range, NULL);
650         if (!node)
651                 return -EEXIST;
652         hole = rb_entry(node, struct file_extent_hole, node);
653         if (start + len > hole->start + hole->len)
654                 return -EEXIST;
655
656         /*
657          * Now there will be no overlap, delete the hole and re-add the
658          * split(s) if they exists.
659          */
660         if (start > hole->start) {
661                 prev_start = hole->start;
662                 prev_len = start - hole->start;
663                 have_prev = 1;
664         }
665         if (hole->start + hole->len > start + len) {
666                 next_start = start + len;
667                 next_len = hole->start + hole->len - start - len;
668                 have_next = 1;
669         }
670         rb_erase(node, holes);
671         free(hole);
672         if (have_prev) {
673                 ret = add_file_extent_hole(holes, prev_start, prev_len);
674                 if (ret < 0)
675                         return ret;
676         }
677         if (have_next) {
678                 ret = add_file_extent_hole(holes, next_start, next_len);
679                 if (ret < 0)
680                         return ret;
681         }
682         return 0;
683 }
684
685 static int copy_file_extent_holes(struct rb_root *dst,
686                                   struct rb_root *src)
687 {
688         struct file_extent_hole *hole;
689         struct rb_node *node;
690         int ret = 0;
691
692         node = rb_first(src);
693         while (node) {
694                 hole = rb_entry(node, struct file_extent_hole, node);
695                 ret = add_file_extent_hole(dst, hole->start, hole->len);
696                 if (ret)
697                         break;
698                 node = rb_next(node);
699         }
700         return ret;
701 }
702
703 static void free_file_extent_holes(struct rb_root *holes)
704 {
705         struct rb_node *node;
706         struct file_extent_hole *hole;
707
708         node = rb_first(holes);
709         while (node) {
710                 hole = rb_entry(node, struct file_extent_hole, node);
711                 rb_erase(node, holes);
712                 free(hole);
713                 node = rb_first(holes);
714         }
715 }
716
717 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
718
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720                                  struct btrfs_root *root)
721 {
722         if (root->last_trans != trans->transid) {
723                 root->track_dirty = 1;
724                 root->last_trans = trans->transid;
725                 root->commit_root = root->node;
726                 extent_buffer_get(root->node);
727         }
728 }
729
730 static u8 imode_to_type(u32 imode)
731 {
732 #define S_SHIFT 12
733         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
735                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
736                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
737                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
738                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
739                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
740                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
741         };
742
743         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
744 #undef S_SHIFT
745 }
746
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
748 {
749         struct device_record *rec1;
750         struct device_record *rec2;
751
752         rec1 = rb_entry(node1, struct device_record, node);
753         rec2 = rb_entry(node2, struct device_record, node);
754         if (rec1->devid > rec2->devid)
755                 return -1;
756         else if (rec1->devid < rec2->devid)
757                 return 1;
758         else
759                 return 0;
760 }
761
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
763 {
764         struct inode_record *rec;
765         struct inode_backref *backref;
766         struct inode_backref *orig;
767         struct inode_backref *tmp;
768         struct orphan_data_extent *src_orphan;
769         struct orphan_data_extent *dst_orphan;
770         struct rb_node *rb;
771         size_t size;
772         int ret;
773
774         rec = malloc(sizeof(*rec));
775         if (!rec)
776                 return ERR_PTR(-ENOMEM);
777         memcpy(rec, orig_rec, sizeof(*rec));
778         rec->refs = 1;
779         INIT_LIST_HEAD(&rec->backrefs);
780         INIT_LIST_HEAD(&rec->orphan_extents);
781         rec->holes = RB_ROOT;
782
783         list_for_each_entry(orig, &orig_rec->backrefs, list) {
784                 size = sizeof(*orig) + orig->namelen + 1;
785                 backref = malloc(size);
786                 if (!backref) {
787                         ret = -ENOMEM;
788                         goto cleanup;
789                 }
790                 memcpy(backref, orig, size);
791                 list_add_tail(&backref->list, &rec->backrefs);
792         }
793         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794                 dst_orphan = malloc(sizeof(*dst_orphan));
795                 if (!dst_orphan) {
796                         ret = -ENOMEM;
797                         goto cleanup;
798                 }
799                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
801         }
802         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
803         if (ret < 0)
804                 goto cleanup_rb;
805
806         return rec;
807
808 cleanup_rb:
809         rb = rb_first(&rec->holes);
810         while (rb) {
811                 struct file_extent_hole *hole;
812
813                 hole = rb_entry(rb, struct file_extent_hole, node);
814                 rb = rb_next(rb);
815                 free(hole);
816         }
817
818 cleanup:
819         if (!list_empty(&rec->backrefs))
820                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821                         list_del(&orig->list);
822                         free(orig);
823                 }
824
825         if (!list_empty(&rec->orphan_extents))
826                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827                         list_del(&orig->list);
828                         free(orig);
829                 }
830
831         free(rec);
832
833         return ERR_PTR(ret);
834 }
835
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
837                                       u64 objectid)
838 {
839         struct orphan_data_extent *orphan;
840
841         if (list_empty(orphan_extents))
842                 return;
843         printf("The following data extent is lost in tree %llu:\n",
844                objectid);
845         list_for_each_entry(orphan, orphan_extents, list) {
846                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
848                        orphan->disk_len);
849         }
850 }
851
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
853 {
854         u64 root_objectid = root->root_key.objectid;
855         int errors = rec->errors;
856
857         if (!errors)
858                 return;
859         /* reloc root errors, we print its corresponding fs root objectid*/
860         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861                 root_objectid = root->root_key.offset;
862                 fprintf(stderr, "reloc");
863         }
864         fprintf(stderr, "root %llu inode %llu errors %x",
865                 (unsigned long long) root_objectid,
866                 (unsigned long long) rec->ino, rec->errors);
867
868         if (errors & I_ERR_NO_INODE_ITEM)
869                 fprintf(stderr, ", no inode item");
870         if (errors & I_ERR_NO_ORPHAN_ITEM)
871                 fprintf(stderr, ", no orphan item");
872         if (errors & I_ERR_DUP_INODE_ITEM)
873                 fprintf(stderr, ", dup inode item");
874         if (errors & I_ERR_DUP_DIR_INDEX)
875                 fprintf(stderr, ", dup dir index");
876         if (errors & I_ERR_ODD_DIR_ITEM)
877                 fprintf(stderr, ", odd dir item");
878         if (errors & I_ERR_ODD_FILE_EXTENT)
879                 fprintf(stderr, ", odd file extent");
880         if (errors & I_ERR_BAD_FILE_EXTENT)
881                 fprintf(stderr, ", bad file extent");
882         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883                 fprintf(stderr, ", file extent overlap");
884         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885                 fprintf(stderr, ", file extent discount");
886         if (errors & I_ERR_DIR_ISIZE_WRONG)
887                 fprintf(stderr, ", dir isize wrong");
888         if (errors & I_ERR_FILE_NBYTES_WRONG)
889                 fprintf(stderr, ", nbytes wrong");
890         if (errors & I_ERR_ODD_CSUM_ITEM)
891                 fprintf(stderr, ", odd csum item");
892         if (errors & I_ERR_SOME_CSUM_MISSING)
893                 fprintf(stderr, ", some csum missing");
894         if (errors & I_ERR_LINK_COUNT_WRONG)
895                 fprintf(stderr, ", link count wrong");
896         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897                 fprintf(stderr, ", orphan file extent");
898         fprintf(stderr, "\n");
899         /* Print the orphan extents if needed */
900         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
901                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
902
903         /* Print the holes if needed */
904         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905                 struct file_extent_hole *hole;
906                 struct rb_node *node;
907                 int found = 0;
908
909                 node = rb_first(&rec->holes);
910                 fprintf(stderr, "Found file extent holes:\n");
911                 while (node) {
912                         found = 1;
913                         hole = rb_entry(node, struct file_extent_hole, node);
914                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
915                                 hole->start, hole->len);
916                         node = rb_next(node);
917                 }
918                 if (!found)
919                         fprintf(stderr, "\tstart: 0, len: %llu\n",
920                                 round_up(rec->isize,
921                                          root->fs_info->sectorsize));
922         }
923 }
924
925 static void print_ref_error(int errors)
926 {
927         if (errors & REF_ERR_NO_DIR_ITEM)
928                 fprintf(stderr, ", no dir item");
929         if (errors & REF_ERR_NO_DIR_INDEX)
930                 fprintf(stderr, ", no dir index");
931         if (errors & REF_ERR_NO_INODE_REF)
932                 fprintf(stderr, ", no inode ref");
933         if (errors & REF_ERR_DUP_DIR_ITEM)
934                 fprintf(stderr, ", dup dir item");
935         if (errors & REF_ERR_DUP_DIR_INDEX)
936                 fprintf(stderr, ", dup dir index");
937         if (errors & REF_ERR_DUP_INODE_REF)
938                 fprintf(stderr, ", dup inode ref");
939         if (errors & REF_ERR_INDEX_UNMATCH)
940                 fprintf(stderr, ", index mismatch");
941         if (errors & REF_ERR_FILETYPE_UNMATCH)
942                 fprintf(stderr, ", filetype mismatch");
943         if (errors & REF_ERR_NAME_TOO_LONG)
944                 fprintf(stderr, ", name too long");
945         if (errors & REF_ERR_NO_ROOT_REF)
946                 fprintf(stderr, ", no root ref");
947         if (errors & REF_ERR_NO_ROOT_BACKREF)
948                 fprintf(stderr, ", no root backref");
949         if (errors & REF_ERR_DUP_ROOT_REF)
950                 fprintf(stderr, ", dup root ref");
951         if (errors & REF_ERR_DUP_ROOT_BACKREF)
952                 fprintf(stderr, ", dup root backref");
953         fprintf(stderr, "\n");
954 }
955
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
957                                           u64 ino, int mod)
958 {
959         struct ptr_node *node;
960         struct cache_extent *cache;
961         struct inode_record *rec = NULL;
962         int ret;
963
964         cache = lookup_cache_extent(inode_cache, ino, 1);
965         if (cache) {
966                 node = container_of(cache, struct ptr_node, cache);
967                 rec = node->data;
968                 if (mod && rec->refs > 1) {
969                         node->data = clone_inode_rec(rec);
970                         if (IS_ERR(node->data))
971                                 return node->data;
972                         rec->refs--;
973                         rec = node->data;
974                 }
975         } else if (mod) {
976                 rec = calloc(1, sizeof(*rec));
977                 if (!rec)
978                         return ERR_PTR(-ENOMEM);
979                 rec->ino = ino;
980                 rec->extent_start = (u64)-1;
981                 rec->refs = 1;
982                 INIT_LIST_HEAD(&rec->backrefs);
983                 INIT_LIST_HEAD(&rec->orphan_extents);
984                 rec->holes = RB_ROOT;
985
986                 node = malloc(sizeof(*node));
987                 if (!node) {
988                         free(rec);
989                         return ERR_PTR(-ENOMEM);
990                 }
991                 node->cache.start = ino;
992                 node->cache.size = 1;
993                 node->data = rec;
994
995                 if (ino == BTRFS_FREE_INO_OBJECTID)
996                         rec->found_link = 1;
997
998                 ret = insert_cache_extent(inode_cache, &node->cache);
999                 if (ret)
1000                         return ERR_PTR(-EEXIST);
1001         }
1002         return rec;
1003 }
1004
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1006 {
1007         struct orphan_data_extent *orphan;
1008
1009         while (!list_empty(orphan_extents)) {
1010                 orphan = list_entry(orphan_extents->next,
1011                                     struct orphan_data_extent, list);
1012                 list_del(&orphan->list);
1013                 free(orphan);
1014         }
1015 }
1016
1017 static void free_inode_rec(struct inode_record *rec)
1018 {
1019         struct inode_backref *backref;
1020
1021         if (--rec->refs > 0)
1022                 return;
1023
1024         while (!list_empty(&rec->backrefs)) {
1025                 backref = to_inode_backref(rec->backrefs.next);
1026                 list_del(&backref->list);
1027                 free(backref);
1028         }
1029         free_orphan_data_extents(&rec->orphan_extents);
1030         free_file_extent_holes(&rec->holes);
1031         free(rec);
1032 }
1033
1034 static int can_free_inode_rec(struct inode_record *rec)
1035 {
1036         if (!rec->errors && rec->checked && rec->found_inode_item &&
1037             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1038                 return 1;
1039         return 0;
1040 }
1041
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043                                  struct inode_record *rec)
1044 {
1045         struct cache_extent *cache;
1046         struct inode_backref *tmp, *backref;
1047         struct ptr_node *node;
1048         u8 filetype;
1049
1050         if (!rec->found_inode_item)
1051                 return;
1052
1053         filetype = imode_to_type(rec->imode);
1054         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055                 if (backref->found_dir_item && backref->found_dir_index) {
1056                         if (backref->filetype != filetype)
1057                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058                         if (!backref->errors && backref->found_inode_ref &&
1059                             rec->nlink == rec->found_link) {
1060                                 list_del(&backref->list);
1061                                 free(backref);
1062                         }
1063                 }
1064         }
1065
1066         if (!rec->checked || rec->merging)
1067                 return;
1068
1069         if (S_ISDIR(rec->imode)) {
1070                 if (rec->found_size != rec->isize)
1071                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072                 if (rec->found_file_extent)
1073                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075                 if (rec->found_dir_item)
1076                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1077                 if (rec->found_size != rec->nbytes)
1078                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1079                 if (rec->nlink > 0 && !no_holes &&
1080                     (rec->extent_end < rec->isize ||
1081                      first_extent_gap(&rec->holes) < rec->isize))
1082                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1083         }
1084
1085         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086                 if (rec->found_csum_item && rec->nodatasum)
1087                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088                 if (rec->some_csum_missing && !rec->nodatasum)
1089                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1090         }
1091
1092         BUG_ON(rec->refs != 1);
1093         if (can_free_inode_rec(rec)) {
1094                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095                 node = container_of(cache, struct ptr_node, cache);
1096                 BUG_ON(node->data != rec);
1097                 remove_cache_extent(inode_cache, &node->cache);
1098                 free(node);
1099                 free_inode_rec(rec);
1100         }
1101 }
1102
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1104 {
1105         struct btrfs_path path;
1106         struct btrfs_key key;
1107         int ret;
1108
1109         key.objectid = BTRFS_ORPHAN_OBJECTID;
1110         key.type = BTRFS_ORPHAN_ITEM_KEY;
1111         key.offset = ino;
1112
1113         btrfs_init_path(&path);
1114         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115         btrfs_release_path(&path);
1116         if (ret > 0)
1117                 ret = -ENOENT;
1118         return ret;
1119 }
1120
1121 static int process_inode_item(struct extent_buffer *eb,
1122                               int slot, struct btrfs_key *key,
1123                               struct shared_node *active_node)
1124 {
1125         struct inode_record *rec;
1126         struct btrfs_inode_item *item;
1127
1128         rec = active_node->current;
1129         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130         if (rec->found_inode_item) {
1131                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1132                 return 1;
1133         }
1134         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135         rec->nlink = btrfs_inode_nlink(eb, item);
1136         rec->isize = btrfs_inode_size(eb, item);
1137         rec->nbytes = btrfs_inode_nbytes(eb, item);
1138         rec->imode = btrfs_inode_mode(eb, item);
1139         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1140                 rec->nodatasum = 1;
1141         rec->found_inode_item = 1;
1142         if (rec->nlink == 0)
1143                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144         maybe_free_inode_rec(&active_node->inode_cache, rec);
1145         return 0;
1146 }
1147
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1149                                                 const char *name,
1150                                                 int namelen, u64 dir)
1151 {
1152         struct inode_backref *backref;
1153
1154         list_for_each_entry(backref, &rec->backrefs, list) {
1155                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1156                         break;
1157                 if (backref->dir != dir || backref->namelen != namelen)
1158                         continue;
1159                 if (memcmp(name, backref->name, namelen))
1160                         continue;
1161                 return backref;
1162         }
1163
1164         backref = malloc(sizeof(*backref) + namelen + 1);
1165         if (!backref)
1166                 return NULL;
1167         memset(backref, 0, sizeof(*backref));
1168         backref->dir = dir;
1169         backref->namelen = namelen;
1170         memcpy(backref->name, name, namelen);
1171         backref->name[namelen] = '\0';
1172         list_add_tail(&backref->list, &rec->backrefs);
1173         return backref;
1174 }
1175
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177                              u64 ino, u64 dir, u64 index,
1178                              const char *name, int namelen,
1179                              u8 filetype, u8 itemtype, int errors)
1180 {
1181         struct inode_record *rec;
1182         struct inode_backref *backref;
1183
1184         rec = get_inode_rec(inode_cache, ino, 1);
1185         BUG_ON(IS_ERR(rec));
1186         backref = get_inode_backref(rec, name, namelen, dir);
1187         BUG_ON(!backref);
1188         if (errors)
1189                 backref->errors |= errors;
1190         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191                 if (backref->found_dir_index)
1192                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193                 if (backref->found_inode_ref && backref->index != index)
1194                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1195                 if (backref->found_dir_item && backref->filetype != filetype)
1196                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1197
1198                 backref->index = index;
1199                 backref->filetype = filetype;
1200                 backref->found_dir_index = 1;
1201         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1202                 rec->found_link++;
1203                 if (backref->found_dir_item)
1204                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205                 if (backref->found_dir_index && backref->filetype != filetype)
1206                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1207
1208                 backref->filetype = filetype;
1209                 backref->found_dir_item = 1;
1210         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212                 if (backref->found_inode_ref)
1213                         backref->errors |= REF_ERR_DUP_INODE_REF;
1214                 if (backref->found_dir_index && backref->index != index)
1215                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1216                 else
1217                         backref->index = index;
1218
1219                 backref->ref_type = itemtype;
1220                 backref->found_inode_ref = 1;
1221         } else {
1222                 BUG_ON(1);
1223         }
1224
1225         maybe_free_inode_rec(inode_cache, rec);
1226         return 0;
1227 }
1228
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230                             struct cache_tree *dst_cache)
1231 {
1232         struct inode_backref *backref;
1233         u32 dir_count = 0;
1234         int ret = 0;
1235
1236         dst->merging = 1;
1237         list_for_each_entry(backref, &src->backrefs, list) {
1238                 if (backref->found_dir_index) {
1239                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1240                                         backref->index, backref->name,
1241                                         backref->namelen, backref->filetype,
1242                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1243                 }
1244                 if (backref->found_dir_item) {
1245                         dir_count++;
1246                         add_inode_backref(dst_cache, dst->ino,
1247                                         backref->dir, 0, backref->name,
1248                                         backref->namelen, backref->filetype,
1249                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1250                 }
1251                 if (backref->found_inode_ref) {
1252                         add_inode_backref(dst_cache, dst->ino,
1253                                         backref->dir, backref->index,
1254                                         backref->name, backref->namelen, 0,
1255                                         backref->ref_type, backref->errors);
1256                 }
1257         }
1258
1259         if (src->found_dir_item)
1260                 dst->found_dir_item = 1;
1261         if (src->found_file_extent)
1262                 dst->found_file_extent = 1;
1263         if (src->found_csum_item)
1264                 dst->found_csum_item = 1;
1265         if (src->some_csum_missing)
1266                 dst->some_csum_missing = 1;
1267         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1269                 if (ret < 0)
1270                         return ret;
1271         }
1272
1273         BUG_ON(src->found_link < dir_count);
1274         dst->found_link += src->found_link - dir_count;
1275         dst->found_size += src->found_size;
1276         if (src->extent_start != (u64)-1) {
1277                 if (dst->extent_start == (u64)-1) {
1278                         dst->extent_start = src->extent_start;
1279                         dst->extent_end = src->extent_end;
1280                 } else {
1281                         if (dst->extent_end > src->extent_start)
1282                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1283                         else if (dst->extent_end < src->extent_start) {
1284                                 ret = add_file_extent_hole(&dst->holes,
1285                                         dst->extent_end,
1286                                         src->extent_start - dst->extent_end);
1287                         }
1288                         if (dst->extent_end < src->extent_end)
1289                                 dst->extent_end = src->extent_end;
1290                 }
1291         }
1292
1293         dst->errors |= src->errors;
1294         if (src->found_inode_item) {
1295                 if (!dst->found_inode_item) {
1296                         dst->nlink = src->nlink;
1297                         dst->isize = src->isize;
1298                         dst->nbytes = src->nbytes;
1299                         dst->imode = src->imode;
1300                         dst->nodatasum = src->nodatasum;
1301                         dst->found_inode_item = 1;
1302                 } else {
1303                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1304                 }
1305         }
1306         dst->merging = 0;
1307
1308         return 0;
1309 }
1310
1311 static int splice_shared_node(struct shared_node *src_node,
1312                               struct shared_node *dst_node)
1313 {
1314         struct cache_extent *cache;
1315         struct ptr_node *node, *ins;
1316         struct cache_tree *src, *dst;
1317         struct inode_record *rec, *conflict;
1318         u64 current_ino = 0;
1319         int splice = 0;
1320         int ret;
1321
1322         if (--src_node->refs == 0)
1323                 splice = 1;
1324         if (src_node->current)
1325                 current_ino = src_node->current->ino;
1326
1327         src = &src_node->root_cache;
1328         dst = &dst_node->root_cache;
1329 again:
1330         cache = search_cache_extent(src, 0);
1331         while (cache) {
1332                 node = container_of(cache, struct ptr_node, cache);
1333                 rec = node->data;
1334                 cache = next_cache_extent(cache);
1335
1336                 if (splice) {
1337                         remove_cache_extent(src, &node->cache);
1338                         ins = node;
1339                 } else {
1340                         ins = malloc(sizeof(*ins));
1341                         BUG_ON(!ins);
1342                         ins->cache.start = node->cache.start;
1343                         ins->cache.size = node->cache.size;
1344                         ins->data = rec;
1345                         rec->refs++;
1346                 }
1347                 ret = insert_cache_extent(dst, &ins->cache);
1348                 if (ret == -EEXIST) {
1349                         conflict = get_inode_rec(dst, rec->ino, 1);
1350                         BUG_ON(IS_ERR(conflict));
1351                         merge_inode_recs(rec, conflict, dst);
1352                         if (rec->checked) {
1353                                 conflict->checked = 1;
1354                                 if (dst_node->current == conflict)
1355                                         dst_node->current = NULL;
1356                         }
1357                         maybe_free_inode_rec(dst, conflict);
1358                         free_inode_rec(rec);
1359                         free(ins);
1360                 } else {
1361                         BUG_ON(ret);
1362                 }
1363         }
1364
1365         if (src == &src_node->root_cache) {
1366                 src = &src_node->inode_cache;
1367                 dst = &dst_node->inode_cache;
1368                 goto again;
1369         }
1370
1371         if (current_ino > 0 && (!dst_node->current ||
1372             current_ino > dst_node->current->ino)) {
1373                 if (dst_node->current) {
1374                         dst_node->current->checked = 1;
1375                         maybe_free_inode_rec(dst, dst_node->current);
1376                 }
1377                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378                 BUG_ON(IS_ERR(dst_node->current));
1379         }
1380         return 0;
1381 }
1382
1383 static void free_inode_ptr(struct cache_extent *cache)
1384 {
1385         struct ptr_node *node;
1386         struct inode_record *rec;
1387
1388         node = container_of(cache, struct ptr_node, cache);
1389         rec = node->data;
1390         free_inode_rec(rec);
1391         free(node);
1392 }
1393
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1395
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1397                                             u64 bytenr)
1398 {
1399         struct cache_extent *cache;
1400         struct shared_node *node;
1401
1402         cache = lookup_cache_extent(shared, bytenr, 1);
1403         if (cache) {
1404                 node = container_of(cache, struct shared_node, cache);
1405                 return node;
1406         }
1407         return NULL;
1408 }
1409
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1411 {
1412         int ret;
1413         struct shared_node *node;
1414
1415         node = calloc(1, sizeof(*node));
1416         if (!node)
1417                 return -ENOMEM;
1418         node->cache.start = bytenr;
1419         node->cache.size = 1;
1420         cache_tree_init(&node->root_cache);
1421         cache_tree_init(&node->inode_cache);
1422         node->refs = refs;
1423
1424         ret = insert_cache_extent(shared, &node->cache);
1425
1426         return ret;
1427 }
1428
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430                              struct walk_control *wc, int level)
1431 {
1432         struct shared_node *node;
1433         struct shared_node *dest;
1434         int ret;
1435
1436         if (level == wc->active_node)
1437                 return 0;
1438
1439         BUG_ON(wc->active_node <= level);
1440         node = find_shared_node(&wc->shared, bytenr);
1441         if (!node) {
1442                 ret = add_shared_node(&wc->shared, bytenr, refs);
1443                 BUG_ON(ret);
1444                 node = find_shared_node(&wc->shared, bytenr);
1445                 wc->nodes[level] = node;
1446                 wc->active_node = level;
1447                 return 0;
1448         }
1449
1450         if (wc->root_level == wc->active_node &&
1451             btrfs_root_refs(&root->root_item) == 0) {
1452                 if (--node->refs == 0) {
1453                         free_inode_recs_tree(&node->root_cache);
1454                         free_inode_recs_tree(&node->inode_cache);
1455                         remove_cache_extent(&wc->shared, &node->cache);
1456                         free(node);
1457                 }
1458                 return 1;
1459         }
1460
1461         dest = wc->nodes[wc->active_node];
1462         splice_shared_node(node, dest);
1463         if (node->refs == 0) {
1464                 remove_cache_extent(&wc->shared, &node->cache);
1465                 free(node);
1466         }
1467         return 1;
1468 }
1469
1470 static int leave_shared_node(struct btrfs_root *root,
1471                              struct walk_control *wc, int level)
1472 {
1473         struct shared_node *node;
1474         struct shared_node *dest;
1475         int i;
1476
1477         if (level == wc->root_level)
1478                 return 0;
1479
1480         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1481                 if (wc->nodes[i])
1482                         break;
1483         }
1484         BUG_ON(i >= BTRFS_MAX_LEVEL);
1485
1486         node = wc->nodes[wc->active_node];
1487         wc->nodes[wc->active_node] = NULL;
1488         wc->active_node = i;
1489
1490         dest = wc->nodes[wc->active_node];
1491         if (wc->active_node < wc->root_level ||
1492             btrfs_root_refs(&root->root_item) > 0) {
1493                 BUG_ON(node->refs <= 1);
1494                 splice_shared_node(node, dest);
1495         } else {
1496                 BUG_ON(node->refs < 2);
1497                 node->refs--;
1498         }
1499         return 0;
1500 }
1501
1502 /*
1503  * Returns:
1504  * < 0 - on error
1505  * 1   - if the root with id child_root_id is a child of root parent_root_id
1506  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1507  *       has other root(s) as parent(s)
1508  * 2   - if the root child_root_id doesn't have any parent roots
1509  */
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1511                          u64 child_root_id)
1512 {
1513         struct btrfs_path path;
1514         struct btrfs_key key;
1515         struct extent_buffer *leaf;
1516         int has_parent = 0;
1517         int ret;
1518
1519         btrfs_init_path(&path);
1520
1521         key.objectid = parent_root_id;
1522         key.type = BTRFS_ROOT_REF_KEY;
1523         key.offset = child_root_id;
1524         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525                                 0, 0);
1526         if (ret < 0)
1527                 return ret;
1528         btrfs_release_path(&path);
1529         if (!ret)
1530                 return 1;
1531
1532         key.objectid = child_root_id;
1533         key.type = BTRFS_ROOT_BACKREF_KEY;
1534         key.offset = 0;
1535         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1536                                 0, 0);
1537         if (ret < 0)
1538                 goto out;
1539
1540         while (1) {
1541                 leaf = path.nodes[0];
1542                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1544                         if (ret)
1545                                 break;
1546                         leaf = path.nodes[0];
1547                 }
1548
1549                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1550                 if (key.objectid != child_root_id ||
1551                     key.type != BTRFS_ROOT_BACKREF_KEY)
1552                         break;
1553
1554                 has_parent = 1;
1555
1556                 if (key.offset == parent_root_id) {
1557                         btrfs_release_path(&path);
1558                         return 1;
1559                 }
1560
1561                 path.slots[0]++;
1562         }
1563 out:
1564         btrfs_release_path(&path);
1565         if (ret < 0)
1566                 return ret;
1567         return has_parent ? 0 : 2;
1568 }
1569
1570 static int process_dir_item(struct extent_buffer *eb,
1571                             int slot, struct btrfs_key *key,
1572                             struct shared_node *active_node)
1573 {
1574         u32 total;
1575         u32 cur = 0;
1576         u32 len;
1577         u32 name_len;
1578         u32 data_len;
1579         int error;
1580         int nritems = 0;
1581         u8 filetype;
1582         struct btrfs_dir_item *di;
1583         struct inode_record *rec;
1584         struct cache_tree *root_cache;
1585         struct cache_tree *inode_cache;
1586         struct btrfs_key location;
1587         char namebuf[BTRFS_NAME_LEN];
1588
1589         root_cache = &active_node->root_cache;
1590         inode_cache = &active_node->inode_cache;
1591         rec = active_node->current;
1592         rec->found_dir_item = 1;
1593
1594         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595         total = btrfs_item_size_nr(eb, slot);
1596         while (cur < total) {
1597                 nritems++;
1598                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599                 name_len = btrfs_dir_name_len(eb, di);
1600                 data_len = btrfs_dir_data_len(eb, di);
1601                 filetype = btrfs_dir_type(eb, di);
1602
1603                 rec->found_size += name_len;
1604                 if (cur + sizeof(*di) + name_len > total ||
1605                     name_len > BTRFS_NAME_LEN) {
1606                         error = REF_ERR_NAME_TOO_LONG;
1607
1608                         if (cur + sizeof(*di) > total)
1609                                 break;
1610                         len = min_t(u32, total - cur - sizeof(*di),
1611                                     BTRFS_NAME_LEN);
1612                 } else {
1613                         len = name_len;
1614                         error = 0;
1615                 }
1616
1617                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1618
1619                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620                     key->offset != btrfs_name_hash(namebuf, len)) {
1621                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1622                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623                         key->objectid, key->offset, namebuf, len, filetype,
1624                         key->offset, btrfs_name_hash(namebuf, len));
1625                 }
1626
1627                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628                         add_inode_backref(inode_cache, location.objectid,
1629                                           key->objectid, key->offset, namebuf,
1630                                           len, filetype, key->type, error);
1631                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632                         add_inode_backref(root_cache, location.objectid,
1633                                           key->objectid, key->offset,
1634                                           namebuf, len, filetype,
1635                                           key->type, error);
1636                 } else {
1637                         fprintf(stderr, "invalid location in dir item %u\n",
1638                                 location.type);
1639                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640                                           key->objectid, key->offset, namebuf,
1641                                           len, filetype, key->type, error);
1642                 }
1643
1644                 len = sizeof(*di) + name_len + data_len;
1645                 di = (struct btrfs_dir_item *)((char *)di + len);
1646                 cur += len;
1647         }
1648         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1650
1651         return 0;
1652 }
1653
1654 static int process_inode_ref(struct extent_buffer *eb,
1655                              int slot, struct btrfs_key *key,
1656                              struct shared_node *active_node)
1657 {
1658         u32 total;
1659         u32 cur = 0;
1660         u32 len;
1661         u32 name_len;
1662         u64 index;
1663         int error;
1664         struct cache_tree *inode_cache;
1665         struct btrfs_inode_ref *ref;
1666         char namebuf[BTRFS_NAME_LEN];
1667
1668         inode_cache = &active_node->inode_cache;
1669
1670         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671         total = btrfs_item_size_nr(eb, slot);
1672         while (cur < total) {
1673                 name_len = btrfs_inode_ref_name_len(eb, ref);
1674                 index = btrfs_inode_ref_index(eb, ref);
1675
1676                 /* inode_ref + namelen should not cross item boundary */
1677                 if (cur + sizeof(*ref) + name_len > total ||
1678                     name_len > BTRFS_NAME_LEN) {
1679                         if (total < cur + sizeof(*ref))
1680                                 break;
1681
1682                         /* Still try to read out the remaining part */
1683                         len = min_t(u32, total - cur - sizeof(*ref),
1684                                     BTRFS_NAME_LEN);
1685                         error = REF_ERR_NAME_TOO_LONG;
1686                 } else {
1687                         len = name_len;
1688                         error = 0;
1689                 }
1690
1691                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692                 add_inode_backref(inode_cache, key->objectid, key->offset,
1693                                   index, namebuf, len, 0, key->type, error);
1694
1695                 len = sizeof(*ref) + name_len;
1696                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1697                 cur += len;
1698         }
1699         return 0;
1700 }
1701
1702 static int process_inode_extref(struct extent_buffer *eb,
1703                                 int slot, struct btrfs_key *key,
1704                                 struct shared_node *active_node)
1705 {
1706         u32 total;
1707         u32 cur = 0;
1708         u32 len;
1709         u32 name_len;
1710         u64 index;
1711         u64 parent;
1712         int error;
1713         struct cache_tree *inode_cache;
1714         struct btrfs_inode_extref *extref;
1715         char namebuf[BTRFS_NAME_LEN];
1716
1717         inode_cache = &active_node->inode_cache;
1718
1719         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720         total = btrfs_item_size_nr(eb, slot);
1721         while (cur < total) {
1722                 name_len = btrfs_inode_extref_name_len(eb, extref);
1723                 index = btrfs_inode_extref_index(eb, extref);
1724                 parent = btrfs_inode_extref_parent(eb, extref);
1725                 if (name_len <= BTRFS_NAME_LEN) {
1726                         len = name_len;
1727                         error = 0;
1728                 } else {
1729                         len = BTRFS_NAME_LEN;
1730                         error = REF_ERR_NAME_TOO_LONG;
1731                 }
1732                 read_extent_buffer(eb, namebuf,
1733                                    (unsigned long)(extref + 1), len);
1734                 add_inode_backref(inode_cache, key->objectid, parent,
1735                                   index, namebuf, len, 0, key->type, error);
1736
1737                 len = sizeof(*extref) + name_len;
1738                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1739                 cur += len;
1740         }
1741         return 0;
1742
1743 }
1744
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746                             u64 len, u64 *found)
1747 {
1748         struct btrfs_key key;
1749         struct btrfs_path path;
1750         struct extent_buffer *leaf;
1751         int ret;
1752         size_t size;
1753         *found = 0;
1754         u64 csum_end;
1755         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1756
1757         btrfs_init_path(&path);
1758
1759         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1760         key.offset = start;
1761         key.type = BTRFS_EXTENT_CSUM_KEY;
1762
1763         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1764                                 &key, &path, 0, 0);
1765         if (ret < 0)
1766                 goto out;
1767         if (ret > 0 && path.slots[0] > 0) {
1768                 leaf = path.nodes[0];
1769                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771                     key.type == BTRFS_EXTENT_CSUM_KEY)
1772                         path.slots[0]--;
1773         }
1774
1775         while (len > 0) {
1776                 leaf = path.nodes[0];
1777                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1779                         if (ret > 0)
1780                                 break;
1781                         else if (ret < 0)
1782                                 goto out;
1783                         leaf = path.nodes[0];
1784                 }
1785
1786                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1787                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788                     key.type != BTRFS_EXTENT_CSUM_KEY)
1789                         break;
1790
1791                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1792                 if (key.offset >= start + len)
1793                         break;
1794
1795                 if (key.offset > start)
1796                         start = key.offset;
1797
1798                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1799                 csum_end = key.offset + (size / csum_size) *
1800                            root->fs_info->sectorsize;
1801                 if (csum_end > start) {
1802                         size = min(csum_end - start, len);
1803                         len -= size;
1804                         start += size;
1805                         *found += size;
1806                 }
1807
1808                 path.slots[0]++;
1809         }
1810 out:
1811         btrfs_release_path(&path);
1812         if (ret < 0)
1813                 return ret;
1814         return 0;
1815 }
1816
1817 static int process_file_extent(struct btrfs_root *root,
1818                                 struct extent_buffer *eb,
1819                                 int slot, struct btrfs_key *key,
1820                                 struct shared_node *active_node)
1821 {
1822         struct inode_record *rec;
1823         struct btrfs_file_extent_item *fi;
1824         u64 num_bytes = 0;
1825         u64 disk_bytenr = 0;
1826         u64 extent_offset = 0;
1827         u64 mask = root->fs_info->sectorsize - 1;
1828         int extent_type;
1829         int ret;
1830
1831         rec = active_node->current;
1832         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833         rec->found_file_extent = 1;
1834
1835         if (rec->extent_start == (u64)-1) {
1836                 rec->extent_start = key->offset;
1837                 rec->extent_end = key->offset;
1838         }
1839
1840         if (rec->extent_end > key->offset)
1841                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842         else if (rec->extent_end < key->offset) {
1843                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844                                            key->offset - rec->extent_end);
1845                 if (ret < 0)
1846                         return ret;
1847         }
1848
1849         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850         extent_type = btrfs_file_extent_type(eb, fi);
1851
1852         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1854                 if (num_bytes == 0)
1855                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856                 rec->found_size += num_bytes;
1857                 num_bytes = (num_bytes + mask) & ~mask;
1858         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862                 extent_offset = btrfs_file_extent_offset(eb, fi);
1863                 if (num_bytes == 0 || (num_bytes & mask))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (num_bytes + extent_offset >
1866                     btrfs_file_extent_ram_bytes(eb, fi))
1867                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1868                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869                     (btrfs_file_extent_compression(eb, fi) ||
1870                      btrfs_file_extent_encryption(eb, fi) ||
1871                      btrfs_file_extent_other_encoding(eb, fi)))
1872                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1873                 if (disk_bytenr > 0)
1874                         rec->found_size += num_bytes;
1875         } else {
1876                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1877         }
1878         rec->extent_end = key->offset + num_bytes;
1879
1880         /*
1881          * The data reloc tree will copy full extents into its inode and then
1882          * copy the corresponding csums.  Because the extent it copied could be
1883          * a preallocated extent that hasn't been written to yet there may be no
1884          * csums to copy, ergo we won't have csums for our file extent.  This is
1885          * ok so just don't bother checking csums if the inode belongs to the
1886          * data reloc tree.
1887          */
1888         if (disk_bytenr > 0 &&
1889             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1890                 u64 found;
1891                 if (btrfs_file_extent_compression(eb, fi))
1892                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1893                 else
1894                         disk_bytenr += extent_offset;
1895
1896                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1897                 if (ret < 0)
1898                         return ret;
1899                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1900                         if (found > 0)
1901                                 rec->found_csum_item = 1;
1902                         if (found < num_bytes)
1903                                 rec->some_csum_missing = 1;
1904                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1905                         if (found > 0)
1906                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1907                 }
1908         }
1909         return 0;
1910 }
1911
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913                             struct walk_control *wc)
1914 {
1915         struct btrfs_key key;
1916         u32 nritems;
1917         int i;
1918         int ret = 0;
1919         struct cache_tree *inode_cache;
1920         struct shared_node *active_node;
1921
1922         if (wc->root_level == wc->active_node &&
1923             btrfs_root_refs(&root->root_item) == 0)
1924                 return 0;
1925
1926         active_node = wc->nodes[wc->active_node];
1927         inode_cache = &active_node->inode_cache;
1928         nritems = btrfs_header_nritems(eb);
1929         for (i = 0; i < nritems; i++) {
1930                 btrfs_item_key_to_cpu(eb, &key, i);
1931
1932                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1933                         continue;
1934                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1935                         continue;
1936
1937                 if (active_node->current == NULL ||
1938                     active_node->current->ino < key.objectid) {
1939                         if (active_node->current) {
1940                                 active_node->current->checked = 1;
1941                                 maybe_free_inode_rec(inode_cache,
1942                                                      active_node->current);
1943                         }
1944                         active_node->current = get_inode_rec(inode_cache,
1945                                                              key.objectid, 1);
1946                         BUG_ON(IS_ERR(active_node->current));
1947                 }
1948                 switch (key.type) {
1949                 case BTRFS_DIR_ITEM_KEY:
1950                 case BTRFS_DIR_INDEX_KEY:
1951                         ret = process_dir_item(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_REF_KEY:
1954                         ret = process_inode_ref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_EXTREF_KEY:
1957                         ret = process_inode_extref(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_INODE_ITEM_KEY:
1960                         ret = process_inode_item(eb, i, &key, active_node);
1961                         break;
1962                 case BTRFS_EXTENT_DATA_KEY:
1963                         ret = process_file_extent(root, eb, i, &key,
1964                                                   active_node);
1965                         break;
1966                 default:
1967                         break;
1968                 };
1969         }
1970         return ret;
1971 }
1972
1973 struct node_refs {
1974         u64 bytenr[BTRFS_MAX_LEVEL];
1975         u64 refs[BTRFS_MAX_LEVEL];
1976         int need_check[BTRFS_MAX_LEVEL];
1977 };
1978
1979 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1980                              struct node_refs *nrefs, u64 level);
1981 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1982                             unsigned int ext_ref);
1983
1984 /*
1985  * Returns >0  Found error, not fatal, should continue
1986  * Returns <0  Fatal error, must exit the whole check
1987  * Returns 0   No errors found
1988  */
1989 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1990                                struct node_refs *nrefs, int *level, int ext_ref)
1991 {
1992         struct extent_buffer *cur = path->nodes[0];
1993         struct btrfs_key key;
1994         u64 cur_bytenr;
1995         u32 nritems;
1996         u64 first_ino = 0;
1997         int root_level = btrfs_header_level(root->node);
1998         int i;
1999         int ret = 0; /* Final return value */
2000         int err = 0; /* Positive error bitmap */
2001
2002         cur_bytenr = cur->start;
2003
2004         /* skip to first inode item or the first inode number change */
2005         nritems = btrfs_header_nritems(cur);
2006         for (i = 0; i < nritems; i++) {
2007                 btrfs_item_key_to_cpu(cur, &key, i);
2008                 if (i == 0)
2009                         first_ino = key.objectid;
2010                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2011                     (first_ino && first_ino != key.objectid))
2012                         break;
2013         }
2014         if (i == nritems) {
2015                 path->slots[0] = nritems;
2016                 return 0;
2017         }
2018         path->slots[0] = i;
2019
2020 again:
2021         err |= check_inode_item(root, path, ext_ref);
2022
2023         /* modify cur since check_inode_item may change path */
2024         cur = path->nodes[0];
2025
2026         if (err & LAST_ITEM)
2027                 goto out;
2028
2029         /* still have inode items in thie leaf */
2030         if (cur->start == cur_bytenr)
2031                 goto again;
2032
2033         /*
2034          * we have switched to another leaf, above nodes may
2035          * have changed, here walk down the path, if a node
2036          * or leaf is shared, check whether we can skip this
2037          * node or leaf.
2038          */
2039         for (i = root_level; i >= 0; i--) {
2040                 if (path->nodes[i]->start == nrefs->bytenr[i])
2041                         continue;
2042
2043                 ret = update_nodes_refs(root,
2044                                 path->nodes[i]->start,
2045                                 nrefs, i);
2046                 if (ret)
2047                         goto out;
2048
2049                 if (!nrefs->need_check[i]) {
2050                         *level += 1;
2051                         break;
2052                 }
2053         }
2054
2055         for (i = 0; i < *level; i++) {
2056                 free_extent_buffer(path->nodes[i]);
2057                 path->nodes[i] = NULL;
2058         }
2059 out:
2060         err &= ~LAST_ITEM;
2061         if (err && !ret)
2062                 ret = err;
2063         return ret;
2064 }
2065
2066 static void reada_walk_down(struct btrfs_root *root,
2067                             struct extent_buffer *node, int slot)
2068 {
2069         struct btrfs_fs_info *fs_info = root->fs_info;
2070         u64 bytenr;
2071         u64 ptr_gen;
2072         u32 nritems;
2073         int i;
2074         int level;
2075
2076         level = btrfs_header_level(node);
2077         if (level != 1)
2078                 return;
2079
2080         nritems = btrfs_header_nritems(node);
2081         for (i = slot; i < nritems; i++) {
2082                 bytenr = btrfs_node_blockptr(node, i);
2083                 ptr_gen = btrfs_node_ptr_generation(node, i);
2084                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2085         }
2086 }
2087
2088 /*
2089  * Check the child node/leaf by the following condition:
2090  * 1. the first item key of the node/leaf should be the same with the one
2091  *    in parent.
2092  * 2. block in parent node should match the child node/leaf.
2093  * 3. generation of parent node and child's header should be consistent.
2094  *
2095  * Or the child node/leaf pointed by the key in parent is not valid.
2096  *
2097  * We hope to check leaf owner too, but since subvol may share leaves,
2098  * which makes leaf owner check not so strong, key check should be
2099  * sufficient enough for that case.
2100  */
2101 static int check_child_node(struct extent_buffer *parent, int slot,
2102                             struct extent_buffer *child)
2103 {
2104         struct btrfs_key parent_key;
2105         struct btrfs_key child_key;
2106         int ret = 0;
2107
2108         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2109         if (btrfs_header_level(child) == 0)
2110                 btrfs_item_key_to_cpu(child, &child_key, 0);
2111         else
2112                 btrfs_node_key_to_cpu(child, &child_key, 0);
2113
2114         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2115                 ret = -EINVAL;
2116                 fprintf(stderr,
2117                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2118                         parent_key.objectid, parent_key.type, parent_key.offset,
2119                         child_key.objectid, child_key.type, child_key.offset);
2120         }
2121         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2122                 ret = -EINVAL;
2123                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2124                         btrfs_node_blockptr(parent, slot),
2125                         btrfs_header_bytenr(child));
2126         }
2127         if (btrfs_node_ptr_generation(parent, slot) !=
2128             btrfs_header_generation(child)) {
2129                 ret = -EINVAL;
2130                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2131                         btrfs_header_generation(child),
2132                         btrfs_node_ptr_generation(parent, slot));
2133         }
2134         return ret;
2135 }
2136
2137 /*
2138  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2139  * in every fs or file tree check. Here we find its all root ids, and only check
2140  * it in the fs or file tree which has the smallest root id.
2141  */
2142 static int need_check(struct btrfs_root *root, struct ulist *roots)
2143 {
2144         struct rb_node *node;
2145         struct ulist_node *u;
2146
2147         if (roots->nnodes == 1)
2148                 return 1;
2149
2150         node = rb_first(&roots->root);
2151         u = rb_entry(node, struct ulist_node, rb_node);
2152         /*
2153          * current root id is not smallest, we skip it and let it be checked
2154          * in the fs or file tree who hash the smallest root id.
2155          */
2156         if (root->objectid != u->val)
2157                 return 0;
2158
2159         return 1;
2160 }
2161
2162 /*
2163  * for a tree node or leaf, we record its reference count, so later if we still
2164  * process this node or leaf, don't need to compute its reference count again.
2165  */
2166 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2167                              struct node_refs *nrefs, u64 level)
2168 {
2169         int check, ret;
2170         u64 refs;
2171         struct ulist *roots;
2172
2173         if (nrefs->bytenr[level] != bytenr) {
2174                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2175                                        level, 1, &refs, NULL);
2176                 if (ret < 0)
2177                         return ret;
2178
2179                 nrefs->bytenr[level] = bytenr;
2180                 nrefs->refs[level] = refs;
2181                 if (refs > 1) {
2182                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2183                                                    0, &roots);
2184                         if (ret)
2185                                 return -EIO;
2186
2187                         check = need_check(root, roots);
2188                         ulist_free(roots);
2189                         nrefs->need_check[level] = check;
2190                 } else {
2191                         nrefs->need_check[level] = 1;
2192                 }
2193         }
2194
2195         return 0;
2196 }
2197
2198 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2199                           struct walk_control *wc, int *level,
2200                           struct node_refs *nrefs)
2201 {
2202         enum btrfs_tree_block_status status;
2203         u64 bytenr;
2204         u64 ptr_gen;
2205         struct btrfs_fs_info *fs_info = root->fs_info;
2206         struct extent_buffer *next;
2207         struct extent_buffer *cur;
2208         int ret, err = 0;
2209         u64 refs;
2210
2211         WARN_ON(*level < 0);
2212         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2213
2214         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2215                 refs = nrefs->refs[*level];
2216                 ret = 0;
2217         } else {
2218                 ret = btrfs_lookup_extent_info(NULL, root,
2219                                        path->nodes[*level]->start,
2220                                        *level, 1, &refs, NULL);
2221                 if (ret < 0) {
2222                         err = ret;
2223                         goto out;
2224                 }
2225                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2226                 nrefs->refs[*level] = refs;
2227         }
2228
2229         if (refs > 1) {
2230                 ret = enter_shared_node(root, path->nodes[*level]->start,
2231                                         refs, wc, *level);
2232                 if (ret > 0) {
2233                         err = ret;
2234                         goto out;
2235                 }
2236         }
2237
2238         while (*level >= 0) {
2239                 WARN_ON(*level < 0);
2240                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2241                 cur = path->nodes[*level];
2242
2243                 if (btrfs_header_level(cur) != *level)
2244                         WARN_ON(1);
2245
2246                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2247                         break;
2248                 if (*level == 0) {
2249                         ret = process_one_leaf(root, cur, wc);
2250                         if (ret < 0)
2251                                 err = ret;
2252                         break;
2253                 }
2254                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2255                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2256
2257                 if (bytenr == nrefs->bytenr[*level - 1]) {
2258                         refs = nrefs->refs[*level - 1];
2259                 } else {
2260                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2261                                         *level - 1, 1, &refs, NULL);
2262                         if (ret < 0) {
2263                                 refs = 0;
2264                         } else {
2265                                 nrefs->bytenr[*level - 1] = bytenr;
2266                                 nrefs->refs[*level - 1] = refs;
2267                         }
2268                 }
2269
2270                 if (refs > 1) {
2271                         ret = enter_shared_node(root, bytenr, refs,
2272                                                 wc, *level - 1);
2273                         if (ret > 0) {
2274                                 path->slots[*level]++;
2275                                 continue;
2276                         }
2277                 }
2278
2279                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2280                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281                         free_extent_buffer(next);
2282                         reada_walk_down(root, cur, path->slots[*level]);
2283                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2284                         if (!extent_buffer_uptodate(next)) {
2285                                 struct btrfs_key node_key;
2286
2287                                 btrfs_node_key_to_cpu(path->nodes[*level],
2288                                                       &node_key,
2289                                                       path->slots[*level]);
2290                                 btrfs_add_corrupt_extent_record(root->fs_info,
2291                                                 &node_key,
2292                                                 path->nodes[*level]->start,
2293                                                 root->fs_info->nodesize,
2294                                                 *level);
2295                                 err = -EIO;
2296                                 goto out;
2297                         }
2298                 }
2299
2300                 ret = check_child_node(cur, path->slots[*level], next);
2301                 if (ret) {
2302                         free_extent_buffer(next);
2303                         err = ret;
2304                         goto out;
2305                 }
2306
2307                 if (btrfs_is_leaf(next))
2308                         status = btrfs_check_leaf(root, NULL, next);
2309                 else
2310                         status = btrfs_check_node(root, NULL, next);
2311                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2312                         free_extent_buffer(next);
2313                         err = -EIO;
2314                         goto out;
2315                 }
2316
2317                 *level = *level - 1;
2318                 free_extent_buffer(path->nodes[*level]);
2319                 path->nodes[*level] = next;
2320                 path->slots[*level] = 0;
2321         }
2322 out:
2323         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2324         return err;
2325 }
2326
2327 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2328                             unsigned int ext_ref);
2329
2330 /*
2331  * Returns >0  Found error, should continue
2332  * Returns <0  Fatal error, must exit the whole check
2333  * Returns 0   No errors found
2334  */
2335 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2336                              int *level, struct node_refs *nrefs, int ext_ref)
2337 {
2338         enum btrfs_tree_block_status status;
2339         u64 bytenr;
2340         u64 ptr_gen;
2341         struct btrfs_fs_info *fs_info = root->fs_info;
2342         struct extent_buffer *next;
2343         struct extent_buffer *cur;
2344         int ret;
2345
2346         WARN_ON(*level < 0);
2347         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2348
2349         ret = update_nodes_refs(root, path->nodes[*level]->start,
2350                                 nrefs, *level);
2351         if (ret < 0)
2352                 return ret;
2353
2354         while (*level >= 0) {
2355                 WARN_ON(*level < 0);
2356                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2357                 cur = path->nodes[*level];
2358
2359                 if (btrfs_header_level(cur) != *level)
2360                         WARN_ON(1);
2361
2362                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2363                         break;
2364                 /* Don't forgot to check leaf/node validation */
2365                 if (*level == 0) {
2366                         ret = btrfs_check_leaf(root, NULL, cur);
2367                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368                                 ret = -EIO;
2369                                 break;
2370                         }
2371                         ret = process_one_leaf_v2(root, path, nrefs,
2372                                                   level, ext_ref);
2373                         cur = path->nodes[*level];
2374                         break;
2375                 } else {
2376                         ret = btrfs_check_node(root, NULL, cur);
2377                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2378                                 ret = -EIO;
2379                                 break;
2380                         }
2381                 }
2382                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2383                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2384
2385                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2386                 if (ret)
2387                         break;
2388                 if (!nrefs->need_check[*level - 1]) {
2389                         path->slots[*level]++;
2390                         continue;
2391                 }
2392
2393                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2394                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2395                         free_extent_buffer(next);
2396                         reada_walk_down(root, cur, path->slots[*level]);
2397                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2398                         if (!extent_buffer_uptodate(next)) {
2399                                 struct btrfs_key node_key;
2400
2401                                 btrfs_node_key_to_cpu(path->nodes[*level],
2402                                                       &node_key,
2403                                                       path->slots[*level]);
2404                                 btrfs_add_corrupt_extent_record(fs_info,
2405                                                 &node_key,
2406                                                 path->nodes[*level]->start,
2407                                                 fs_info->nodesize,
2408                                                 *level);
2409                                 ret = -EIO;
2410                                 break;
2411                         }
2412                 }
2413
2414                 ret = check_child_node(cur, path->slots[*level], next);
2415                 if (ret < 0) 
2416                         break;
2417
2418                 if (btrfs_is_leaf(next))
2419                         status = btrfs_check_leaf(root, NULL, next);
2420                 else
2421                         status = btrfs_check_node(root, NULL, next);
2422                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2423                         free_extent_buffer(next);
2424                         ret = -EIO;
2425                         break;
2426                 }
2427
2428                 *level = *level - 1;
2429                 free_extent_buffer(path->nodes[*level]);
2430                 path->nodes[*level] = next;
2431                 path->slots[*level] = 0;
2432         }
2433         return ret;
2434 }
2435
2436 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2437                         struct walk_control *wc, int *level)
2438 {
2439         int i;
2440         struct extent_buffer *leaf;
2441
2442         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2443                 leaf = path->nodes[i];
2444                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2445                         path->slots[i]++;
2446                         *level = i;
2447                         return 0;
2448                 } else {
2449                         free_extent_buffer(path->nodes[*level]);
2450                         path->nodes[*level] = NULL;
2451                         BUG_ON(*level > wc->active_node);
2452                         if (*level == wc->active_node)
2453                                 leave_shared_node(root, wc, *level);
2454                         *level = i + 1;
2455                 }
2456         }
2457         return 1;
2458 }
2459
2460 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2461                            int *level)
2462 {
2463         int i;
2464         struct extent_buffer *leaf;
2465
2466         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2467                 leaf = path->nodes[i];
2468                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2469                         path->slots[i]++;
2470                         *level = i;
2471                         return 0;
2472                 } else {
2473                         free_extent_buffer(path->nodes[*level]);
2474                         path->nodes[*level] = NULL;
2475                         *level = i + 1;
2476                 }
2477         }
2478         return 1;
2479 }
2480
2481 static int check_root_dir(struct inode_record *rec)
2482 {
2483         struct inode_backref *backref;
2484         int ret = -1;
2485
2486         if (!rec->found_inode_item || rec->errors)
2487                 goto out;
2488         if (rec->nlink != 1 || rec->found_link != 0)
2489                 goto out;
2490         if (list_empty(&rec->backrefs))
2491                 goto out;
2492         backref = to_inode_backref(rec->backrefs.next);
2493         if (!backref->found_inode_ref)
2494                 goto out;
2495         if (backref->index != 0 || backref->namelen != 2 ||
2496             memcmp(backref->name, "..", 2))
2497                 goto out;
2498         if (backref->found_dir_index || backref->found_dir_item)
2499                 goto out;
2500         ret = 0;
2501 out:
2502         return ret;
2503 }
2504
2505 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2506                               struct btrfs_root *root, struct btrfs_path *path,
2507                               struct inode_record *rec)
2508 {
2509         struct btrfs_inode_item *ei;
2510         struct btrfs_key key;
2511         int ret;
2512
2513         key.objectid = rec->ino;
2514         key.type = BTRFS_INODE_ITEM_KEY;
2515         key.offset = (u64)-1;
2516
2517         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2518         if (ret < 0)
2519                 goto out;
2520         if (ret) {
2521                 if (!path->slots[0]) {
2522                         ret = -ENOENT;
2523                         goto out;
2524                 }
2525                 path->slots[0]--;
2526                 ret = 0;
2527         }
2528         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2529         if (key.objectid != rec->ino) {
2530                 ret = -ENOENT;
2531                 goto out;
2532         }
2533
2534         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2535                             struct btrfs_inode_item);
2536         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2537         btrfs_mark_buffer_dirty(path->nodes[0]);
2538         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2539         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2540                root->root_key.objectid);
2541 out:
2542         btrfs_release_path(path);
2543         return ret;
2544 }
2545
2546 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2547                                     struct btrfs_root *root,
2548                                     struct btrfs_path *path,
2549                                     struct inode_record *rec)
2550 {
2551         int ret;
2552
2553         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2554         btrfs_release_path(path);
2555         if (!ret)
2556                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2557         return ret;
2558 }
2559
2560 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2561                                struct btrfs_root *root,
2562                                struct btrfs_path *path,
2563                                struct inode_record *rec)
2564 {
2565         struct btrfs_inode_item *ei;
2566         struct btrfs_key key;
2567         int ret = 0;
2568
2569         key.objectid = rec->ino;
2570         key.type = BTRFS_INODE_ITEM_KEY;
2571         key.offset = 0;
2572
2573         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2574         if (ret) {
2575                 if (ret > 0)
2576                         ret = -ENOENT;
2577                 goto out;
2578         }
2579
2580         /* Since ret == 0, no need to check anything */
2581         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2582                             struct btrfs_inode_item);
2583         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2584         btrfs_mark_buffer_dirty(path->nodes[0]);
2585         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2586         printf("reset nbytes for ino %llu root %llu\n",
2587                rec->ino, root->root_key.objectid);
2588 out:
2589         btrfs_release_path(path);
2590         return ret;
2591 }
2592
2593 static int add_missing_dir_index(struct btrfs_root *root,
2594                                  struct cache_tree *inode_cache,
2595                                  struct inode_record *rec,
2596                                  struct inode_backref *backref)
2597 {
2598         struct btrfs_path path;
2599         struct btrfs_trans_handle *trans;
2600         struct btrfs_dir_item *dir_item;
2601         struct extent_buffer *leaf;
2602         struct btrfs_key key;
2603         struct btrfs_disk_key disk_key;
2604         struct inode_record *dir_rec;
2605         unsigned long name_ptr;
2606         u32 data_size = sizeof(*dir_item) + backref->namelen;
2607         int ret;
2608
2609         trans = btrfs_start_transaction(root, 1);
2610         if (IS_ERR(trans))
2611                 return PTR_ERR(trans);
2612
2613         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2614                 (unsigned long long)rec->ino);
2615
2616         btrfs_init_path(&path);
2617         key.objectid = backref->dir;
2618         key.type = BTRFS_DIR_INDEX_KEY;
2619         key.offset = backref->index;
2620         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2621         BUG_ON(ret);
2622
2623         leaf = path.nodes[0];
2624         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2625
2626         disk_key.objectid = cpu_to_le64(rec->ino);
2627         disk_key.type = BTRFS_INODE_ITEM_KEY;
2628         disk_key.offset = 0;
2629
2630         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2631         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2632         btrfs_set_dir_data_len(leaf, dir_item, 0);
2633         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2634         name_ptr = (unsigned long)(dir_item + 1);
2635         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2636         btrfs_mark_buffer_dirty(leaf);
2637         btrfs_release_path(&path);
2638         btrfs_commit_transaction(trans, root);
2639
2640         backref->found_dir_index = 1;
2641         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2642         BUG_ON(IS_ERR(dir_rec));
2643         if (!dir_rec)
2644                 return 0;
2645         dir_rec->found_size += backref->namelen;
2646         if (dir_rec->found_size == dir_rec->isize &&
2647             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2648                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2649         if (dir_rec->found_size != dir_rec->isize)
2650                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2651
2652         return 0;
2653 }
2654
2655 static int delete_dir_index(struct btrfs_root *root,
2656                             struct inode_backref *backref)
2657 {
2658         struct btrfs_trans_handle *trans;
2659         struct btrfs_dir_item *di;
2660         struct btrfs_path path;
2661         int ret = 0;
2662
2663         trans = btrfs_start_transaction(root, 1);
2664         if (IS_ERR(trans))
2665                 return PTR_ERR(trans);
2666
2667         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2668                 (unsigned long long)backref->dir,
2669                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2670                 (unsigned long long)root->objectid);
2671
2672         btrfs_init_path(&path);
2673         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2674                                     backref->name, backref->namelen,
2675                                     backref->index, -1);
2676         if (IS_ERR(di)) {
2677                 ret = PTR_ERR(di);
2678                 btrfs_release_path(&path);
2679                 btrfs_commit_transaction(trans, root);
2680                 if (ret == -ENOENT)
2681                         return 0;
2682                 return ret;
2683         }
2684
2685         if (!di)
2686                 ret = btrfs_del_item(trans, root, &path);
2687         else
2688                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689         BUG_ON(ret);
2690         btrfs_release_path(&path);
2691         btrfs_commit_transaction(trans, root);
2692         return ret;
2693 }
2694
2695 static int __create_inode_item(struct btrfs_trans_handle *trans,
2696                                struct btrfs_root *root, u64 ino, u64 size,
2697                                u64 nbytes, u64 nlink, u32 mode)
2698 {
2699         struct btrfs_inode_item ii;
2700         time_t now = time(NULL);
2701         int ret;
2702
2703         btrfs_set_stack_inode_size(&ii, size);
2704         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2705         btrfs_set_stack_inode_nlink(&ii, nlink);
2706         btrfs_set_stack_inode_mode(&ii, mode);
2707         btrfs_set_stack_inode_generation(&ii, trans->transid);
2708         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2709         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2710         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2711         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2712         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2713         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2714         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2715
2716         ret = btrfs_insert_inode(trans, root, ino, &ii);
2717         ASSERT(!ret);
2718
2719         warning("root %llu inode %llu recreating inode item, this may "
2720                 "be incomplete, please check permissions and content after "
2721                 "the fsck completes.\n", (unsigned long long)root->objectid,
2722                 (unsigned long long)ino);
2723
2724         return 0;
2725 }
2726
2727 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2728                                     struct btrfs_root *root, u64 ino,
2729                                     u8 filetype)
2730 {
2731         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2732
2733         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2734 }
2735
2736 static int create_inode_item(struct btrfs_root *root,
2737                              struct inode_record *rec, int root_dir)
2738 {
2739         struct btrfs_trans_handle *trans;
2740         u64 nlink = 0;
2741         u32 mode = 0;
2742         u64 size = 0;
2743         int ret;
2744
2745         trans = btrfs_start_transaction(root, 1);
2746         if (IS_ERR(trans)) {
2747                 ret = PTR_ERR(trans);
2748                 return ret;
2749         }
2750
2751         nlink = root_dir ? 1 : rec->found_link;
2752         if (rec->found_dir_item) {
2753                 if (rec->found_file_extent)
2754                         fprintf(stderr, "root %llu inode %llu has both a dir "
2755                                 "item and extents, unsure if it is a dir or a "
2756                                 "regular file so setting it as a directory\n",
2757                                 (unsigned long long)root->objectid,
2758                                 (unsigned long long)rec->ino);
2759                 mode = S_IFDIR | 0755;
2760                 size = rec->found_size;
2761         } else if (!rec->found_dir_item) {
2762                 size = rec->extent_end;
2763                 mode =  S_IFREG | 0755;
2764         }
2765
2766         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2767                                   nlink, mode);
2768         btrfs_commit_transaction(trans, root);
2769         return 0;
2770 }
2771
2772 static int repair_inode_backrefs(struct btrfs_root *root,
2773                                  struct inode_record *rec,
2774                                  struct cache_tree *inode_cache,
2775                                  int delete)
2776 {
2777         struct inode_backref *tmp, *backref;
2778         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2779         int ret = 0;
2780         int repaired = 0;
2781
2782         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2783                 if (!delete && rec->ino == root_dirid) {
2784                         if (!rec->found_inode_item) {
2785                                 ret = create_inode_item(root, rec, 1);
2786                                 if (ret)
2787                                         break;
2788                                 repaired++;
2789                         }
2790                 }
2791
2792                 /* Index 0 for root dir's are special, don't mess with it */
2793                 if (rec->ino == root_dirid && backref->index == 0)
2794                         continue;
2795
2796                 if (delete &&
2797                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2798                      (backref->found_dir_index && backref->found_inode_ref &&
2799                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2800                         ret = delete_dir_index(root, backref);
2801                         if (ret)
2802                                 break;
2803                         repaired++;
2804                         list_del(&backref->list);
2805                         free(backref);
2806                         continue;
2807                 }
2808
2809                 if (!delete && !backref->found_dir_index &&
2810                     backref->found_dir_item && backref->found_inode_ref) {
2811                         ret = add_missing_dir_index(root, inode_cache, rec,
2812                                                     backref);
2813                         if (ret)
2814                                 break;
2815                         repaired++;
2816                         if (backref->found_dir_item &&
2817                             backref->found_dir_index) {
2818                                 if (!backref->errors &&
2819                                     backref->found_inode_ref) {
2820                                         list_del(&backref->list);
2821                                         free(backref);
2822                                         continue;
2823                                 }
2824                         }
2825                 }
2826
2827                 if (!delete && (!backref->found_dir_index &&
2828                                 !backref->found_dir_item &&
2829                                 backref->found_inode_ref)) {
2830                         struct btrfs_trans_handle *trans;
2831                         struct btrfs_key location;
2832
2833                         ret = check_dir_conflict(root, backref->name,
2834                                                  backref->namelen,
2835                                                  backref->dir,
2836                                                  backref->index);
2837                         if (ret) {
2838                                 /*
2839                                  * let nlink fixing routine to handle it,
2840                                  * which can do it better.
2841                                  */
2842                                 ret = 0;
2843                                 break;
2844                         }
2845                         location.objectid = rec->ino;
2846                         location.type = BTRFS_INODE_ITEM_KEY;
2847                         location.offset = 0;
2848
2849                         trans = btrfs_start_transaction(root, 1);
2850                         if (IS_ERR(trans)) {
2851                                 ret = PTR_ERR(trans);
2852                                 break;
2853                         }
2854                         fprintf(stderr, "adding missing dir index/item pair "
2855                                 "for inode %llu\n",
2856                                 (unsigned long long)rec->ino);
2857                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2858                                                     backref->namelen,
2859                                                     backref->dir, &location,
2860                                                     imode_to_type(rec->imode),
2861                                                     backref->index);
2862                         BUG_ON(ret);
2863                         btrfs_commit_transaction(trans, root);
2864                         repaired++;
2865                 }
2866
2867                 if (!delete && (backref->found_inode_ref &&
2868                                 backref->found_dir_index &&
2869                                 backref->found_dir_item &&
2870                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2871                                 !rec->found_inode_item)) {
2872                         ret = create_inode_item(root, rec, 0);
2873                         if (ret)
2874                                 break;
2875                         repaired++;
2876                 }
2877
2878         }
2879         return ret ? ret : repaired;
2880 }
2881
2882 /*
2883  * To determine the file type for nlink/inode_item repair
2884  *
2885  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2886  * Return -ENOENT if file type is not found.
2887  */
2888 static int find_file_type(struct inode_record *rec, u8 *type)
2889 {
2890         struct inode_backref *backref;
2891
2892         /* For inode item recovered case */
2893         if (rec->found_inode_item) {
2894                 *type = imode_to_type(rec->imode);
2895                 return 0;
2896         }
2897
2898         list_for_each_entry(backref, &rec->backrefs, list) {
2899                 if (backref->found_dir_index || backref->found_dir_item) {
2900                         *type = backref->filetype;
2901                         return 0;
2902                 }
2903         }
2904         return -ENOENT;
2905 }
2906
2907 /*
2908  * To determine the file name for nlink repair
2909  *
2910  * Return 0 if file name is found, set name and namelen.
2911  * Return -ENOENT if file name is not found.
2912  */
2913 static int find_file_name(struct inode_record *rec,
2914                           char *name, int *namelen)
2915 {
2916         struct inode_backref *backref;
2917
2918         list_for_each_entry(backref, &rec->backrefs, list) {
2919                 if (backref->found_dir_index || backref->found_dir_item ||
2920                     backref->found_inode_ref) {
2921                         memcpy(name, backref->name, backref->namelen);
2922                         *namelen = backref->namelen;
2923                         return 0;
2924                 }
2925         }
2926         return -ENOENT;
2927 }
2928
2929 /* Reset the nlink of the inode to the correct one */
2930 static int reset_nlink(struct btrfs_trans_handle *trans,
2931                        struct btrfs_root *root,
2932                        struct btrfs_path *path,
2933                        struct inode_record *rec)
2934 {
2935         struct inode_backref *backref;
2936         struct inode_backref *tmp;
2937         struct btrfs_key key;
2938         struct btrfs_inode_item *inode_item;
2939         int ret = 0;
2940
2941         /* We don't believe this either, reset it and iterate backref */
2942         rec->found_link = 0;
2943
2944         /* Remove all backref including the valid ones */
2945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2946                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2947                                    backref->index, backref->name,
2948                                    backref->namelen, 0);
2949                 if (ret < 0)
2950                         goto out;
2951
2952                 /* remove invalid backref, so it won't be added back */
2953                 if (!(backref->found_dir_index &&
2954                       backref->found_dir_item &&
2955                       backref->found_inode_ref)) {
2956                         list_del(&backref->list);
2957                         free(backref);
2958                 } else {
2959                         rec->found_link++;
2960                 }
2961         }
2962
2963         /* Set nlink to 0 */
2964         key.objectid = rec->ino;
2965         key.type = BTRFS_INODE_ITEM_KEY;
2966         key.offset = 0;
2967         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2968         if (ret < 0)
2969                 goto out;
2970         if (ret > 0) {
2971                 ret = -ENOENT;
2972                 goto out;
2973         }
2974         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2975                                     struct btrfs_inode_item);
2976         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2977         btrfs_mark_buffer_dirty(path->nodes[0]);
2978         btrfs_release_path(path);
2979
2980         /*
2981          * Add back valid inode_ref/dir_item/dir_index,
2982          * add_link() will handle the nlink inc, so new nlink must be correct
2983          */
2984         list_for_each_entry(backref, &rec->backrefs, list) {
2985                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2986                                      backref->name, backref->namelen,
2987                                      backref->filetype, &backref->index, 1, 0);
2988                 if (ret < 0)
2989                         goto out;
2990         }
2991 out:
2992         btrfs_release_path(path);
2993         return ret;
2994 }
2995
2996 static int get_highest_inode(struct btrfs_trans_handle *trans,
2997                                 struct btrfs_root *root,
2998                                 struct btrfs_path *path,
2999                                 u64 *highest_ino)
3000 {
3001         struct btrfs_key key, found_key;
3002         int ret;
3003
3004         btrfs_init_path(path);
3005         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3006         key.offset = -1;
3007         key.type = BTRFS_INODE_ITEM_KEY;
3008         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3009         if (ret == 1) {
3010                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3011                                 path->slots[0] - 1);
3012                 *highest_ino = found_key.objectid;
3013                 ret = 0;
3014         }
3015         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3016                 ret = -EOVERFLOW;
3017         btrfs_release_path(path);
3018         return ret;
3019 }
3020
3021 /*
3022  * Link inode to dir 'lost+found'. Increase @ref_count.
3023  *
3024  * Returns 0 means success.
3025  * Returns <0 means failure.
3026  */
3027 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3028                                    struct btrfs_root *root,
3029                                    struct btrfs_path *path,
3030                                    u64 ino, char *namebuf, u32 name_len,
3031                                    u8 filetype, u64 *ref_count)
3032 {
3033         char *dir_name = "lost+found";
3034         u64 lost_found_ino;
3035         int ret;
3036         u32 mode = 0700;
3037
3038         btrfs_release_path(path);
3039         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3040         if (ret < 0)
3041                 goto out;
3042         lost_found_ino++;
3043
3044         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3045                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3046                           mode);
3047         if (ret < 0) {
3048                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3049                 goto out;
3050         }
3051         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3052                              namebuf, name_len, filetype, NULL, 1, 0);
3053         /*
3054          * Add ".INO" suffix several times to handle case where
3055          * "FILENAME.INO" is already taken by another file.
3056          */
3057         while (ret == -EEXIST) {
3058                 /*
3059                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3060                  */
3061                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3062                         ret = -EFBIG;
3063                         goto out;
3064                 }
3065                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3066                          ".%llu", ino);
3067                 name_len += count_digits(ino) + 1;
3068                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3069                                      name_len, filetype, NULL, 1, 0);
3070         }
3071         if (ret < 0) {
3072                 error("failed to link the inode %llu to %s dir: %s",
3073                       ino, dir_name, strerror(-ret));
3074                 goto out;
3075         }
3076
3077         ++*ref_count;
3078         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3079                name_len, namebuf, dir_name);
3080 out:
3081         btrfs_release_path(path);
3082         if (ret)
3083                 error("failed to move file '%.*s' to '%s' dir", name_len,
3084                                 namebuf, dir_name);
3085         return ret;
3086 }
3087
3088 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3089                                struct btrfs_root *root,
3090                                struct btrfs_path *path,
3091                                struct inode_record *rec)
3092 {
3093         char namebuf[BTRFS_NAME_LEN] = {0};
3094         u8 type = 0;
3095         int namelen = 0;
3096         int name_recovered = 0;
3097         int type_recovered = 0;
3098         int ret = 0;
3099
3100         /*
3101          * Get file name and type first before these invalid inode ref
3102          * are deleted by remove_all_invalid_backref()
3103          */
3104         name_recovered = !find_file_name(rec, namebuf, &namelen);
3105         type_recovered = !find_file_type(rec, &type);
3106
3107         if (!name_recovered) {
3108                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3109                        rec->ino, rec->ino);
3110                 namelen = count_digits(rec->ino);
3111                 sprintf(namebuf, "%llu", rec->ino);
3112                 name_recovered = 1;
3113         }
3114         if (!type_recovered) {
3115                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3116                        rec->ino);
3117                 type = BTRFS_FT_REG_FILE;
3118                 type_recovered = 1;
3119         }
3120
3121         ret = reset_nlink(trans, root, path, rec);
3122         if (ret < 0) {
3123                 fprintf(stderr,
3124                         "Failed to reset nlink for inode %llu: %s\n",
3125                         rec->ino, strerror(-ret));
3126                 goto out;
3127         }
3128
3129         if (rec->found_link == 0) {
3130                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3131                                               namebuf, namelen, type,
3132                                               (u64 *)&rec->found_link);
3133                 if (ret)
3134                         goto out;
3135         }
3136         printf("Fixed the nlink of inode %llu\n", rec->ino);
3137 out:
3138         /*
3139          * Clear the flag anyway, or we will loop forever for the same inode
3140          * as it will not be removed from the bad inode list and the dead loop
3141          * happens.
3142          */
3143         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3144         btrfs_release_path(path);
3145         return ret;
3146 }
3147
3148 /*
3149  * Check if there is any normal(reg or prealloc) file extent for given
3150  * ino.
3151  * This is used to determine the file type when neither its dir_index/item or
3152  * inode_item exists.
3153  *
3154  * This will *NOT* report error, if any error happens, just consider it does
3155  * not have any normal file extent.
3156  */
3157 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3158 {
3159         struct btrfs_path path;
3160         struct btrfs_key key;
3161         struct btrfs_key found_key;
3162         struct btrfs_file_extent_item *fi;
3163         u8 type;
3164         int ret = 0;
3165
3166         btrfs_init_path(&path);
3167         key.objectid = ino;
3168         key.type = BTRFS_EXTENT_DATA_KEY;
3169         key.offset = 0;
3170
3171         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3172         if (ret < 0) {
3173                 ret = 0;
3174                 goto out;
3175         }
3176         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3177                 ret = btrfs_next_leaf(root, &path);
3178                 if (ret) {
3179                         ret = 0;
3180                         goto out;
3181                 }
3182         }
3183         while (1) {
3184                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3185                                       path.slots[0]);
3186                 if (found_key.objectid != ino ||
3187                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3188                         break;
3189                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3190                                     struct btrfs_file_extent_item);
3191                 type = btrfs_file_extent_type(path.nodes[0], fi);
3192                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3193                         ret = 1;
3194                         goto out;
3195                 }
3196         }
3197 out:
3198         btrfs_release_path(&path);
3199         return ret;
3200 }
3201
3202 static u32 btrfs_type_to_imode(u8 type)
3203 {
3204         static u32 imode_by_btrfs_type[] = {
3205                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3206                 [BTRFS_FT_DIR]          = S_IFDIR,
3207                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3208                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3209                 [BTRFS_FT_FIFO]         = S_IFIFO,
3210                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3211                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3212         };
3213
3214         return imode_by_btrfs_type[(type)];
3215 }
3216
3217 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3218                                 struct btrfs_root *root,
3219                                 struct btrfs_path *path,
3220                                 struct inode_record *rec)
3221 {
3222         u8 filetype;
3223         u32 mode = 0700;
3224         int type_recovered = 0;
3225         int ret = 0;
3226
3227         printf("Trying to rebuild inode:%llu\n", rec->ino);
3228
3229         type_recovered = !find_file_type(rec, &filetype);
3230
3231         /*
3232          * Try to determine inode type if type not found.
3233          *
3234          * For found regular file extent, it must be FILE.
3235          * For found dir_item/index, it must be DIR.
3236          *
3237          * For undetermined one, use FILE as fallback.
3238          *
3239          * TODO:
3240          * 1. If found backref(inode_index/item is already handled) to it,
3241          *    it must be DIR.
3242          *    Need new inode-inode ref structure to allow search for that.
3243          */
3244         if (!type_recovered) {
3245                 if (rec->found_file_extent &&
3246                     find_normal_file_extent(root, rec->ino)) {
3247                         type_recovered = 1;
3248                         filetype = BTRFS_FT_REG_FILE;
3249                 } else if (rec->found_dir_item) {
3250                         type_recovered = 1;
3251                         filetype = BTRFS_FT_DIR;
3252                 } else if (!list_empty(&rec->orphan_extents)) {
3253                         type_recovered = 1;
3254                         filetype = BTRFS_FT_REG_FILE;
3255                 } else{
3256                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3257                                rec->ino);
3258                         type_recovered = 1;
3259                         filetype = BTRFS_FT_REG_FILE;
3260                 }
3261         }
3262
3263         ret = btrfs_new_inode(trans, root, rec->ino,
3264                               mode | btrfs_type_to_imode(filetype));
3265         if (ret < 0)
3266                 goto out;
3267
3268         /*
3269          * Here inode rebuild is done, we only rebuild the inode item,
3270          * don't repair the nlink(like move to lost+found).
3271          * That is the job of nlink repair.
3272          *
3273          * We just fill the record and return
3274          */
3275         rec->found_dir_item = 1;
3276         rec->imode = mode | btrfs_type_to_imode(filetype);
3277         rec->nlink = 0;
3278         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3279         /* Ensure the inode_nlinks repair function will be called */
3280         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3281 out:
3282         return ret;
3283 }
3284
3285 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3286                                       struct btrfs_root *root,
3287                                       struct btrfs_path *path,
3288                                       struct inode_record *rec)
3289 {
3290         struct orphan_data_extent *orphan;
3291         struct orphan_data_extent *tmp;
3292         int ret = 0;
3293
3294         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3295                 /*
3296                  * Check for conflicting file extents
3297                  *
3298                  * Here we don't know whether the extents is compressed or not,
3299                  * so we can only assume it not compressed nor data offset,
3300                  * and use its disk_len as extent length.
3301                  */
3302                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3303                                        orphan->offset, orphan->disk_len, 0);
3304                 btrfs_release_path(path);
3305                 if (ret < 0)
3306                         goto out;
3307                 if (!ret) {
3308                         fprintf(stderr,
3309                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3310                                 orphan->disk_bytenr, orphan->disk_len);
3311                         ret = btrfs_free_extent(trans,
3312                                         root->fs_info->extent_root,
3313                                         orphan->disk_bytenr, orphan->disk_len,
3314                                         0, root->objectid, orphan->objectid,
3315                                         orphan->offset);
3316                         if (ret < 0)
3317                                 goto out;
3318                 }
3319                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3320                                 orphan->offset, orphan->disk_bytenr,
3321                                 orphan->disk_len, orphan->disk_len);
3322                 if (ret < 0)
3323                         goto out;
3324
3325                 /* Update file size info */
3326                 rec->found_size += orphan->disk_len;
3327                 if (rec->found_size == rec->nbytes)
3328                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3329
3330                 /* Update the file extent hole info too */
3331                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3332                                            orphan->disk_len);
3333                 if (ret < 0)
3334                         goto out;
3335                 if (RB_EMPTY_ROOT(&rec->holes))
3336                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3337
3338                 list_del(&orphan->list);
3339                 free(orphan);
3340         }
3341         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3342 out:
3343         return ret;
3344 }
3345
3346 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3347                                         struct btrfs_root *root,
3348                                         struct btrfs_path *path,
3349                                         struct inode_record *rec)
3350 {
3351         struct rb_node *node;
3352         struct file_extent_hole *hole;
3353         int found = 0;
3354         int ret = 0;
3355
3356         node = rb_first(&rec->holes);
3357
3358         while (node) {
3359                 found = 1;
3360                 hole = rb_entry(node, struct file_extent_hole, node);
3361                 ret = btrfs_punch_hole(trans, root, rec->ino,
3362                                        hole->start, hole->len);
3363                 if (ret < 0)
3364                         goto out;
3365                 ret = del_file_extent_hole(&rec->holes, hole->start,
3366                                            hole->len);
3367                 if (ret < 0)
3368                         goto out;
3369                 if (RB_EMPTY_ROOT(&rec->holes))
3370                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3371                 node = rb_first(&rec->holes);
3372         }
3373         /* special case for a file losing all its file extent */
3374         if (!found) {
3375                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3376                                        round_up(rec->isize,
3377                                                 root->fs_info->sectorsize));
3378                 if (ret < 0)
3379                         goto out;
3380         }
3381         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3382                rec->ino, root->objectid);
3383 out:
3384         return ret;
3385 }
3386
3387 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3388 {
3389         struct btrfs_trans_handle *trans;
3390         struct btrfs_path path;
3391         int ret = 0;
3392
3393         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3394                              I_ERR_NO_ORPHAN_ITEM |
3395                              I_ERR_LINK_COUNT_WRONG |
3396                              I_ERR_NO_INODE_ITEM |
3397                              I_ERR_FILE_EXTENT_ORPHAN |
3398                              I_ERR_FILE_EXTENT_DISCOUNT|
3399                              I_ERR_FILE_NBYTES_WRONG)))
3400                 return rec->errors;
3401
3402         /*
3403          * For nlink repair, it may create a dir and add link, so
3404          * 2 for parent(256)'s dir_index and dir_item
3405          * 2 for lost+found dir's inode_item and inode_ref
3406          * 1 for the new inode_ref of the file
3407          * 2 for lost+found dir's dir_index and dir_item for the file
3408          */
3409         trans = btrfs_start_transaction(root, 7);
3410         if (IS_ERR(trans))
3411                 return PTR_ERR(trans);
3412
3413         btrfs_init_path(&path);
3414         if (rec->errors & I_ERR_NO_INODE_ITEM)
3415                 ret = repair_inode_no_item(trans, root, &path, rec);
3416         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3417                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3418         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3419                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3420         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3421                 ret = repair_inode_isize(trans, root, &path, rec);
3422         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3423                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3424         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3425                 ret = repair_inode_nlinks(trans, root, &path, rec);
3426         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3427                 ret = repair_inode_nbytes(trans, root, &path, rec);
3428         btrfs_commit_transaction(trans, root);
3429         btrfs_release_path(&path);
3430         return ret;
3431 }
3432
3433 static int check_inode_recs(struct btrfs_root *root,
3434                             struct cache_tree *inode_cache)
3435 {
3436         struct cache_extent *cache;
3437         struct ptr_node *node;
3438         struct inode_record *rec;
3439         struct inode_backref *backref;
3440         int stage = 0;
3441         int ret = 0;
3442         int err = 0;
3443         u64 error = 0;
3444         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3445
3446         if (btrfs_root_refs(&root->root_item) == 0) {
3447                 if (!cache_tree_empty(inode_cache))
3448                         fprintf(stderr, "warning line %d\n", __LINE__);
3449                 return 0;
3450         }
3451
3452         /*
3453          * We need to repair backrefs first because we could change some of the
3454          * errors in the inode recs.
3455          *
3456          * We also need to go through and delete invalid backrefs first and then
3457          * add the correct ones second.  We do this because we may get EEXIST
3458          * when adding back the correct index because we hadn't yet deleted the
3459          * invalid index.
3460          *
3461          * For example, if we were missing a dir index then the directories
3462          * isize would be wrong, so if we fixed the isize to what we thought it
3463          * would be and then fixed the backref we'd still have a invalid fs, so
3464          * we need to add back the dir index and then check to see if the isize
3465          * is still wrong.
3466          */
3467         while (stage < 3) {
3468                 stage++;
3469                 if (stage == 3 && !err)
3470                         break;
3471
3472                 cache = search_cache_extent(inode_cache, 0);
3473                 while (repair && cache) {
3474                         node = container_of(cache, struct ptr_node, cache);
3475                         rec = node->data;
3476                         cache = next_cache_extent(cache);
3477
3478                         /* Need to free everything up and rescan */
3479                         if (stage == 3) {
3480                                 remove_cache_extent(inode_cache, &node->cache);
3481                                 free(node);
3482                                 free_inode_rec(rec);
3483                                 continue;
3484                         }
3485
3486                         if (list_empty(&rec->backrefs))
3487                                 continue;
3488
3489                         ret = repair_inode_backrefs(root, rec, inode_cache,
3490                                                     stage == 1);
3491                         if (ret < 0) {
3492                                 err = ret;
3493                                 stage = 2;
3494                                 break;
3495                         } if (ret > 0) {
3496                                 err = -EAGAIN;
3497                         }
3498                 }
3499         }
3500         if (err)
3501                 return err;
3502
3503         rec = get_inode_rec(inode_cache, root_dirid, 0);
3504         BUG_ON(IS_ERR(rec));
3505         if (rec) {
3506                 ret = check_root_dir(rec);
3507                 if (ret) {
3508                         fprintf(stderr, "root %llu root dir %llu error\n",
3509                                 (unsigned long long)root->root_key.objectid,
3510                                 (unsigned long long)root_dirid);
3511                         print_inode_error(root, rec);
3512                         error++;
3513                 }
3514         } else {
3515                 if (repair) {
3516                         struct btrfs_trans_handle *trans;
3517
3518                         trans = btrfs_start_transaction(root, 1);
3519                         if (IS_ERR(trans)) {
3520                                 err = PTR_ERR(trans);
3521                                 return err;
3522                         }
3523
3524                         fprintf(stderr,
3525                                 "root %llu missing its root dir, recreating\n",
3526                                 (unsigned long long)root->objectid);
3527
3528                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3529                         BUG_ON(ret);
3530
3531                         btrfs_commit_transaction(trans, root);
3532                         return -EAGAIN;
3533                 }
3534
3535                 fprintf(stderr, "root %llu root dir %llu not found\n",
3536                         (unsigned long long)root->root_key.objectid,
3537                         (unsigned long long)root_dirid);
3538         }
3539
3540         while (1) {
3541                 cache = search_cache_extent(inode_cache, 0);
3542                 if (!cache)
3543                         break;
3544                 node = container_of(cache, struct ptr_node, cache);
3545                 rec = node->data;
3546                 remove_cache_extent(inode_cache, &node->cache);
3547                 free(node);
3548                 if (rec->ino == root_dirid ||
3549                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3550                         free_inode_rec(rec);
3551                         continue;
3552                 }
3553
3554                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3555                         ret = check_orphan_item(root, rec->ino);
3556                         if (ret == 0)
3557                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3558                         if (can_free_inode_rec(rec)) {
3559                                 free_inode_rec(rec);
3560                                 continue;
3561                         }
3562                 }
3563
3564                 if (!rec->found_inode_item)
3565                         rec->errors |= I_ERR_NO_INODE_ITEM;
3566                 if (rec->found_link != rec->nlink)
3567                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3568                 if (repair) {
3569                         ret = try_repair_inode(root, rec);
3570                         if (ret == 0 && can_free_inode_rec(rec)) {
3571                                 free_inode_rec(rec);
3572                                 continue;
3573                         }
3574                         ret = 0;
3575                 }
3576
3577                 if (!(repair && ret == 0))
3578                         error++;
3579                 print_inode_error(root, rec);
3580                 list_for_each_entry(backref, &rec->backrefs, list) {
3581                         if (!backref->found_dir_item)
3582                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3583                         if (!backref->found_dir_index)
3584                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3585                         if (!backref->found_inode_ref)
3586                                 backref->errors |= REF_ERR_NO_INODE_REF;
3587                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3588                                 " namelen %u name %s filetype %d errors %x",
3589                                 (unsigned long long)backref->dir,
3590                                 (unsigned long long)backref->index,
3591                                 backref->namelen, backref->name,
3592                                 backref->filetype, backref->errors);
3593                         print_ref_error(backref->errors);
3594                 }
3595                 free_inode_rec(rec);
3596         }
3597         return (error > 0) ? -1 : 0;
3598 }
3599
3600 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3601                                         u64 objectid)
3602 {
3603         struct cache_extent *cache;
3604         struct root_record *rec = NULL;
3605         int ret;
3606
3607         cache = lookup_cache_extent(root_cache, objectid, 1);
3608         if (cache) {
3609                 rec = container_of(cache, struct root_record, cache);
3610         } else {
3611                 rec = calloc(1, sizeof(*rec));
3612                 if (!rec)
3613                         return ERR_PTR(-ENOMEM);
3614                 rec->objectid = objectid;
3615                 INIT_LIST_HEAD(&rec->backrefs);
3616                 rec->cache.start = objectid;
3617                 rec->cache.size = 1;
3618
3619                 ret = insert_cache_extent(root_cache, &rec->cache);
3620                 if (ret)
3621                         return ERR_PTR(-EEXIST);
3622         }
3623         return rec;
3624 }
3625
3626 static struct root_backref *get_root_backref(struct root_record *rec,
3627                                              u64 ref_root, u64 dir, u64 index,
3628                                              const char *name, int namelen)
3629 {
3630         struct root_backref *backref;
3631
3632         list_for_each_entry(backref, &rec->backrefs, list) {
3633                 if (backref->ref_root != ref_root || backref->dir != dir ||
3634                     backref->namelen != namelen)
3635                         continue;
3636                 if (memcmp(name, backref->name, namelen))
3637                         continue;
3638                 return backref;
3639         }
3640
3641         backref = calloc(1, sizeof(*backref) + namelen + 1);
3642         if (!backref)
3643                 return NULL;
3644         backref->ref_root = ref_root;
3645         backref->dir = dir;
3646         backref->index = index;
3647         backref->namelen = namelen;
3648         memcpy(backref->name, name, namelen);
3649         backref->name[namelen] = '\0';
3650         list_add_tail(&backref->list, &rec->backrefs);
3651         return backref;
3652 }
3653
3654 static void free_root_record(struct cache_extent *cache)
3655 {
3656         struct root_record *rec;
3657         struct root_backref *backref;
3658
3659         rec = container_of(cache, struct root_record, cache);
3660         while (!list_empty(&rec->backrefs)) {
3661                 backref = to_root_backref(rec->backrefs.next);
3662                 list_del(&backref->list);
3663                 free(backref);
3664         }
3665
3666         free(rec);
3667 }
3668
3669 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3670
3671 static int add_root_backref(struct cache_tree *root_cache,
3672                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3673                             const char *name, int namelen,
3674                             int item_type, int errors)
3675 {
3676         struct root_record *rec;
3677         struct root_backref *backref;
3678
3679         rec = get_root_rec(root_cache, root_id);
3680         BUG_ON(IS_ERR(rec));
3681         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3682         BUG_ON(!backref);
3683
3684         backref->errors |= errors;
3685
3686         if (item_type != BTRFS_DIR_ITEM_KEY) {
3687                 if (backref->found_dir_index || backref->found_back_ref ||
3688                     backref->found_forward_ref) {
3689                         if (backref->index != index)
3690                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3691                 } else {
3692                         backref->index = index;
3693                 }
3694         }
3695
3696         if (item_type == BTRFS_DIR_ITEM_KEY) {
3697                 if (backref->found_forward_ref)
3698                         rec->found_ref++;
3699                 backref->found_dir_item = 1;
3700         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3701                 backref->found_dir_index = 1;
3702         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3703                 if (backref->found_forward_ref)
3704                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3705                 else if (backref->found_dir_item)
3706                         rec->found_ref++;
3707                 backref->found_forward_ref = 1;
3708         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3709                 if (backref->found_back_ref)
3710                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3711                 backref->found_back_ref = 1;
3712         } else {
3713                 BUG_ON(1);
3714         }
3715
3716         if (backref->found_forward_ref && backref->found_dir_item)
3717                 backref->reachable = 1;
3718         return 0;
3719 }
3720
3721 static int merge_root_recs(struct btrfs_root *root,
3722                            struct cache_tree *src_cache,
3723                            struct cache_tree *dst_cache)
3724 {
3725         struct cache_extent *cache;
3726         struct ptr_node *node;
3727         struct inode_record *rec;
3728         struct inode_backref *backref;
3729         int ret = 0;
3730
3731         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3732                 free_inode_recs_tree(src_cache);
3733                 return 0;
3734         }
3735
3736         while (1) {
3737                 cache = search_cache_extent(src_cache, 0);
3738                 if (!cache)
3739                         break;
3740                 node = container_of(cache, struct ptr_node, cache);
3741                 rec = node->data;
3742                 remove_cache_extent(src_cache, &node->cache);
3743                 free(node);
3744
3745                 ret = is_child_root(root, root->objectid, rec->ino);
3746                 if (ret < 0)
3747                         break;
3748                 else if (ret == 0)
3749                         goto skip;
3750
3751                 list_for_each_entry(backref, &rec->backrefs, list) {
3752                         BUG_ON(backref->found_inode_ref);
3753                         if (backref->found_dir_item)
3754                                 add_root_backref(dst_cache, rec->ino,
3755                                         root->root_key.objectid, backref->dir,
3756                                         backref->index, backref->name,
3757                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3758                                         backref->errors);
3759                         if (backref->found_dir_index)
3760                                 add_root_backref(dst_cache, rec->ino,
3761                                         root->root_key.objectid, backref->dir,
3762                                         backref->index, backref->name,
3763                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3764                                         backref->errors);
3765                 }
3766 skip:
3767                 free_inode_rec(rec);
3768         }
3769         if (ret < 0)
3770                 return ret;
3771         return 0;
3772 }
3773
3774 static int check_root_refs(struct btrfs_root *root,
3775                            struct cache_tree *root_cache)
3776 {
3777         struct root_record *rec;
3778         struct root_record *ref_root;
3779         struct root_backref *backref;
3780         struct cache_extent *cache;
3781         int loop = 1;
3782         int ret;
3783         int error;
3784         int errors = 0;
3785
3786         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3787         BUG_ON(IS_ERR(rec));
3788         rec->found_ref = 1;
3789
3790         /* fixme: this can not detect circular references */
3791         while (loop) {
3792                 loop = 0;
3793                 cache = search_cache_extent(root_cache, 0);
3794                 while (1) {
3795                         if (!cache)
3796                                 break;
3797                         rec = container_of(cache, struct root_record, cache);
3798                         cache = next_cache_extent(cache);
3799
3800                         if (rec->found_ref == 0)
3801                                 continue;
3802
3803                         list_for_each_entry(backref, &rec->backrefs, list) {
3804                                 if (!backref->reachable)
3805                                         continue;
3806
3807                                 ref_root = get_root_rec(root_cache,
3808                                                         backref->ref_root);
3809                                 BUG_ON(IS_ERR(ref_root));
3810                                 if (ref_root->found_ref > 0)
3811                                         continue;
3812
3813                                 backref->reachable = 0;
3814                                 rec->found_ref--;
3815                                 if (rec->found_ref == 0)
3816                                         loop = 1;
3817                         }
3818                 }
3819         }
3820
3821         cache = search_cache_extent(root_cache, 0);
3822         while (1) {
3823                 if (!cache)
3824                         break;
3825                 rec = container_of(cache, struct root_record, cache);
3826                 cache = next_cache_extent(cache);
3827
3828                 if (rec->found_ref == 0 &&
3829                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3830                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3831                         ret = check_orphan_item(root->fs_info->tree_root,
3832                                                 rec->objectid);
3833                         if (ret == 0)
3834                                 continue;
3835
3836                         /*
3837                          * If we don't have a root item then we likely just have
3838                          * a dir item in a snapshot for this root but no actual
3839                          * ref key or anything so it's meaningless.
3840                          */
3841                         if (!rec->found_root_item)
3842                                 continue;
3843                         errors++;
3844                         fprintf(stderr, "fs tree %llu not referenced\n",
3845                                 (unsigned long long)rec->objectid);
3846                 }
3847
3848                 error = 0;
3849                 if (rec->found_ref > 0 && !rec->found_root_item)
3850                         error = 1;
3851                 list_for_each_entry(backref, &rec->backrefs, list) {
3852                         if (!backref->found_dir_item)
3853                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3854                         if (!backref->found_dir_index)
3855                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3856                         if (!backref->found_back_ref)
3857                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3858                         if (!backref->found_forward_ref)
3859                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3860                         if (backref->reachable && backref->errors)
3861                                 error = 1;
3862                 }
3863                 if (!error)
3864                         continue;
3865
3866                 errors++;
3867                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3868                         (unsigned long long)rec->objectid, rec->found_ref,
3869                          rec->found_root_item ? "" : "not found");
3870
3871                 list_for_each_entry(backref, &rec->backrefs, list) {
3872                         if (!backref->reachable)
3873                                 continue;
3874                         if (!backref->errors && rec->found_root_item)
3875                                 continue;
3876                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3877                                 " index %llu namelen %u name %s errors %x\n",
3878                                 (unsigned long long)backref->ref_root,
3879                                 (unsigned long long)backref->dir,
3880                                 (unsigned long long)backref->index,
3881                                 backref->namelen, backref->name,
3882                                 backref->errors);
3883                         print_ref_error(backref->errors);
3884                 }
3885         }
3886         return errors > 0 ? 1 : 0;
3887 }
3888
3889 static int process_root_ref(struct extent_buffer *eb, int slot,
3890                             struct btrfs_key *key,
3891                             struct cache_tree *root_cache)
3892 {
3893         u64 dirid;
3894         u64 index;
3895         u32 len;
3896         u32 name_len;
3897         struct btrfs_root_ref *ref;
3898         char namebuf[BTRFS_NAME_LEN];
3899         int error;
3900
3901         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3902
3903         dirid = btrfs_root_ref_dirid(eb, ref);
3904         index = btrfs_root_ref_sequence(eb, ref);
3905         name_len = btrfs_root_ref_name_len(eb, ref);
3906
3907         if (name_len <= BTRFS_NAME_LEN) {
3908                 len = name_len;
3909                 error = 0;
3910         } else {
3911                 len = BTRFS_NAME_LEN;
3912                 error = REF_ERR_NAME_TOO_LONG;
3913         }
3914         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3915
3916         if (key->type == BTRFS_ROOT_REF_KEY) {
3917                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3918                                  index, namebuf, len, key->type, error);
3919         } else {
3920                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3921                                  index, namebuf, len, key->type, error);
3922         }
3923         return 0;
3924 }
3925
3926 static void free_corrupt_block(struct cache_extent *cache)
3927 {
3928         struct btrfs_corrupt_block *corrupt;
3929
3930         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3931         free(corrupt);
3932 }
3933
3934 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3935
3936 /*
3937  * Repair the btree of the given root.
3938  *
3939  * The fix is to remove the node key in corrupt_blocks cache_tree.
3940  * and rebalance the tree.
3941  * After the fix, the btree should be writeable.
3942  */
3943 static int repair_btree(struct btrfs_root *root,
3944                         struct cache_tree *corrupt_blocks)
3945 {
3946         struct btrfs_trans_handle *trans;
3947         struct btrfs_path path;
3948         struct btrfs_corrupt_block *corrupt;
3949         struct cache_extent *cache;
3950         struct btrfs_key key;
3951         u64 offset;
3952         int level;
3953         int ret = 0;
3954
3955         if (cache_tree_empty(corrupt_blocks))
3956                 return 0;
3957
3958         trans = btrfs_start_transaction(root, 1);
3959         if (IS_ERR(trans)) {
3960                 ret = PTR_ERR(trans);
3961                 fprintf(stderr, "Error starting transaction: %s\n",
3962                         strerror(-ret));
3963                 return ret;
3964         }
3965         btrfs_init_path(&path);
3966         cache = first_cache_extent(corrupt_blocks);
3967         while (cache) {
3968                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3969                                        cache);
3970                 level = corrupt->level;
3971                 path.lowest_level = level;
3972                 key.objectid = corrupt->key.objectid;
3973                 key.type = corrupt->key.type;
3974                 key.offset = corrupt->key.offset;
3975
3976                 /*
3977                  * Here we don't want to do any tree balance, since it may
3978                  * cause a balance with corrupted brother leaf/node,
3979                  * so ins_len set to 0 here.
3980                  * Balance will be done after all corrupt node/leaf is deleted.
3981                  */
3982                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3983                 if (ret < 0)
3984                         goto out;
3985                 offset = btrfs_node_blockptr(path.nodes[level],
3986                                              path.slots[level]);
3987
3988                 /* Remove the ptr */
3989                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3990                 if (ret < 0)
3991                         goto out;
3992                 /*
3993                  * Remove the corresponding extent
3994                  * return value is not concerned.
3995                  */
3996                 btrfs_release_path(&path);
3997                 ret = btrfs_free_extent(trans, root, offset,
3998                                 root->fs_info->nodesize, 0,
3999                                 root->root_key.objectid, level - 1, 0);
4000                 cache = next_cache_extent(cache);
4001         }
4002
4003         /* Balance the btree using btrfs_search_slot() */
4004         cache = first_cache_extent(corrupt_blocks);
4005         while (cache) {
4006                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4007                                        cache);
4008                 memcpy(&key, &corrupt->key, sizeof(key));
4009                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4010                 if (ret < 0)
4011                         goto out;
4012                 /* return will always >0 since it won't find the item */
4013                 ret = 0;
4014                 btrfs_release_path(&path);
4015                 cache = next_cache_extent(cache);
4016         }
4017 out:
4018         btrfs_commit_transaction(trans, root);
4019         btrfs_release_path(&path);
4020         return ret;
4021 }
4022
4023 static int check_fs_root(struct btrfs_root *root,
4024                          struct cache_tree *root_cache,
4025                          struct walk_control *wc)
4026 {
4027         int ret = 0;
4028         int err = 0;
4029         int wret;
4030         int level;
4031         struct btrfs_path path;
4032         struct shared_node root_node;
4033         struct root_record *rec;
4034         struct btrfs_root_item *root_item = &root->root_item;
4035         struct cache_tree corrupt_blocks;
4036         struct orphan_data_extent *orphan;
4037         struct orphan_data_extent *tmp;
4038         enum btrfs_tree_block_status status;
4039         struct node_refs nrefs;
4040
4041         /*
4042          * Reuse the corrupt_block cache tree to record corrupted tree block
4043          *
4044          * Unlike the usage in extent tree check, here we do it in a per
4045          * fs/subvol tree base.
4046          */
4047         cache_tree_init(&corrupt_blocks);
4048         root->fs_info->corrupt_blocks = &corrupt_blocks;
4049
4050         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4051                 rec = get_root_rec(root_cache, root->root_key.objectid);
4052                 BUG_ON(IS_ERR(rec));
4053                 if (btrfs_root_refs(root_item) > 0)
4054                         rec->found_root_item = 1;
4055         }
4056
4057         btrfs_init_path(&path);
4058         memset(&root_node, 0, sizeof(root_node));
4059         cache_tree_init(&root_node.root_cache);
4060         cache_tree_init(&root_node.inode_cache);
4061         memset(&nrefs, 0, sizeof(nrefs));
4062
4063         /* Move the orphan extent record to corresponding inode_record */
4064         list_for_each_entry_safe(orphan, tmp,
4065                                  &root->orphan_data_extents, list) {
4066                 struct inode_record *inode;
4067
4068                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4069                                       1);
4070                 BUG_ON(IS_ERR(inode));
4071                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4072                 list_move(&orphan->list, &inode->orphan_extents);
4073         }
4074
4075         level = btrfs_header_level(root->node);
4076         memset(wc->nodes, 0, sizeof(wc->nodes));
4077         wc->nodes[level] = &root_node;
4078         wc->active_node = level;
4079         wc->root_level = level;
4080
4081         /* We may not have checked the root block, lets do that now */
4082         if (btrfs_is_leaf(root->node))
4083                 status = btrfs_check_leaf(root, NULL, root->node);
4084         else
4085                 status = btrfs_check_node(root, NULL, root->node);
4086         if (status != BTRFS_TREE_BLOCK_CLEAN)
4087                 return -EIO;
4088
4089         if (btrfs_root_refs(root_item) > 0 ||
4090             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4091                 path.nodes[level] = root->node;
4092                 extent_buffer_get(root->node);
4093                 path.slots[level] = 0;
4094         } else {
4095                 struct btrfs_key key;
4096                 struct btrfs_disk_key found_key;
4097
4098                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4099                 level = root_item->drop_level;
4100                 path.lowest_level = level;
4101                 if (level > btrfs_header_level(root->node) ||
4102                     level >= BTRFS_MAX_LEVEL) {
4103                         error("ignoring invalid drop level: %u", level);
4104                         goto skip_walking;
4105                 }
4106                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4107                 if (wret < 0)
4108                         goto skip_walking;
4109                 btrfs_node_key(path.nodes[level], &found_key,
4110                                 path.slots[level]);
4111                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4112                                         sizeof(found_key)));
4113         }
4114
4115         while (1) {
4116                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4117                 if (wret < 0)
4118                         ret = wret;
4119                 if (wret != 0)
4120                         break;
4121
4122                 wret = walk_up_tree(root, &path, wc, &level);
4123                 if (wret < 0)
4124                         ret = wret;
4125                 if (wret != 0)
4126                         break;
4127         }
4128 skip_walking:
4129         btrfs_release_path(&path);
4130
4131         if (!cache_tree_empty(&corrupt_blocks)) {
4132                 struct cache_extent *cache;
4133                 struct btrfs_corrupt_block *corrupt;
4134
4135                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4136                        root->root_key.objectid);
4137                 cache = first_cache_extent(&corrupt_blocks);
4138                 while (cache) {
4139                         corrupt = container_of(cache,
4140                                                struct btrfs_corrupt_block,
4141                                                cache);
4142                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4143                                cache->start, corrupt->level,
4144                                corrupt->key.objectid, corrupt->key.type,
4145                                corrupt->key.offset);
4146                         cache = next_cache_extent(cache);
4147                 }
4148                 if (repair) {
4149                         printf("Try to repair the btree for root %llu\n",
4150                                root->root_key.objectid);
4151                         ret = repair_btree(root, &corrupt_blocks);
4152                         if (ret < 0)
4153                                 fprintf(stderr, "Failed to repair btree: %s\n",
4154                                         strerror(-ret));
4155                         if (!ret)
4156                                 printf("Btree for root %llu is fixed\n",
4157                                        root->root_key.objectid);
4158                 }
4159         }
4160
4161         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4162         if (err < 0)
4163                 ret = err;
4164
4165         if (root_node.current) {
4166                 root_node.current->checked = 1;
4167                 maybe_free_inode_rec(&root_node.inode_cache,
4168                                 root_node.current);
4169         }
4170
4171         err = check_inode_recs(root, &root_node.inode_cache);
4172         if (!ret)
4173                 ret = err;
4174
4175         free_corrupt_blocks_tree(&corrupt_blocks);
4176         root->fs_info->corrupt_blocks = NULL;
4177         free_orphan_data_extents(&root->orphan_data_extents);
4178         return ret;
4179 }
4180
4181 static int fs_root_objectid(u64 objectid)
4182 {
4183         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4184             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4185                 return 1;
4186         return is_fstree(objectid);
4187 }
4188
4189 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4190                           struct cache_tree *root_cache)
4191 {
4192         struct btrfs_path path;
4193         struct btrfs_key key;
4194         struct walk_control wc;
4195         struct extent_buffer *leaf, *tree_node;
4196         struct btrfs_root *tmp_root;
4197         struct btrfs_root *tree_root = fs_info->tree_root;
4198         int ret;
4199         int err = 0;
4200
4201         if (ctx.progress_enabled) {
4202                 ctx.tp = TASK_FS_ROOTS;
4203                 task_start(ctx.info);
4204         }
4205
4206         /*
4207          * Just in case we made any changes to the extent tree that weren't
4208          * reflected into the free space cache yet.
4209          */
4210         if (repair)
4211                 reset_cached_block_groups(fs_info);
4212         memset(&wc, 0, sizeof(wc));
4213         cache_tree_init(&wc.shared);
4214         btrfs_init_path(&path);
4215
4216 again:
4217         key.offset = 0;
4218         key.objectid = 0;
4219         key.type = BTRFS_ROOT_ITEM_KEY;
4220         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4221         if (ret < 0) {
4222                 err = 1;
4223                 goto out;
4224         }
4225         tree_node = tree_root->node;
4226         while (1) {
4227                 if (tree_node != tree_root->node) {
4228                         free_root_recs_tree(root_cache);
4229                         btrfs_release_path(&path);
4230                         goto again;
4231                 }
4232                 leaf = path.nodes[0];
4233                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4234                         ret = btrfs_next_leaf(tree_root, &path);
4235                         if (ret) {
4236                                 if (ret < 0)
4237                                         err = 1;
4238                                 break;
4239                         }
4240                         leaf = path.nodes[0];
4241                 }
4242                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4243                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4244                     fs_root_objectid(key.objectid)) {
4245                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4246                                 tmp_root = btrfs_read_fs_root_no_cache(
4247                                                 fs_info, &key);
4248                         } else {
4249                                 key.offset = (u64)-1;
4250                                 tmp_root = btrfs_read_fs_root(
4251                                                 fs_info, &key);
4252                         }
4253                         if (IS_ERR(tmp_root)) {
4254                                 err = 1;
4255                                 goto next;
4256                         }
4257                         ret = check_fs_root(tmp_root, root_cache, &wc);
4258                         if (ret == -EAGAIN) {
4259                                 free_root_recs_tree(root_cache);
4260                                 btrfs_release_path(&path);
4261                                 goto again;
4262                         }
4263                         if (ret)
4264                                 err = 1;
4265                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4266                                 btrfs_free_fs_root(tmp_root);
4267                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4268                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4269                         process_root_ref(leaf, path.slots[0], &key,
4270                                          root_cache);
4271                 }
4272 next:
4273                 path.slots[0]++;
4274         }
4275 out:
4276         btrfs_release_path(&path);
4277         if (err)
4278                 free_extent_cache_tree(&wc.shared);
4279         if (!cache_tree_empty(&wc.shared))
4280                 fprintf(stderr, "warning line %d\n", __LINE__);
4281
4282         task_stop(ctx.info);
4283
4284         return err;
4285 }
4286
4287 /*
4288  * Find the @index according by @ino and name.
4289  * Notice:time efficiency is O(N)
4290  *
4291  * @root:       the root of the fs/file tree
4292  * @index_ret:  the index as return value
4293  * @namebuf:    the name to match
4294  * @name_len:   the length of name to match
4295  * @file_type:  the file_type of INODE_ITEM to match
4296  *
4297  * Returns 0 if found and *@index_ret will be modified with right value
4298  * Returns< 0 not found and *@index_ret will be (u64)-1
4299  */
4300 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4301                           u64 *index_ret, char *namebuf, u32 name_len,
4302                           u8 file_type)
4303 {
4304         struct btrfs_path path;
4305         struct extent_buffer *node;
4306         struct btrfs_dir_item *di;
4307         struct btrfs_key key;
4308         struct btrfs_key location;
4309         char name[BTRFS_NAME_LEN] = {0};
4310
4311         u32 total;
4312         u32 cur = 0;
4313         u32 len;
4314         u32 data_len;
4315         u8 filetype;
4316         int slot;
4317         int ret;
4318
4319         ASSERT(index_ret);
4320
4321         /* search from the last index */
4322         key.objectid = dirid;
4323         key.offset = (u64)-1;
4324         key.type = BTRFS_DIR_INDEX_KEY;
4325
4326         btrfs_init_path(&path);
4327         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4328         if (ret < 0)
4329                 return ret;
4330
4331 loop:
4332         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4333         if (ret) {
4334                 ret = -ENOENT;
4335                 *index_ret = (64)-1;
4336                 goto out;
4337         }
4338         /* Check whether inode_id/filetype/name match */
4339         node = path.nodes[0];
4340         slot = path.slots[0];
4341         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4342         total = btrfs_item_size_nr(node, slot);
4343         while (cur < total) {
4344                 ret = -ENOENT;
4345                 len = btrfs_dir_name_len(node, di);
4346                 data_len = btrfs_dir_data_len(node, di);
4347
4348                 btrfs_dir_item_key_to_cpu(node, di, &location);
4349                 if (location.objectid != location_id ||
4350                     location.type != BTRFS_INODE_ITEM_KEY ||
4351                     location.offset != 0)
4352                         goto next;
4353
4354                 filetype = btrfs_dir_type(node, di);
4355                 if (file_type != filetype)
4356                         goto next;
4357
4358                 if (len > BTRFS_NAME_LEN)
4359                         len = BTRFS_NAME_LEN;
4360
4361                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4362                 if (len != name_len || strncmp(namebuf, name, len))
4363                         goto next;
4364
4365                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4366                 *index_ret = key.offset;
4367                 ret = 0;
4368                 goto out;
4369 next:
4370                 len += sizeof(*di) + data_len;
4371                 di = (struct btrfs_dir_item *)((char *)di + len);
4372                 cur += len;
4373         }
4374         goto loop;
4375
4376 out:
4377         btrfs_release_path(&path);
4378         return ret;
4379 }
4380
4381 /*
4382  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4383  * INODE_REF/INODE_EXTREF match.
4384  *
4385  * @root:       the root of the fs/file tree
4386  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4387  *              value while find index
4388  * @location_key: location key of the struct btrfs_dir_item to match
4389  * @name:       the name to match
4390  * @namelen:    the length of name
4391  * @file_type:  the type of file to math
4392  *
4393  * Return 0 if no error occurred.
4394  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4395  * DIR_ITEM/DIR_INDEX
4396  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4397  * and DIR_ITEM/DIR_INDEX mismatch
4398  */
4399 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4400                          struct btrfs_key *location_key, char *name,
4401                          u32 namelen, u8 file_type)
4402 {
4403         struct btrfs_path path;
4404         struct extent_buffer *node;
4405         struct btrfs_dir_item *di;
4406         struct btrfs_key location;
4407         char namebuf[BTRFS_NAME_LEN] = {0};
4408         u32 total;
4409         u32 cur = 0;
4410         u32 len;
4411         u32 data_len;
4412         u8 filetype;
4413         int slot;
4414         int ret;
4415
4416         /* get the index by traversing all index */
4417         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4418                 ret = find_dir_index(root, key->objectid,
4419                                      location_key->objectid, &key->offset,
4420                                      name, namelen, file_type);
4421                 if (ret)
4422                         ret = DIR_INDEX_MISSING;
4423                 return ret;
4424         }
4425
4426         btrfs_init_path(&path);
4427         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4428         if (ret) {
4429                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4430                         DIR_INDEX_MISSING;
4431                 goto out;
4432         }
4433
4434         /* Check whether inode_id/filetype/name match */
4435         node = path.nodes[0];
4436         slot = path.slots[0];
4437         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4438         total = btrfs_item_size_nr(node, slot);
4439         while (cur < total) {
4440                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4441                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4442
4443                 len = btrfs_dir_name_len(node, di);
4444                 data_len = btrfs_dir_data_len(node, di);
4445
4446                 btrfs_dir_item_key_to_cpu(node, di, &location);
4447                 if (location.objectid != location_key->objectid ||
4448                     location.type != location_key->type ||
4449                     location.offset != location_key->offset)
4450                         goto next;
4451
4452                 filetype = btrfs_dir_type(node, di);
4453                 if (file_type != filetype)
4454                         goto next;
4455
4456                 if (len > BTRFS_NAME_LEN) {
4457                         len = BTRFS_NAME_LEN;
4458                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4459                         root->objectid,
4460                         key->type == BTRFS_DIR_ITEM_KEY ?
4461                         "DIR_ITEM" : "DIR_INDEX",
4462                         key->objectid, key->offset, len);
4463                 }
4464                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4465                                    len);
4466                 if (len != namelen || strncmp(namebuf, name, len))
4467                         goto next;
4468
4469                 ret = 0;
4470                 goto out;
4471 next:
4472                 len += sizeof(*di) + data_len;
4473                 di = (struct btrfs_dir_item *)((char *)di + len);
4474                 cur += len;
4475         }
4476
4477 out:
4478         btrfs_release_path(&path);
4479         return ret;
4480 }
4481
4482 /*
4483  * Prints inode ref error message
4484  */
4485 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4486                                 u64 index, const char *namebuf, int name_len,
4487                                 u8 filetype, int err)
4488 {
4489         if (!err)
4490                 return;
4491
4492         /* root dir error */
4493         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4494                 error(
4495         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4496                       root->objectid, key->objectid, key->offset, namebuf);
4497                 return;
4498         }
4499
4500         /* normal error */
4501         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4502                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4503                       root->objectid, key->offset,
4504                       btrfs_name_hash(namebuf, name_len),
4505                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4506                       namebuf, filetype);
4507         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4508                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4509                       root->objectid, key->offset, index,
4510                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4511                       namebuf, filetype);
4512 }
4513
4514 /*
4515  * Insert the missing inode item.
4516  *
4517  * Returns 0 means success.
4518  * Returns <0 means error.
4519  */
4520 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4521                                      u8 filetype)
4522 {
4523         struct btrfs_key key;
4524         struct btrfs_trans_handle *trans;
4525         struct btrfs_path path;
4526         int ret;
4527
4528         key.objectid = ino;
4529         key.type = BTRFS_INODE_ITEM_KEY;
4530         key.offset = 0;
4531
4532         btrfs_init_path(&path);
4533         trans = btrfs_start_transaction(root, 1);
4534         if (IS_ERR(trans)) {
4535                 ret = -EIO;
4536                 goto out;
4537         }
4538
4539         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4540         if (ret < 0 || !ret)
4541                 goto fail;
4542
4543         /* insert inode item */
4544         create_inode_item_lowmem(trans, root, ino, filetype);
4545         ret = 0;
4546 fail:
4547         btrfs_commit_transaction(trans, root);
4548 out:
4549         if (ret)
4550                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4551                       root->objectid, ino);
4552         btrfs_release_path(&path);
4553         return ret;
4554 }
4555
4556 /*
4557  * The ternary means dir item, dir index and relative inode ref.
4558  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4559  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4560  * strategy:
4561  * If two of three is missing or mismatched, delete the existing one.
4562  * If one of three is missing or mismatched, add the missing one.
4563  *
4564  * returns 0 means success.
4565  * returns not 0 means on error;
4566  */
4567 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4568                           u64 index, char *name, int name_len, u8 filetype,
4569                           int err)
4570 {
4571         struct btrfs_trans_handle *trans;
4572         int stage = 0;
4573         int ret = 0;
4574
4575         /*
4576          * stage shall be one of following valild values:
4577          *      0: Fine, nothing to do.
4578          *      1: One of three is wrong, so add missing one.
4579          *      2: Two of three is wrong, so delete existed one.
4580          */
4581         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4582                 stage++;
4583         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4584                 stage++;
4585         if (err & (INODE_REF_MISSING))
4586                 stage++;
4587
4588         /* stage must be smllarer than 3 */
4589         ASSERT(stage < 3);
4590
4591         trans = btrfs_start_transaction(root, 1);
4592         if (stage == 2) {
4593                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4594                                    name_len, 0);
4595                 goto out;
4596         }
4597         if (stage == 1) {
4598                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4599                                filetype, &index, 1, 1);
4600                 goto out;
4601         }
4602 out:
4603         btrfs_commit_transaction(trans, root);
4604
4605         if (ret)
4606                 error("fail to repair inode %llu name %s filetype %u",
4607                       ino, name, filetype);
4608         else
4609                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4610                        stage == 2 ? "Delete" : "Add",
4611                        ino, name, filetype);
4612
4613         return ret;
4614 }
4615
4616 /*
4617  * Traverse the given INODE_REF and call find_dir_item() to find related
4618  * DIR_ITEM/DIR_INDEX.
4619  *
4620  * @root:       the root of the fs/file tree
4621  * @ref_key:    the key of the INODE_REF
4622  * @path        the path provides node and slot
4623  * @refs:       the count of INODE_REF
4624  * @mode:       the st_mode of INODE_ITEM
4625  * @name_ret:   returns with the first ref's name
4626  * @name_len_ret:    len of the name_ret
4627  *
4628  * Return 0 if no error occurred.
4629  */
4630 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4631                            struct btrfs_path *path, char *name_ret,
4632                            u32 *namelen_ret, u64 *refs_ret, int mode)
4633 {
4634         struct btrfs_key key;
4635         struct btrfs_key location;
4636         struct btrfs_inode_ref *ref;
4637         struct extent_buffer *node;
4638         char namebuf[BTRFS_NAME_LEN] = {0};
4639         u32 total;
4640         u32 cur = 0;
4641         u32 len;
4642         u32 name_len;
4643         u64 index;
4644         int ret;
4645         int err = 0;
4646         int tmp_err;
4647         int slot;
4648         int need_research = 0;
4649         u64 refs;
4650
4651 begin:
4652         err = 0;
4653         cur = 0;
4654         refs = *refs_ret;
4655
4656         /* since after repair, path and the dir item may be changed */
4657         if (need_research) {
4658                 need_research = 0;
4659                 btrfs_release_path(path);
4660                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4661                 /* the item was deleted, let path point to the last checked item */
4662                 if (ret > 0) {
4663                         if (path->slots[0] == 0)
4664                                 btrfs_prev_leaf(root, path);
4665                         else
4666                                 path->slots[0]--;
4667                 }
4668                 if (ret)
4669                         goto out;
4670         }
4671
4672         location.objectid = ref_key->objectid;
4673         location.type = BTRFS_INODE_ITEM_KEY;
4674         location.offset = 0;
4675         node = path->nodes[0];
4676         slot = path->slots[0];
4677
4678         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4679         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4680         total = btrfs_item_size_nr(node, slot);
4681
4682 next:
4683         /* Update inode ref count */
4684         refs++;
4685         tmp_err = 0;
4686         index = btrfs_inode_ref_index(node, ref);
4687         name_len = btrfs_inode_ref_name_len(node, ref);
4688
4689         if (name_len <= BTRFS_NAME_LEN) {
4690                 len = name_len;
4691         } else {
4692                 len = BTRFS_NAME_LEN;
4693                 warning("root %llu INODE_REF[%llu %llu] name too long",
4694                         root->objectid, ref_key->objectid, ref_key->offset);
4695         }
4696
4697         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4698
4699         /* copy the first name found to name_ret */
4700         if (refs == 1 && name_ret) {
4701                 memcpy(name_ret, namebuf, len);
4702                 *namelen_ret = len;
4703         }
4704
4705         /* Check root dir ref */
4706         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4707                 if (index != 0 || len != strlen("..") ||
4708                     strncmp("..", namebuf, len) ||
4709                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4710                         /* set err bits then repair will delete the ref */
4711                         err |= DIR_INDEX_MISSING;
4712                         err |= DIR_ITEM_MISSING;
4713                 }
4714                 goto end;
4715         }
4716
4717         /* Find related DIR_INDEX */
4718         key.objectid = ref_key->offset;
4719         key.type = BTRFS_DIR_INDEX_KEY;
4720         key.offset = index;
4721         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4722                             imode_to_type(mode));
4723
4724         /* Find related dir_item */
4725         key.objectid = ref_key->offset;
4726         key.type = BTRFS_DIR_ITEM_KEY;
4727         key.offset = btrfs_name_hash(namebuf, len);
4728         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4729                             imode_to_type(mode));
4730 end:
4731         if (tmp_err && repair) {
4732                 ret = repair_ternary_lowmem(root, ref_key->offset,
4733                                             ref_key->objectid, index, namebuf,
4734                                             name_len, imode_to_type(mode),
4735                                             tmp_err);
4736                 if (!ret) {
4737                         need_research = 1;
4738                         goto begin;
4739                 }
4740         }
4741         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4742                             imode_to_type(mode), tmp_err);
4743         err |= tmp_err;
4744         len = sizeof(*ref) + name_len;
4745         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4746         cur += len;
4747         if (cur < total)
4748                 goto next;
4749
4750 out:
4751         *refs_ret = refs;
4752         return err;
4753 }
4754
4755 /*
4756  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4757  * DIR_ITEM/DIR_INDEX.
4758  *
4759  * @root:       the root of the fs/file tree
4760  * @ref_key:    the key of the INODE_EXTREF
4761  * @refs:       the count of INODE_EXTREF
4762  * @mode:       the st_mode of INODE_ITEM
4763  *
4764  * Return 0 if no error occurred.
4765  */
4766 static int check_inode_extref(struct btrfs_root *root,
4767                               struct btrfs_key *ref_key,
4768                               struct extent_buffer *node, int slot, u64 *refs,
4769                               int mode)
4770 {
4771         struct btrfs_key key;
4772         struct btrfs_key location;
4773         struct btrfs_inode_extref *extref;
4774         char namebuf[BTRFS_NAME_LEN] = {0};
4775         u32 total;
4776         u32 cur = 0;
4777         u32 len;
4778         u32 name_len;
4779         u64 index;
4780         u64 parent;
4781         int ret;
4782         int err = 0;
4783
4784         location.objectid = ref_key->objectid;
4785         location.type = BTRFS_INODE_ITEM_KEY;
4786         location.offset = 0;
4787
4788         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4789         total = btrfs_item_size_nr(node, slot);
4790
4791 next:
4792         /* update inode ref count */
4793         (*refs)++;
4794         name_len = btrfs_inode_extref_name_len(node, extref);
4795         index = btrfs_inode_extref_index(node, extref);
4796         parent = btrfs_inode_extref_parent(node, extref);
4797         if (name_len <= BTRFS_NAME_LEN) {
4798                 len = name_len;
4799         } else {
4800                 len = BTRFS_NAME_LEN;
4801                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4802                         root->objectid, ref_key->objectid, ref_key->offset);
4803         }
4804         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4805
4806         /* Check root dir ref name */
4807         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4808                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4809                       root->objectid, ref_key->objectid, ref_key->offset,
4810                       namebuf);
4811                 err |= ROOT_DIR_ERROR;
4812         }
4813
4814         /* find related dir_index */
4815         key.objectid = parent;
4816         key.type = BTRFS_DIR_INDEX_KEY;
4817         key.offset = index;
4818         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4819         err |= ret;
4820
4821         /* find related dir_item */
4822         key.objectid = parent;
4823         key.type = BTRFS_DIR_ITEM_KEY;
4824         key.offset = btrfs_name_hash(namebuf, len);
4825         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4826         err |= ret;
4827
4828         len = sizeof(*extref) + name_len;
4829         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4830         cur += len;
4831
4832         if (cur < total)
4833                 goto next;
4834
4835         return err;
4836 }
4837
4838 /*
4839  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4840  * DIR_ITEM/DIR_INDEX match.
4841  * Return with @index_ret.
4842  *
4843  * @root:       the root of the fs/file tree
4844  * @key:        the key of the INODE_REF/INODE_EXTREF
4845  * @name:       the name in the INODE_REF/INODE_EXTREF
4846  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4847  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4848  *              value (64)-1 means do not check index
4849  * @ext_ref:    the EXTENDED_IREF feature
4850  *
4851  * Return 0 if no error occurred.
4852  * Return >0 for error bitmap
4853  */
4854 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4855                           char *name, int namelen, u64 *index_ret,
4856                           unsigned int ext_ref)
4857 {
4858         struct btrfs_path path;
4859         struct btrfs_inode_ref *ref;
4860         struct btrfs_inode_extref *extref;
4861         struct extent_buffer *node;
4862         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4863         u32 total;
4864         u32 cur = 0;
4865         u32 len;
4866         u32 ref_namelen;
4867         u64 ref_index;
4868         u64 parent;
4869         u64 dir_id;
4870         int slot;
4871         int ret;
4872
4873         ASSERT(index_ret);
4874
4875         btrfs_init_path(&path);
4876         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4877         if (ret) {
4878                 ret = INODE_REF_MISSING;
4879                 goto extref;
4880         }
4881
4882         node = path.nodes[0];
4883         slot = path.slots[0];
4884
4885         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4886         total = btrfs_item_size_nr(node, slot);
4887
4888         /* Iterate all entry of INODE_REF */
4889         while (cur < total) {
4890                 ret = INODE_REF_MISSING;
4891
4892                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4893                 ref_index = btrfs_inode_ref_index(node, ref);
4894                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4895                         goto next_ref;
4896
4897                 if (cur + sizeof(*ref) + ref_namelen > total ||
4898                     ref_namelen > BTRFS_NAME_LEN) {
4899                         warning("root %llu INODE %s[%llu %llu] name too long",
4900                                 root->objectid,
4901                                 key->type == BTRFS_INODE_REF_KEY ?
4902                                         "REF" : "EXTREF",
4903                                 key->objectid, key->offset);
4904
4905                         if (cur + sizeof(*ref) > total)
4906                                 break;
4907                         len = min_t(u32, total - cur - sizeof(*ref),
4908                                     BTRFS_NAME_LEN);
4909                 } else {
4910                         len = ref_namelen;
4911                 }
4912
4913                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4914                                    len);
4915
4916                 if (len != namelen || strncmp(ref_namebuf, name, len))
4917                         goto next_ref;
4918
4919                 *index_ret = ref_index;
4920                 ret = 0;
4921                 goto out;
4922 next_ref:
4923                 len = sizeof(*ref) + ref_namelen;
4924                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4925                 cur += len;
4926         }
4927
4928 extref:
4929         /* Skip if not support EXTENDED_IREF feature */
4930         if (!ext_ref)
4931                 goto out;
4932
4933         btrfs_release_path(&path);
4934         btrfs_init_path(&path);
4935
4936         dir_id = key->offset;
4937         key->type = BTRFS_INODE_EXTREF_KEY;
4938         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4939
4940         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4941         if (ret) {
4942                 ret = INODE_REF_MISSING;
4943                 goto out;
4944         }
4945
4946         node = path.nodes[0];
4947         slot = path.slots[0];
4948
4949         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4950         cur = 0;
4951         total = btrfs_item_size_nr(node, slot);
4952
4953         /* Iterate all entry of INODE_EXTREF */
4954         while (cur < total) {
4955                 ret = INODE_REF_MISSING;
4956
4957                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4958                 ref_index = btrfs_inode_extref_index(node, extref);
4959                 parent = btrfs_inode_extref_parent(node, extref);
4960                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4961                         goto next_extref;
4962
4963                 if (parent != dir_id)
4964                         goto next_extref;
4965
4966                 if (ref_namelen <= BTRFS_NAME_LEN) {
4967                         len = ref_namelen;
4968                 } else {
4969                         len = BTRFS_NAME_LEN;
4970                         warning("root %llu INODE %s[%llu %llu] name too long",
4971                                 root->objectid,
4972                                 key->type == BTRFS_INODE_REF_KEY ?
4973                                         "REF" : "EXTREF",
4974                                 key->objectid, key->offset);
4975                 }
4976                 read_extent_buffer(node, ref_namebuf,
4977                                    (unsigned long)(extref + 1), len);
4978
4979                 if (len != namelen || strncmp(ref_namebuf, name, len))
4980                         goto next_extref;
4981
4982                 *index_ret = ref_index;
4983                 ret = 0;
4984                 goto out;
4985
4986 next_extref:
4987                 len = sizeof(*extref) + ref_namelen;
4988                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4989                 cur += len;
4990
4991         }
4992 out:
4993         btrfs_release_path(&path);
4994         return ret;
4995 }
4996
4997 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4998                                u64 ino, u64 index, const char *namebuf,
4999                                int name_len, u8 filetype, int err)
5000 {
5001         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5002                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5003                       root->objectid, key->objectid, key->offset, namebuf,
5004                       filetype,
5005                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5006         }
5007
5008         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5009                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5010                       root->objectid, key->objectid, index, namebuf, filetype,
5011                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5012         }
5013
5014         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5015                 error(
5016                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5017                       root->objectid, ino, index, namebuf, filetype,
5018                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5019         }
5020
5021         if (err & INODE_REF_MISSING)
5022                 error(
5023                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5024                       root->objectid, ino, key->objectid, namebuf, filetype);
5025
5026 }
5027
5028 /*
5029  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5030  *
5031  * Returns error after repair
5032  */
5033 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5034                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5035                            int err)
5036 {
5037         int ret;
5038
5039         if (err & INODE_ITEM_MISSING) {
5040                 ret = repair_inode_item_missing(root, ino, filetype);
5041                 if (!ret)
5042                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5043         }
5044
5045         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5046                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5047                                             name_len, filetype, err);
5048                 if (!ret) {
5049                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5050                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5051                         err &= ~(INODE_REF_MISSING);
5052                 }
5053         }
5054         return err;
5055 }
5056
5057 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5058                 u64 *size_ret)
5059 {
5060         struct btrfs_key key;
5061         struct btrfs_path path;
5062         u32 len;
5063         struct btrfs_dir_item *di;
5064         int ret;
5065         int cur = 0;
5066         int total = 0;
5067
5068         ASSERT(size_ret);
5069         *size_ret = 0;
5070
5071         key.objectid = ino;
5072         key.type = type;
5073         key.offset = (u64)-1;
5074
5075         btrfs_init_path(&path);
5076         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5077         if (ret < 0) {
5078                 ret = -EIO;
5079                 goto out;
5080         }
5081         /* if found, go to spacial case */
5082         if (ret == 0)
5083                 goto special_case;
5084
5085 loop:
5086         ret = btrfs_previous_item(root, &path, ino, type);
5087
5088         if (ret) {
5089                 ret = 0;
5090                 goto out;
5091         }
5092
5093 special_case:
5094         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5095         cur = 0;
5096         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5097
5098         while (cur < total) {
5099                 len = btrfs_dir_name_len(path.nodes[0], di);
5100                 if (len > BTRFS_NAME_LEN)
5101                         len = BTRFS_NAME_LEN;
5102                 *size_ret += len;
5103
5104                 len += btrfs_dir_data_len(path.nodes[0], di);
5105                 len += sizeof(*di);
5106                 di = (struct btrfs_dir_item *)((char *)di + len);
5107                 cur += len;
5108         }
5109         goto loop;
5110
5111 out:
5112         btrfs_release_path(&path);
5113         return ret;
5114 }
5115
5116 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5117 {
5118         u64 item_size;
5119         u64 index_size;
5120         int ret;
5121
5122         ASSERT(size);
5123         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5124         if (ret)
5125                 goto out;
5126
5127         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5128         if (ret)
5129                 goto out;
5130
5131         *size = item_size + index_size;
5132
5133 out:
5134         if (ret)
5135                 error("failed to count root %llu INODE[%llu] root size",
5136                       root->objectid, ino);
5137         return ret;
5138 }
5139
5140 /*
5141  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5142  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5143  *
5144  * @root:       the root of the fs/file tree
5145  * @key:        the key of the INODE_REF/INODE_EXTREF
5146  * @path:       the path
5147  * @size:       the st_size of the INODE_ITEM
5148  * @ext_ref:    the EXTENDED_IREF feature
5149  *
5150  * Return 0 if no error occurred.
5151  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5152  */
5153 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5154                           struct btrfs_path *path, u64 *size,
5155                           unsigned int ext_ref)
5156 {
5157         struct btrfs_dir_item *di;
5158         struct btrfs_inode_item *ii;
5159         struct btrfs_key key;
5160         struct btrfs_key location;
5161         struct extent_buffer *node;
5162         int slot;
5163         char namebuf[BTRFS_NAME_LEN] = {0};
5164         u32 total;
5165         u32 cur = 0;
5166         u32 len;
5167         u32 name_len;
5168         u32 data_len;
5169         u8 filetype;
5170         u32 mode = 0;
5171         u64 index;
5172         int ret;
5173         int err;
5174         int tmp_err;
5175         int need_research = 0;
5176
5177         /*
5178          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5179          * ignore index check.
5180          */
5181         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5182                 index = di_key->offset;
5183         else
5184                 index = (u64)-1;
5185 begin:
5186         err = 0;
5187         cur = 0;
5188
5189         /* since after repair, path and the dir item may be changed */
5190         if (need_research) {
5191                 need_research = 0;
5192                 err |= DIR_COUNT_AGAIN;
5193                 btrfs_release_path(path);
5194                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5195                 /* the item was deleted, let path point the last checked item */
5196                 if (ret > 0) {
5197                         if (path->slots[0] == 0)
5198                                 btrfs_prev_leaf(root, path);
5199                         else
5200                                 path->slots[0]--;
5201                 }
5202                 if (ret)
5203                         goto out;
5204         }
5205
5206         node = path->nodes[0];
5207         slot = path->slots[0];
5208
5209         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5210         total = btrfs_item_size_nr(node, slot);
5211         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5212
5213         while (cur < total) {
5214                 data_len = btrfs_dir_data_len(node, di);
5215                 tmp_err = 0;
5216                 if (data_len)
5217                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5218                               root->objectid,
5219               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5220                               di_key->objectid, di_key->offset, data_len);
5221
5222                 name_len = btrfs_dir_name_len(node, di);
5223                 if (name_len <= BTRFS_NAME_LEN) {
5224                         len = name_len;
5225                 } else {
5226                         len = BTRFS_NAME_LEN;
5227                         warning("root %llu %s[%llu %llu] name too long",
5228                                 root->objectid,
5229                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5230                                 di_key->objectid, di_key->offset);
5231                 }
5232                 (*size) += name_len;
5233                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5234                                    len);
5235                 filetype = btrfs_dir_type(node, di);
5236
5237                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5238                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5239                         err |= -EIO;
5240                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5241                         root->objectid, di_key->objectid, di_key->offset,
5242                         namebuf, len, filetype, di_key->offset,
5243                         btrfs_name_hash(namebuf, len));
5244                 }
5245
5246                 btrfs_dir_item_key_to_cpu(node, di, &location);
5247                 /* Ignore related ROOT_ITEM check */
5248                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5249                         goto next;
5250
5251                 btrfs_release_path(path);
5252                 /* Check relative INODE_ITEM(existence/filetype) */
5253                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5254                 if (ret) {
5255                         tmp_err |= INODE_ITEM_MISSING;
5256                         goto next;
5257                 }
5258
5259                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5260                                     struct btrfs_inode_item);
5261                 mode = btrfs_inode_mode(path->nodes[0], ii);
5262                 if (imode_to_type(mode) != filetype) {
5263                         tmp_err |= INODE_ITEM_MISMATCH;
5264                         goto next;
5265                 }
5266
5267                 /* Check relative INODE_REF/INODE_EXTREF */
5268                 key.objectid = location.objectid;
5269                 key.type = BTRFS_INODE_REF_KEY;
5270                 key.offset = di_key->objectid;
5271                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5272                                           &index, ext_ref);
5273
5274                 /* check relative INDEX/ITEM */
5275                 key.objectid = di_key->objectid;
5276                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5277                         key.type = BTRFS_DIR_INDEX_KEY;
5278                         key.offset = index;
5279                 } else {
5280                         key.type = BTRFS_DIR_ITEM_KEY;
5281                         key.offset = btrfs_name_hash(namebuf, name_len);
5282                 }
5283
5284                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5285                                          name_len, filetype);
5286                 /* find_dir_item may find index */
5287                 if (key.type == BTRFS_DIR_INDEX_KEY)
5288                         index = key.offset;
5289 next:
5290
5291                 if (tmp_err && repair) {
5292                         ret = repair_dir_item(root, di_key->objectid,
5293                                               location.objectid, index,
5294                                               imode_to_type(mode), namebuf,
5295                                               name_len, tmp_err);
5296                         if (ret != tmp_err) {
5297                                 need_research = 1;
5298                                 goto begin;
5299                         }
5300                 }
5301                 btrfs_release_path(path);
5302                 print_dir_item_err(root, di_key, location.objectid, index,
5303                                    namebuf, name_len, filetype, tmp_err);
5304                 err |= tmp_err;
5305                 len = sizeof(*di) + name_len + data_len;
5306                 di = (struct btrfs_dir_item *)((char *)di + len);
5307                 cur += len;
5308
5309                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5310                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5311                               root->objectid, di_key->objectid,
5312                               di_key->offset);
5313                         break;
5314                 }
5315         }
5316 out:
5317         /* research path */
5318         btrfs_release_path(path);
5319         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5320         if (ret)
5321                 err |= ret > 0 ? -ENOENT : ret;
5322         return err;
5323 }
5324
5325 /*
5326  * Check file extent datasum/hole, update the size of the file extents,
5327  * check and update the last offset of the file extent.
5328  *
5329  * @root:       the root of fs/file tree.
5330  * @fkey:       the key of the file extent.
5331  * @nodatasum:  INODE_NODATASUM feature.
5332  * @size:       the sum of all EXTENT_DATA items size for this inode.
5333  * @end:        the offset of the last extent.
5334  *
5335  * Return 0 if no error occurred.
5336  */
5337 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5338                              struct extent_buffer *node, int slot,
5339                              unsigned int nodatasum, u64 *size, u64 *end)
5340 {
5341         struct btrfs_file_extent_item *fi;
5342         u64 disk_bytenr;
5343         u64 disk_num_bytes;
5344         u64 extent_num_bytes;
5345         u64 extent_offset;
5346         u64 csum_found;         /* In byte size, sectorsize aligned */
5347         u64 search_start;       /* Logical range start we search for csum */
5348         u64 search_len;         /* Logical range len we search for csum */
5349         unsigned int extent_type;
5350         unsigned int is_hole;
5351         int compressed = 0;
5352         int ret;
5353         int err = 0;
5354
5355         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5356
5357         /* Check inline extent */
5358         extent_type = btrfs_file_extent_type(node, fi);
5359         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5360                 struct btrfs_item *e = btrfs_item_nr(slot);
5361                 u32 item_inline_len;
5362
5363                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5364                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5365                 compressed = btrfs_file_extent_compression(node, fi);
5366                 if (extent_num_bytes == 0) {
5367                         error(
5368                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5369                                 root->objectid, fkey->objectid, fkey->offset);
5370                         err |= FILE_EXTENT_ERROR;
5371                 }
5372                 if (!compressed && extent_num_bytes != item_inline_len) {
5373                         error(
5374                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5375                                 root->objectid, fkey->objectid, fkey->offset,
5376                                 extent_num_bytes, item_inline_len);
5377                         err |= FILE_EXTENT_ERROR;
5378                 }
5379                 *end += extent_num_bytes;
5380                 *size += extent_num_bytes;
5381                 return err;
5382         }
5383
5384         /* Check extent type */
5385         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5386                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5387                 err |= FILE_EXTENT_ERROR;
5388                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5389                       root->objectid, fkey->objectid, fkey->offset);
5390                 return err;
5391         }
5392
5393         /* Check REG_EXTENT/PREALLOC_EXTENT */
5394         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5395         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5396         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5397         extent_offset = btrfs_file_extent_offset(node, fi);
5398         compressed = btrfs_file_extent_compression(node, fi);
5399         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5400
5401         /*
5402          * Check EXTENT_DATA csum
5403          *
5404          * For plain (uncompressed) extent, we should only check the range
5405          * we're referring to, as it's possible that part of prealloc extent
5406          * has been written, and has csum:
5407          *
5408          * |<--- Original large preallocated extent A ---->|
5409          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5410          *      No csum                         Has csum
5411          *
5412          * For compressed extent, we should check the whole range.
5413          */
5414         if (!compressed) {
5415                 search_start = disk_bytenr + extent_offset;
5416                 search_len = extent_num_bytes;
5417         } else {
5418                 search_start = disk_bytenr;
5419                 search_len = disk_num_bytes;
5420         }
5421         ret = count_csum_range(root, search_start, search_len, &csum_found);
5422         if (csum_found > 0 && nodatasum) {
5423                 err |= ODD_CSUM_ITEM;
5424                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5425                       root->objectid, fkey->objectid, fkey->offset);
5426         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5427                    !is_hole && (ret < 0 || csum_found < search_len)) {
5428                 err |= CSUM_ITEM_MISSING;
5429                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5430                       root->objectid, fkey->objectid, fkey->offset,
5431                       csum_found, search_len);
5432         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5433                 err |= ODD_CSUM_ITEM;
5434                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5435                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5436         }
5437
5438         /* Check EXTENT_DATA hole */
5439         if (!no_holes && *end != fkey->offset) {
5440                 err |= FILE_EXTENT_ERROR;
5441                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5442                       root->objectid, fkey->objectid, fkey->offset);
5443         }
5444
5445         *end += extent_num_bytes;
5446         if (!is_hole)
5447                 *size += extent_num_bytes;
5448
5449         return err;
5450 }
5451
5452 /*
5453  * Set inode item nbytes to @nbytes
5454  *
5455  * Returns  0     on success
5456  * Returns  != 0  on error
5457  */
5458 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5459                                       struct btrfs_path *path,
5460                                       u64 ino, u64 nbytes)
5461 {
5462         struct btrfs_trans_handle *trans;
5463         struct btrfs_inode_item *ii;
5464         struct btrfs_key key;
5465         struct btrfs_key research_key;
5466         int err = 0;
5467         int ret;
5468
5469         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5470
5471         key.objectid = ino;
5472         key.type = BTRFS_INODE_ITEM_KEY;
5473         key.offset = 0;
5474
5475         trans = btrfs_start_transaction(root, 1);
5476         if (IS_ERR(trans)) {
5477                 ret = PTR_ERR(trans);
5478                 err |= ret;
5479                 goto out;
5480         }
5481
5482         btrfs_release_path(path);
5483         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5484         if (ret > 0)
5485                 ret = -ENOENT;
5486         if (ret) {
5487                 err |= ret;
5488                 goto fail;
5489         }
5490
5491         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5492                             struct btrfs_inode_item);
5493         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5494         btrfs_mark_buffer_dirty(path->nodes[0]);
5495 fail:
5496         btrfs_commit_transaction(trans, root);
5497 out:
5498         if (ret)
5499                 error("failed to set nbytes in inode %llu root %llu",
5500                       ino, root->root_key.objectid);
5501         else
5502                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5503                        root->root_key.objectid, nbytes);
5504
5505         /* research path */
5506         btrfs_release_path(path);
5507         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5508         err |= ret;
5509
5510         return err;
5511 }
5512
5513 /*
5514  * Set directory inode isize to @isize.
5515  *
5516  * Returns 0     on success.
5517  * Returns != 0  on error.
5518  */
5519 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5520                                    struct btrfs_path *path,
5521                                    u64 ino, u64 isize)
5522 {
5523         struct btrfs_trans_handle *trans;
5524         struct btrfs_inode_item *ii;
5525         struct btrfs_key key;
5526         struct btrfs_key research_key;
5527         int ret;
5528         int err = 0;
5529
5530         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5531
5532         key.objectid = ino;
5533         key.type = BTRFS_INODE_ITEM_KEY;
5534         key.offset = 0;
5535
5536         trans = btrfs_start_transaction(root, 1);
5537         if (IS_ERR(trans)) {
5538                 ret = PTR_ERR(trans);
5539                 err |= ret;
5540                 goto out;
5541         }
5542
5543         btrfs_release_path(path);
5544         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5545         if (ret > 0)
5546                 ret = -ENOENT;
5547         if (ret) {
5548                 err |= ret;
5549                 goto fail;
5550         }
5551
5552         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5553                             struct btrfs_inode_item);
5554         btrfs_set_inode_size(path->nodes[0], ii, isize);
5555         btrfs_mark_buffer_dirty(path->nodes[0]);
5556 fail:
5557         btrfs_commit_transaction(trans, root);
5558 out:
5559         if (ret)
5560                 error("failed to set isize in inode %llu root %llu",
5561                       ino, root->root_key.objectid);
5562         else
5563                 printf("Set isize in inode %llu root %llu to %llu\n",
5564                        ino, root->root_key.objectid, isize);
5565
5566         btrfs_release_path(path);
5567         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5568         err |= ret;
5569
5570         return err;
5571 }
5572
5573 /*
5574  * Wrapper function for btrfs_add_orphan_item().
5575  *
5576  * Returns 0     on success.
5577  * Returns != 0  on error.
5578  */
5579 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5580                                            struct btrfs_path *path, u64 ino)
5581 {
5582         struct btrfs_trans_handle *trans;
5583         struct btrfs_key research_key;
5584         int ret;
5585         int err = 0;
5586
5587         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5588
5589         trans = btrfs_start_transaction(root, 1);
5590         if (IS_ERR(trans)) {
5591                 ret = PTR_ERR(trans);
5592                 err |= ret;
5593                 goto out;
5594         }
5595
5596         btrfs_release_path(path);
5597         ret = btrfs_add_orphan_item(trans, root, path, ino);
5598         err |= ret;
5599         btrfs_commit_transaction(trans, root);
5600 out:
5601         if (ret)
5602                 error("failed to add inode %llu as orphan item root %llu",
5603                       ino, root->root_key.objectid);
5604         else
5605                 printf("Added inode %llu as orphan item root %llu\n",
5606                        ino, root->root_key.objectid);
5607
5608         btrfs_release_path(path);
5609         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5610         err |= ret;
5611
5612         return err;
5613 }
5614
5615 /* Set inode_item nlink to @ref_count.
5616  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5617  *
5618  * Returns 0 on success
5619  */
5620 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5621                                       struct btrfs_path *path, u64 ino,
5622                                       const char *name, u32 namelen,
5623                                       u64 ref_count, u8 filetype, u64 *nlink)
5624 {
5625         struct btrfs_trans_handle *trans;
5626         struct btrfs_inode_item *ii;
5627         struct btrfs_key key;
5628         struct btrfs_key old_key;
5629         char namebuf[BTRFS_NAME_LEN] = {0};
5630         int name_len;
5631         int ret;
5632         int ret2;
5633
5634         /* save the key */
5635         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5636
5637         if (name && namelen) {
5638                 ASSERT(namelen <= BTRFS_NAME_LEN);
5639                 memcpy(namebuf, name, namelen);
5640                 name_len = namelen;
5641         } else {
5642                 sprintf(namebuf, "%llu", ino);
5643                 name_len = count_digits(ino);
5644                 printf("Can't find file name for inode %llu, use %s instead\n",
5645                        ino, namebuf);
5646         }
5647
5648         trans = btrfs_start_transaction(root, 1);
5649         if (IS_ERR(trans)) {
5650                 ret = PTR_ERR(trans);
5651                 goto out;
5652         }
5653
5654         btrfs_release_path(path);
5655         /* if refs is 0, put it into lostfound */
5656         if (ref_count == 0) {
5657                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5658                                               name_len, filetype, &ref_count);
5659                 if (ret)
5660                         goto fail;
5661         }
5662
5663         /* reset inode_item's nlink to ref_count */
5664         key.objectid = ino;
5665         key.type = BTRFS_INODE_ITEM_KEY;
5666         key.offset = 0;
5667
5668         btrfs_release_path(path);
5669         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5670         if (ret > 0)
5671                 ret = -ENOENT;
5672         if (ret)
5673                 goto fail;
5674
5675         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5676                             struct btrfs_inode_item);
5677         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5678         btrfs_mark_buffer_dirty(path->nodes[0]);
5679
5680         if (nlink)
5681                 *nlink = ref_count;
5682 fail:
5683         btrfs_commit_transaction(trans, root);
5684 out:
5685         if (ret)
5686                 error(
5687         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5688                        root->objectid, ino, namebuf, filetype);
5689         else
5690                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5691                        root->objectid, ino, namebuf, filetype);
5692
5693         /* research */
5694         btrfs_release_path(path);
5695         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5696         if (ret2 < 0)
5697                 return ret |= ret2;
5698         return ret;
5699 }
5700
/*
 * Check INODE_ITEM and related ITEMs (the same inode number)
 * 1. check link count
 * 2. check inode ref/extref
 * 3. check dir item/index
 *
 * @root:       the tree being checked
 * @path:       path positioned on the INODE_ITEM to check; it is advanced
 *              with btrfs_next_item() past every item of that inode
 * @ext_ref:    the EXTENDED_IREF feature
 *
 * When the global 'repair' flag is set, mismatching nlink, directory isize,
 * nbytes and a missing orphan item are fixed through the *_lowmem repair
 * helpers, and the matching error bit is only set if that repair fails.
 *
 * Return 0 if no error occurred.
 * Return >0 for error or hit the traversal is done(by error bitmap)
 */
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref)
{
        struct extent_buffer *node;
        struct btrfs_inode_item *ii;
        struct btrfs_key key;
        u64 inode_id;
        u32 mode;
        u64 nlink;
        u64 nbytes;
        u64 isize;
        u64 size = 0;           /* accumulated by check_dir_item() */
        u64 refs = 0;           /* accumulated by the ref/extref checks */
        u64 extent_end = 0;     /* accumulated by check_file_extent() */
        u64 extent_size = 0;    /* accumulated by check_file_extent() */
        unsigned int dir;
        unsigned int nodatasum;
        int slot;
        int ret;
        int err = 0;
        char namebuf[BTRFS_NAME_LEN] = {0};
        u32 name_len = 0;

        node = path->nodes[0];
        slot = path->slots[0];

        btrfs_item_key_to_cpu(node, &key, slot);
        inode_id = key.objectid;

        /* Items of the orphan objectid are not checked here, just stepped over */
        if (inode_id == BTRFS_ORPHAN_OBJECTID) {
                ret = btrfs_next_item(root, path);
                if (ret > 0)
                        err |= LAST_ITEM;
                return err;
        }

        /* Snapshot the inode item fields before the path moves on */
        ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
        isize = btrfs_inode_size(node, ii);
        nbytes = btrfs_inode_nbytes(node, ii);
        mode = btrfs_inode_mode(node, ii);
        dir = imode_to_type(mode) == BTRFS_FT_DIR;
        nlink = btrfs_inode_nlink(node, ii);
        nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;

        /* Walk every following item that still belongs to this inode */
        while (1) {
                ret = btrfs_next_item(root, path);
                if (ret < 0) {
                        /* out will fill 'err' using current statistics */
                        goto out;
                } else if (ret > 0) {
                        /* No more items in the tree */
                        err |= LAST_ITEM;
                        goto out;
                }

                node = path->nodes[0];
                slot = path->slots[0];
                btrfs_item_key_to_cpu(node, &key, slot);
                /* First item of another objectid ends this inode */
                if (key.objectid != inode_id)
                        goto out;

                switch (key.type) {
                case BTRFS_INODE_REF_KEY:
                        ret = check_inode_ref(root, &key, path, namebuf,
                                              &name_len, &refs, mode);
                        err |= ret;
                        break;
                case BTRFS_INODE_EXTREF_KEY:
                        /* The key.type test is always true inside this case */
                        if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
                                warning("root %llu EXTREF[%llu %llu] isn't supported",
                                        root->objectid, key.objectid,
                                        key.offset);
                        ret = check_inode_extref(root, &key, node, slot, &refs,
                                                 mode);
                        err |= ret;
                        break;
                case BTRFS_DIR_ITEM_KEY:
                case BTRFS_DIR_INDEX_KEY:
                        /*
                         * NOTE(review): the message says DIR_INDEX for both
                         * key types and prints imode_to_type(mode) under the
                         * label "mode" - confirm this is intended.
                         */
                        if (!dir) {
                                warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
                                        root->objectid, inode_id,
                                        imode_to_type(mode), key.objectid,
                                        key.offset);
                        }
                        ret = check_dir_item(root, &key, path, &size, ext_ref);
                        err |= ret;
                        break;
                case BTRFS_EXTENT_DATA_KEY:
                        if (dir) {
                                warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
                                        root->objectid, inode_id, key.objectid,
                                        key.offset);
                        }
                        ret = check_file_extent(root, &key, node, slot,
                                                nodatasum, &extent_size,
                                                &extent_end);
                        err |= ret;
                        break;
                case BTRFS_XATTR_ITEM_KEY:
                        /* xattrs are accepted without further checks */
                        break;
                default:
                        error("ITEM[%llu %u %llu] UNKNOWN TYPE",
                              key.objectid, key.type, key.offset);
                }
        }

out:
        /* verify INODE_ITEM nlink/isize/nbytes */
        if (dir) {
                /* Recount the directory size when a sub-check asked for it */
                if (repair && (err & DIR_COUNT_AGAIN)) {
                        err &= ~DIR_COUNT_AGAIN;
                        count_dir_isize(root, inode_id, &size);
                }

                /*
                 * The return value is not inspected here; on success the
                 * helper updates 'nlink', which the check below relies on.
                 */
                if ((nlink != 1 || refs != 1) && repair) {
                        ret = repair_inode_nlinks_lowmem(root, path, inode_id,
                                namebuf, name_len, refs, imode_to_type(mode),
                                &nlink);
                }

                if (nlink != 1) {
                        err |= LINK_COUNT_ERROR;
                        error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
                              root->objectid, inode_id, nlink);
                }

                /*
                 * Just a warning, as dir inode nbytes is just an
                 * instructive value.
                 */
                if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
                        warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
                                root->objectid, inode_id,
                                root->fs_info->nodesize);
                }

                /* 'ret' is only read below when repair was attempted */
                if (isize != size) {
                        if (repair)
                                ret = repair_dir_isize_lowmem(root, path,
                                                              inode_id, size);
                        if (!repair || ret) {
                                err |= ISIZE_ERROR;
                                error(
                "root %llu DIR INODE [%llu] size %llu not equal to %llu",
                                      root->objectid, inode_id, isize, size);
                        }
                }
        } else {
                if (nlink != refs) {
                        if (repair)
                                ret = repair_inode_nlinks_lowmem(root, path,
                                         inode_id, namebuf, name_len, refs,
                                         imode_to_type(mode), &nlink);
                        if (!repair || ret) {
                                err |= LINK_COUNT_ERROR;
                                error(
                "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
                                      root->objectid, inode_id, nlink, refs);
                        }
                } else if (!nlink) {
                        /* nlink == refs == 0: the inode needs an orphan item */
                        if (repair)
                                ret = repair_inode_orphan_item_lowmem(root,
                                                              path, inode_id);
                        if (!repair || ret) {
                                err |= ORPHAN_ITEM;
                                error("root %llu INODE[%llu] is orphan item",
                                      root->objectid, inode_id);
                        }
                }

                if (!nbytes && !no_holes && extent_end < isize) {
                        err |= NBYTES_ERROR;
                        error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
                              root->objectid, inode_id, isize);
                }

                if (nbytes != extent_size) {
                        if (repair)
                                ret = repair_inode_nbytes_lowmem(root, path,
                                                         inode_id, extent_size);
                        if (!repair || ret) {
                                err |= NBYTES_ERROR;
                                error(
        "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
                                      root->objectid, inode_id, nbytes,
                                      extent_size);
                        }
                }
        }

        return err;
}
5903
5904 /*
5905  * Insert the missing inode item and inode ref.
5906  *
5907  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5908  * Root dir should be handled specially because root dir is the root of fs.
5909  *
5910  * returns err (>0 or 0) after repair
5911  */
5912 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5913 {
5914         struct btrfs_trans_handle *trans;
5915         struct btrfs_key key;
5916         struct btrfs_path path;
5917         int filetype = BTRFS_FT_DIR;
5918         int ret = 0;
5919
5920         btrfs_init_path(&path);
5921
5922         if (err & INODE_REF_MISSING) {
5923                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5924                 key.type = BTRFS_INODE_REF_KEY;
5925                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5926
5927                 trans = btrfs_start_transaction(root, 1);
5928                 if (IS_ERR(trans)) {
5929                         ret = PTR_ERR(trans);
5930                         goto out;
5931                 }
5932
5933                 btrfs_release_path(&path);
5934                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5935                 if (ret)
5936                         goto trans_fail;
5937
5938                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5939                                              BTRFS_FIRST_FREE_OBJECTID,
5940                                              BTRFS_FIRST_FREE_OBJECTID, 0);
5941                 if (ret)
5942                         goto trans_fail;
5943
5944                 printf("Add INODE_REF[%llu %llu] name %s\n",
5945                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5946                        "..");
5947                 err &= ~INODE_REF_MISSING;
5948 trans_fail:
5949                 if (ret)
5950                         error("fail to insert first inode's ref");
5951                 btrfs_commit_transaction(trans, root);
5952         }
5953
5954         if (err & INODE_ITEM_MISSING) {
5955                 ret = repair_inode_item_missing(root,
5956                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
5957                 if (ret)
5958                         goto out;
5959                 err &= ~INODE_ITEM_MISSING;
5960         }
5961 out:
5962         if (ret)
5963                 error("fail to repair first inode");
5964         btrfs_release_path(&path);
5965         return err;
5966 }
5967
5968 /*
5969  * check first root dir's inode_item and inode_ref
5970  *
5971  * returns 0 means no error
5972  * returns >0 means error
5973  * returns <0 means fatal error
5974  */
5975 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5976 {
5977         struct btrfs_path path;
5978         struct btrfs_key key;
5979         struct btrfs_inode_item *ii;
5980         u64 index;
5981         u32 mode;
5982         int err = 0;
5983         int ret;
5984
5985         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5986         key.type = BTRFS_INODE_ITEM_KEY;
5987         key.offset = 0;
5988
5989         /* For root being dropped, we don't need to check first inode */
5990         if (btrfs_root_refs(&root->root_item) == 0 &&
5991             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5992             BTRFS_FIRST_FREE_OBJECTID)
5993                 return 0;
5994
5995         btrfs_init_path(&path);
5996         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5997         if (ret < 0)
5998                 goto out;
5999         if (ret > 0) {
6000                 ret = 0;
6001                 err |= INODE_ITEM_MISSING;
6002         } else {
6003                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6004                                     struct btrfs_inode_item);
6005                 mode = btrfs_inode_mode(path.nodes[0], ii);
6006                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6007                         err |= INODE_ITEM_MISMATCH;
6008         }
6009
6010         /* lookup first inode ref */
6011         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6012         key.type = BTRFS_INODE_REF_KEY;
6013         /* special index value */
6014         index = 0;
6015
6016         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6017         if (ret < 0)
6018                 goto out;
6019         err |= ret;
6020
6021 out:
6022         btrfs_release_path(&path);
6023
6024         if (err && repair)
6025                 err = repair_fs_first_inode(root, err);
6026
6027         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6028                 error("root dir INODE_ITEM is %s",
6029                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6030         if (err & INODE_REF_MISSING)
6031                 error("root dir INODE_REF is missing");
6032
6033         return ret < 0 ? ret : err;
6034 }
6035
6036 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6037                                                 u64 parent, u64 root)
6038 {
6039         struct rb_node *node;
6040         struct tree_backref *back = NULL;
6041         struct tree_backref match = {
6042                 .node = {
6043                         .is_data = 0,
6044                 },
6045         };
6046
6047         if (parent) {
6048                 match.parent = parent;
6049                 match.node.full_backref = 1;
6050         } else {
6051                 match.root = root;
6052         }
6053
6054         node = rb_search(&rec->backref_tree, &match.node.node,
6055                          (rb_compare_keys)compare_extent_backref, NULL);
6056         if (node)
6057                 back = to_tree_backref(rb_node_to_extent_backref(node));
6058
6059         return back;
6060 }
6061
6062 static struct data_backref *find_data_backref(struct extent_record *rec,
6063                                                 u64 parent, u64 root,
6064                                                 u64 owner, u64 offset,
6065                                                 int found_ref,
6066                                                 u64 disk_bytenr, u64 bytes)
6067 {
6068         struct rb_node *node;
6069         struct data_backref *back = NULL;
6070         struct data_backref match = {
6071                 .node = {
6072                         .is_data = 1,
6073                 },
6074                 .owner = owner,
6075                 .offset = offset,
6076                 .bytes = bytes,
6077                 .found_ref = found_ref,
6078                 .disk_bytenr = disk_bytenr,
6079         };
6080
6081         if (parent) {
6082                 match.parent = parent;
6083                 match.node.full_backref = 1;
6084         } else {
6085                 match.root = root;
6086         }
6087
6088         node = rb_search(&rec->backref_tree, &match.node.node,
6089                          (rb_compare_keys)compare_extent_backref, NULL);
6090         if (node)
6091                 back = to_data_backref(rb_node_to_extent_backref(node));
6092
6093         return back;
6094 }
6095 /*
6096  * Iterate all item on the tree and call check_inode_item() to check.
6097  *
6098  * @root:       the root of the tree to be checked.
6099  * @ext_ref:    the EXTENDED_IREF feature
6100  *
6101  * Return 0 if no error found.
6102  * Return <0 for error.
6103  */
6104 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6105 {
6106         struct btrfs_path path;
6107         struct node_refs nrefs;
6108         struct btrfs_root_item *root_item = &root->root_item;
6109         int ret;
6110         int level;
6111         int err = 0;
6112
6113         /*
6114          * We need to manually check the first inode item(256)
6115          * As the following traversal function will only start from
6116          * the first inode item in the leaf, if inode item(256) is missing
6117          * we will just skip it forever.
6118          */
6119         ret = check_fs_first_inode(root, ext_ref);
6120         if (ret < 0)
6121                 return ret;
6122         err |= !!ret;
6123
6124         memset(&nrefs, 0, sizeof(nrefs));
6125         level = btrfs_header_level(root->node);
6126         btrfs_init_path(&path);
6127
6128         if (btrfs_root_refs(root_item) > 0 ||
6129             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6130                 path.nodes[level] = root->node;
6131                 path.slots[level] = 0;
6132                 extent_buffer_get(root->node);
6133         } else {
6134                 struct btrfs_key key;
6135
6136                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6137                 level = root_item->drop_level;
6138                 path.lowest_level = level;
6139                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6140                 if (ret < 0)
6141                         goto out;
6142                 ret = 0;
6143         }
6144
6145         while (1) {
6146                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
6147                 err |= !!ret;
6148
6149                 /* if ret is negative, walk shall stop */
6150                 if (ret < 0) {
6151                         ret = err;
6152                         break;
6153                 }
6154
6155                 ret = walk_up_tree_v2(root, &path, &level);
6156                 if (ret != 0) {
6157                         /* Normal exit, reset ret to err */
6158                         ret = err;
6159                         break;
6160                 }
6161         }
6162
6163 out:
6164         btrfs_release_path(&path);
6165         return ret;
6166 }
6167
6168 /*
6169  * Find the relative ref for root_ref and root_backref.
6170  *
6171  * @root:       the root of the root tree.
6172  * @ref_key:    the key of the root ref.
6173  *
6174  * Return 0 if no error occurred.
6175  */
6176 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6177                           struct extent_buffer *node, int slot)
6178 {
6179         struct btrfs_path path;
6180         struct btrfs_key key;
6181         struct btrfs_root_ref *ref;
6182         struct btrfs_root_ref *backref;
6183         char ref_name[BTRFS_NAME_LEN] = {0};
6184         char backref_name[BTRFS_NAME_LEN] = {0};
6185         u64 ref_dirid;
6186         u64 ref_seq;
6187         u32 ref_namelen;
6188         u64 backref_dirid;
6189         u64 backref_seq;
6190         u32 backref_namelen;
6191         u32 len;
6192         int ret;
6193         int err = 0;
6194
6195         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6196         ref_dirid = btrfs_root_ref_dirid(node, ref);
6197         ref_seq = btrfs_root_ref_sequence(node, ref);
6198         ref_namelen = btrfs_root_ref_name_len(node, ref);
6199
6200         if (ref_namelen <= BTRFS_NAME_LEN) {
6201                 len = ref_namelen;
6202         } else {
6203                 len = BTRFS_NAME_LEN;
6204                 warning("%s[%llu %llu] ref_name too long",
6205                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6206                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6207                         ref_key->offset);
6208         }
6209         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6210
6211         /* Find relative root_ref */
6212         key.objectid = ref_key->offset;
6213         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6214         key.offset = ref_key->objectid;
6215
6216         btrfs_init_path(&path);
6217         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6218         if (ret) {
6219                 err |= ROOT_REF_MISSING;
6220                 error("%s[%llu %llu] couldn't find relative ref",
6221                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6222                       "ROOT_REF" : "ROOT_BACKREF",
6223                       ref_key->objectid, ref_key->offset);
6224                 goto out;
6225         }
6226
6227         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6228                                  struct btrfs_root_ref);
6229         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6230         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6231         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6232
6233         if (backref_namelen <= BTRFS_NAME_LEN) {
6234                 len = backref_namelen;
6235         } else {
6236                 len = BTRFS_NAME_LEN;
6237                 warning("%s[%llu %llu] ref_name too long",
6238                         key.type == BTRFS_ROOT_REF_KEY ?
6239                         "ROOT_REF" : "ROOT_BACKREF",
6240                         key.objectid, key.offset);
6241         }
6242         read_extent_buffer(path.nodes[0], backref_name,
6243                            (unsigned long)(backref + 1), len);
6244
6245         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6246             ref_namelen != backref_namelen ||
6247             strncmp(ref_name, backref_name, len)) {
6248                 err |= ROOT_REF_MISMATCH;
6249                 error("%s[%llu %llu] mismatch relative ref",
6250                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6251                       "ROOT_REF" : "ROOT_BACKREF",
6252                       ref_key->objectid, ref_key->offset);
6253         }
6254 out:
6255         btrfs_release_path(&path);
6256         return err;
6257 }
6258
6259 /*
6260  * Check all fs/file tree in low_memory mode.
6261  *
6262  * 1. for fs tree root item, call check_fs_root_v2()
6263  * 2. for fs tree root ref/backref, call check_root_ref()
6264  *
6265  * Return 0 if no error occurred.
6266  */
6267 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6268 {
6269         struct btrfs_root *tree_root = fs_info->tree_root;
6270         struct btrfs_root *cur_root = NULL;
6271         struct btrfs_path path;
6272         struct btrfs_key key;
6273         struct extent_buffer *node;
6274         unsigned int ext_ref;
6275         int slot;
6276         int ret;
6277         int err = 0;
6278
6279         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6280
6281         btrfs_init_path(&path);
6282         key.objectid = BTRFS_FS_TREE_OBJECTID;
6283         key.offset = 0;
6284         key.type = BTRFS_ROOT_ITEM_KEY;
6285
6286         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6287         if (ret < 0) {
6288                 err = ret;
6289                 goto out;
6290         } else if (ret > 0) {
6291                 err = -ENOENT;
6292                 goto out;
6293         }
6294
6295         while (1) {
6296                 node = path.nodes[0];
6297                 slot = path.slots[0];
6298                 btrfs_item_key_to_cpu(node, &key, slot);
6299                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6300                         goto out;
6301                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6302                     fs_root_objectid(key.objectid)) {
6303                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6304                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6305                                                                        &key);
6306                         } else {
6307                                 key.offset = (u64)-1;
6308                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6309                         }
6310
6311                         if (IS_ERR(cur_root)) {
6312                                 error("Fail to read fs/subvol tree: %lld",
6313                                       key.objectid);
6314                                 err = -EIO;
6315                                 goto next;
6316                         }
6317
6318                         ret = check_fs_root_v2(cur_root, ext_ref);
6319                         err |= ret;
6320
6321                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6322                                 btrfs_free_fs_root(cur_root);
6323                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6324                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6325                         ret = check_root_ref(tree_root, &key, node, slot);
6326                         err |= ret;
6327                 }
6328 next:
6329                 ret = btrfs_next_item(tree_root, &path);
6330                 if (ret > 0)
6331                         goto out;
6332                 if (ret < 0) {
6333                         err = ret;
6334                         goto out;
6335                 }
6336         }
6337
6338 out:
6339         btrfs_release_path(&path);
6340         return err;
6341 }
6342
6343 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6344                           struct cache_tree *root_cache)
6345 {
6346         int ret;
6347
6348         if (!ctx.progress_enabled)
6349                 fprintf(stderr, "checking fs roots\n");
6350         if (check_mode == CHECK_MODE_LOWMEM)
6351                 ret = check_fs_roots_v2(fs_info);
6352         else
6353                 ret = check_fs_roots(fs_info, root_cache);
6354
6355         return ret;
6356 }
6357
6358 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6359 {
6360         struct extent_backref *back, *tmp;
6361         struct tree_backref *tback;
6362         struct data_backref *dback;
6363         u64 found = 0;
6364         int err = 0;
6365
6366         rbtree_postorder_for_each_entry_safe(back, tmp,
6367                                              &rec->backref_tree, node) {
6368                 if (!back->found_extent_tree) {
6369                         err = 1;
6370                         if (!print_errs)
6371                                 goto out;
6372                         if (back->is_data) {
6373                                 dback = to_data_backref(back);
6374                                 fprintf(stderr, "Data backref %llu %s %llu"
6375                                         " owner %llu offset %llu num_refs %lu"
6376                                         " not found in extent tree\n",
6377                                         (unsigned long long)rec->start,
6378                                         back->full_backref ?
6379                                         "parent" : "root",
6380                                         back->full_backref ?
6381                                         (unsigned long long)dback->parent:
6382                                         (unsigned long long)dback->root,
6383                                         (unsigned long long)dback->owner,
6384                                         (unsigned long long)dback->offset,
6385                                         (unsigned long)dback->num_refs);
6386                         } else {
6387                                 tback = to_tree_backref(back);
6388                                 fprintf(stderr, "Tree backref %llu parent %llu"
6389                                         " root %llu not found in extent tree\n",
6390                                         (unsigned long long)rec->start,
6391                                         (unsigned long long)tback->parent,
6392                                         (unsigned long long)tback->root);
6393                         }
6394                 }
6395                 if (!back->is_data && !back->found_ref) {
6396                         err = 1;
6397                         if (!print_errs)
6398                                 goto out;
6399                         tback = to_tree_backref(back);
6400                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6401                                 (unsigned long long)rec->start,
6402                                 back->full_backref ? "parent" : "root",
6403                                 back->full_backref ?
6404                                 (unsigned long long)tback->parent :
6405                                 (unsigned long long)tback->root, back);
6406                 }
6407                 if (back->is_data) {
6408                         dback = to_data_backref(back);
6409                         if (dback->found_ref != dback->num_refs) {
6410                                 err = 1;
6411                                 if (!print_errs)
6412                                         goto out;
6413                                 fprintf(stderr, "Incorrect local backref count"
6414                                         " on %llu %s %llu owner %llu"
6415                                         " offset %llu found %u wanted %u back %p\n",
6416                                         (unsigned long long)rec->start,
6417                                         back->full_backref ?
6418                                         "parent" : "root",
6419                                         back->full_backref ?
6420                                         (unsigned long long)dback->parent:
6421                                         (unsigned long long)dback->root,
6422                                         (unsigned long long)dback->owner,
6423                                         (unsigned long long)dback->offset,
6424                                         dback->found_ref, dback->num_refs, back);
6425                         }
6426                         if (dback->disk_bytenr != rec->start) {
6427                                 err = 1;
6428                                 if (!print_errs)
6429                                         goto out;
6430                                 fprintf(stderr, "Backref disk bytenr does not"
6431                                         " match extent record, bytenr=%llu, "
6432                                         "ref bytenr=%llu\n",
6433                                         (unsigned long long)rec->start,
6434                                         (unsigned long long)dback->disk_bytenr);
6435                         }
6436
6437                         if (dback->bytes != rec->nr) {
6438                                 err = 1;
6439                                 if (!print_errs)
6440                                         goto out;
6441                                 fprintf(stderr, "Backref bytes do not match "
6442                                         "extent backref, bytenr=%llu, ref "
6443                                         "bytes=%llu, backref bytes=%llu\n",
6444                                         (unsigned long long)rec->start,
6445                                         (unsigned long long)rec->nr,
6446                                         (unsigned long long)dback->bytes);
6447                         }
6448                 }
6449                 if (!back->is_data) {
6450                         found += 1;
6451                 } else {
6452                         dback = to_data_backref(back);
6453                         found += dback->found_ref;
6454                 }
6455         }
6456         if (found != rec->refs) {
6457                 err = 1;
6458                 if (!print_errs)
6459                         goto out;
6460                 fprintf(stderr, "Incorrect global backref count "
6461                         "on %llu found %llu wanted %llu\n",
6462                         (unsigned long long)rec->start,
6463                         (unsigned long long)found,
6464                         (unsigned long long)rec->refs);
6465         }
6466 out:
6467         return err;
6468 }
6469
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6476
6477 static void free_all_extent_backrefs(struct extent_record *rec)
6478 {
6479         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6480 }
6481
6482 static void free_extent_record_cache(struct cache_tree *extent_cache)
6483 {
6484         struct cache_extent *cache;
6485         struct extent_record *rec;
6486
6487         while (1) {
6488                 cache = first_cache_extent(extent_cache);
6489                 if (!cache)
6490                         break;
6491                 rec = container_of(cache, struct extent_record, cache);
6492                 remove_cache_extent(extent_cache, cache);
6493                 free_all_extent_backrefs(rec);
6494                 free(rec);
6495         }
6496 }
6497
6498 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6499                                  struct extent_record *rec)
6500 {
6501         if (rec->content_checked && rec->owner_ref_checked &&
6502             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6503             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6504             !rec->bad_full_backref && !rec->crossing_stripes &&
6505             !rec->wrong_chunk_type) {
6506                 remove_cache_extent(extent_cache, &rec->cache);
6507                 free_all_extent_backrefs(rec);
6508                 list_del_init(&rec->list);
6509                 free(rec);
6510         }
6511         return 0;
6512 }
6513
6514 static int check_owner_ref(struct btrfs_root *root,
6515                             struct extent_record *rec,
6516                             struct extent_buffer *buf)
6517 {
6518         struct extent_backref *node, *tmp;
6519         struct tree_backref *back;
6520         struct btrfs_root *ref_root;
6521         struct btrfs_key key;
6522         struct btrfs_path path;
6523         struct extent_buffer *parent;
6524         int level;
6525         int found = 0;
6526         int ret;
6527
6528         rbtree_postorder_for_each_entry_safe(node, tmp,
6529                                              &rec->backref_tree, node) {
6530                 if (node->is_data)
6531                         continue;
6532                 if (!node->found_ref)
6533                         continue;
6534                 if (node->full_backref)
6535                         continue;
6536                 back = to_tree_backref(node);
6537                 if (btrfs_header_owner(buf) == back->root)
6538                         return 0;
6539         }
6540         BUG_ON(rec->is_root);
6541
6542         /* try to find the block by search corresponding fs tree */
6543         key.objectid = btrfs_header_owner(buf);
6544         key.type = BTRFS_ROOT_ITEM_KEY;
6545         key.offset = (u64)-1;
6546
6547         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6548         if (IS_ERR(ref_root))
6549                 return 1;
6550
6551         level = btrfs_header_level(buf);
6552         if (level == 0)
6553                 btrfs_item_key_to_cpu(buf, &key, 0);
6554         else
6555                 btrfs_node_key_to_cpu(buf, &key, 0);
6556
6557         btrfs_init_path(&path);
6558         path.lowest_level = level + 1;
6559         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6560         if (ret < 0)
6561                 return 0;
6562
6563         parent = path.nodes[level + 1];
6564         if (parent && buf->start == btrfs_node_blockptr(parent,
6565                                                         path.slots[level + 1]))
6566                 found = 1;
6567
6568         btrfs_release_path(&path);
6569         return found ? 0 : 1;
6570 }
6571
6572 static int is_extent_tree_record(struct extent_record *rec)
6573 {
6574         struct extent_backref *node, *tmp;
6575         struct tree_backref *back;
6576         int is_extent = 0;
6577
6578         rbtree_postorder_for_each_entry_safe(node, tmp,
6579                                              &rec->backref_tree, node) {
6580                 if (node->is_data)
6581                         return 0;
6582                 back = to_tree_backref(node);
6583                 if (node->full_backref)
6584                         return 0;
6585                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6586                         is_extent = 1;
6587         }
6588         return is_extent;
6589 }
6590
6591
6592 static int record_bad_block_io(struct btrfs_fs_info *info,
6593                                struct cache_tree *extent_cache,
6594                                u64 start, u64 len)
6595 {
6596         struct extent_record *rec;
6597         struct cache_extent *cache;
6598         struct btrfs_key key;
6599
6600         cache = lookup_cache_extent(extent_cache, start, len);
6601         if (!cache)
6602                 return 0;
6603
6604         rec = container_of(cache, struct extent_record, cache);
6605         if (!is_extent_tree_record(rec))
6606                 return 0;
6607
6608         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6609         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6610 }
6611
6612 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6613                        struct extent_buffer *buf, int slot)
6614 {
6615         if (btrfs_header_level(buf)) {
6616                 struct btrfs_key_ptr ptr1, ptr2;
6617
6618                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6619                                    sizeof(struct btrfs_key_ptr));
6620                 read_extent_buffer(buf, &ptr2,
6621                                    btrfs_node_key_ptr_offset(slot + 1),
6622                                    sizeof(struct btrfs_key_ptr));
6623                 write_extent_buffer(buf, &ptr1,
6624                                     btrfs_node_key_ptr_offset(slot + 1),
6625                                     sizeof(struct btrfs_key_ptr));
6626                 write_extent_buffer(buf, &ptr2,
6627                                     btrfs_node_key_ptr_offset(slot),
6628                                     sizeof(struct btrfs_key_ptr));
6629                 if (slot == 0) {
6630                         struct btrfs_disk_key key;
6631                         btrfs_node_key(buf, &key, 0);
6632                         btrfs_fixup_low_keys(root, path, &key,
6633                                              btrfs_header_level(buf) + 1);
6634                 }
6635         } else {
6636                 struct btrfs_item *item1, *item2;
6637                 struct btrfs_key k1, k2;
6638                 char *item1_data, *item2_data;
6639                 u32 item1_offset, item2_offset, item1_size, item2_size;
6640
6641                 item1 = btrfs_item_nr(slot);
6642                 item2 = btrfs_item_nr(slot + 1);
6643                 btrfs_item_key_to_cpu(buf, &k1, slot);
6644                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6645                 item1_offset = btrfs_item_offset(buf, item1);
6646                 item2_offset = btrfs_item_offset(buf, item2);
6647                 item1_size = btrfs_item_size(buf, item1);
6648                 item2_size = btrfs_item_size(buf, item2);
6649
6650                 item1_data = malloc(item1_size);
6651                 if (!item1_data)
6652                         return -ENOMEM;
6653                 item2_data = malloc(item2_size);
6654                 if (!item2_data) {
6655                         free(item1_data);
6656                         return -ENOMEM;
6657                 }
6658
6659                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6660                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6661
6662                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6663                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6664                 free(item1_data);
6665                 free(item2_data);
6666
6667                 btrfs_set_item_offset(buf, item1, item2_offset);
6668                 btrfs_set_item_offset(buf, item2, item1_offset);
6669                 btrfs_set_item_size(buf, item1, item2_size);
6670                 btrfs_set_item_size(buf, item2, item1_size);
6671
6672                 path->slots[0] = slot;
6673                 btrfs_set_item_key_unsafe(root, path, &k2);
6674                 path->slots[0] = slot + 1;
6675                 btrfs_set_item_key_unsafe(root, path, &k1);
6676         }
6677         return 0;
6678 }
6679
6680 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6681 {
6682         struct extent_buffer *buf;
6683         struct btrfs_key k1, k2;
6684         int i;
6685         int level = path->lowest_level;
6686         int ret = -EIO;
6687
6688         buf = path->nodes[level];
6689         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6690                 if (level) {
6691                         btrfs_node_key_to_cpu(buf, &k1, i);
6692                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6693                 } else {
6694                         btrfs_item_key_to_cpu(buf, &k1, i);
6695                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6696                 }
6697                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6698                         continue;
6699                 ret = swap_values(root, path, buf, i);
6700                 if (ret)
6701                         break;
6702                 btrfs_mark_buffer_dirty(buf);
6703                 i = 0;
6704         }
6705         return ret;
6706 }
6707
6708 static int delete_bogus_item(struct btrfs_root *root,
6709                              struct btrfs_path *path,
6710                              struct extent_buffer *buf, int slot)
6711 {
6712         struct btrfs_key key;
6713         int nritems = btrfs_header_nritems(buf);
6714
6715         btrfs_item_key_to_cpu(buf, &key, slot);
6716
6717         /* These are all the keys we can deal with missing. */
6718         if (key.type != BTRFS_DIR_INDEX_KEY &&
6719             key.type != BTRFS_EXTENT_ITEM_KEY &&
6720             key.type != BTRFS_METADATA_ITEM_KEY &&
6721             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6722             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6723                 return -1;
6724
6725         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6726                (unsigned long long)key.objectid, key.type,
6727                (unsigned long long)key.offset, slot, buf->start);
6728         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6729                               btrfs_item_nr_offset(slot + 1),
6730                               sizeof(struct btrfs_item) *
6731                               (nritems - slot - 1));
6732         btrfs_set_header_nritems(buf, nritems - 1);
6733         if (slot == 0) {
6734                 struct btrfs_disk_key disk_key;
6735
6736                 btrfs_item_key(buf, &disk_key, 0);
6737                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6738         }
6739         btrfs_mark_buffer_dirty(buf);
6740         return 0;
6741 }
6742
6743 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6744 {
6745         struct extent_buffer *buf;
6746         int i;
6747         int ret = 0;
6748
6749         /* We should only get this for leaves */
6750         BUG_ON(path->lowest_level);
6751         buf = path->nodes[0];
6752 again:
6753         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6754                 unsigned int shift = 0, offset;
6755
6756                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6757                     BTRFS_LEAF_DATA_SIZE(root)) {
6758                         if (btrfs_item_end_nr(buf, i) >
6759                             BTRFS_LEAF_DATA_SIZE(root)) {
6760                                 ret = delete_bogus_item(root, path, buf, i);
6761                                 if (!ret)
6762                                         goto again;
6763                                 fprintf(stderr, "item is off the end of the "
6764                                         "leaf, can't fix\n");
6765                                 ret = -EIO;
6766                                 break;
6767                         }
6768                         shift = BTRFS_LEAF_DATA_SIZE(root) -
6769                                 btrfs_item_end_nr(buf, i);
6770                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6771                            btrfs_item_offset_nr(buf, i - 1)) {
6772                         if (btrfs_item_end_nr(buf, i) >
6773                             btrfs_item_offset_nr(buf, i - 1)) {
6774                                 ret = delete_bogus_item(root, path, buf, i);
6775                                 if (!ret)
6776                                         goto again;
6777                                 fprintf(stderr, "items overlap, can't fix\n");
6778                                 ret = -EIO;
6779                                 break;
6780                         }
6781                         shift = btrfs_item_offset_nr(buf, i - 1) -
6782                                 btrfs_item_end_nr(buf, i);
6783                 }
6784                 if (!shift)
6785                         continue;
6786
6787                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6788                        i, shift, (unsigned long long)buf->start);
6789                 offset = btrfs_item_offset_nr(buf, i);
6790                 memmove_extent_buffer(buf,
6791                                       btrfs_leaf_data(buf) + offset + shift,
6792                                       btrfs_leaf_data(buf) + offset,
6793                                       btrfs_item_size_nr(buf, i));
6794                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6795                                       offset + shift);
6796                 btrfs_mark_buffer_dirty(buf);
6797         }
6798
6799         /*
6800          * We may have moved things, in which case we want to exit so we don't
6801          * write those changes out.  Once we have proper abort functionality in
6802          * progs this can be changed to something nicer.
6803          */
6804         BUG_ON(ret);
6805         return ret;
6806 }
6807
6808 /*
6809  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6810  * then just return -EIO.
6811  */
6812 static int try_to_fix_bad_block(struct btrfs_root *root,
6813                                 struct extent_buffer *buf,
6814                                 enum btrfs_tree_block_status status)
6815 {
6816         struct btrfs_trans_handle *trans;
6817         struct ulist *roots;
6818         struct ulist_node *node;
6819         struct btrfs_root *search_root;
6820         struct btrfs_path path;
6821         struct ulist_iterator iter;
6822         struct btrfs_key root_key, key;
6823         int ret;
6824
6825         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6826             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6827                 return -EIO;
6828
6829         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6830         if (ret)
6831                 return -EIO;
6832
6833         btrfs_init_path(&path);
6834         ULIST_ITER_INIT(&iter);
6835         while ((node = ulist_next(roots, &iter))) {
6836                 root_key.objectid = node->val;
6837                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6838                 root_key.offset = (u64)-1;
6839
6840                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6841                 if (IS_ERR(root)) {
6842                         ret = -EIO;
6843                         break;
6844                 }
6845
6846
6847                 trans = btrfs_start_transaction(search_root, 0);
6848                 if (IS_ERR(trans)) {
6849                         ret = PTR_ERR(trans);
6850                         break;
6851                 }
6852
6853                 path.lowest_level = btrfs_header_level(buf);
6854                 path.skip_check_block = 1;
6855                 if (path.lowest_level)
6856                         btrfs_node_key_to_cpu(buf, &key, 0);
6857                 else
6858                         btrfs_item_key_to_cpu(buf, &key, 0);
6859                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6860                 if (ret) {
6861                         ret = -EIO;
6862                         btrfs_commit_transaction(trans, search_root);
6863                         break;
6864                 }
6865                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6866                         ret = fix_key_order(search_root, &path);
6867                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6868                         ret = fix_item_offset(search_root, &path);
6869                 if (ret) {
6870                         btrfs_commit_transaction(trans, search_root);
6871                         break;
6872                 }
6873                 btrfs_release_path(&path);
6874                 btrfs_commit_transaction(trans, search_root);
6875         }
6876         ulist_free(roots);
6877         btrfs_release_path(&path);
6878         return ret;
6879 }
6880
6881 static int check_block(struct btrfs_root *root,
6882                        struct cache_tree *extent_cache,
6883                        struct extent_buffer *buf, u64 flags)
6884 {
6885         struct extent_record *rec;
6886         struct cache_extent *cache;
6887         struct btrfs_key key;
6888         enum btrfs_tree_block_status status;
6889         int ret = 0;
6890         int level;
6891
6892         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6893         if (!cache)
6894                 return 1;
6895         rec = container_of(cache, struct extent_record, cache);
6896         rec->generation = btrfs_header_generation(buf);
6897
6898         level = btrfs_header_level(buf);
6899         if (btrfs_header_nritems(buf) > 0) {
6900
6901                 if (level == 0)
6902                         btrfs_item_key_to_cpu(buf, &key, 0);
6903                 else
6904                         btrfs_node_key_to_cpu(buf, &key, 0);
6905
6906                 rec->info_objectid = key.objectid;
6907         }
6908         rec->info_level = level;
6909
6910         if (btrfs_is_leaf(buf))
6911                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6912         else
6913                 status = btrfs_check_node(root, &rec->parent_key, buf);
6914
6915         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6916                 if (repair)
6917                         status = try_to_fix_bad_block(root, buf, status);
6918                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6919                         ret = -EIO;
6920                         fprintf(stderr, "bad block %llu\n",
6921                                 (unsigned long long)buf->start);
6922                 } else {
6923                         /*
6924                          * Signal to callers we need to start the scan over
6925                          * again since we'll have cowed blocks.
6926                          */
6927                         ret = -EAGAIN;
6928                 }
6929         } else {
6930                 rec->content_checked = 1;
6931                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6932                         rec->owner_ref_checked = 1;
6933                 else {
6934                         ret = check_owner_ref(root, rec, buf);
6935                         if (!ret)
6936                                 rec->owner_ref_checked = 1;
6937                 }
6938         }
6939         if (!ret)
6940                 maybe_free_extent_rec(extent_cache, rec);
6941         return ret;
6942 }
6943
#if 0
/*
 * Compiled out.  Look up a tree backref of @rec: by @parent when parent is
 * non-zero (full backrefs only), otherwise by @root (non-full backrefs
 * only).  Returns the match or NULL.
 *
 * NOTE(review): this walks rec->backrefs as a list, while the live code in
 * this file iterates rec->backref_tree -- presumably stale; confirm before
 * re-enabling.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *pos;
        struct extent_backref *node;
        struct tree_backref *back;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                node = to_extent_backref(pos);
                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                } else {
                        if (!node->full_backref && back->root == root)
                                return back;
                }
        }
        return NULL;
}
#endif
6973
6974 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6975                                                 u64 parent, u64 root)
6976 {
6977         struct tree_backref *ref = malloc(sizeof(*ref));
6978
6979         if (!ref)
6980                 return NULL;
6981         memset(&ref->node, 0, sizeof(ref->node));
6982         if (parent > 0) {
6983                 ref->parent = parent;
6984                 ref->node.full_backref = 1;
6985         } else {
6986                 ref->root = root;
6987                 ref->node.full_backref = 0;
6988         }
6989
6990         return ref;
6991 }
6992
#if 0
/*
 * Compiled out.  Look up a data backref of @rec: by @parent when parent is
 * non-zero (full backrefs only), otherwise by the (@root, @owner, @offset)
 * triple (non-full backrefs only).  In the latter case, when both
 * @found_ref and the candidate's found_ref are set, a candidate whose
 * bytes or disk_bytenr differ from @bytes / @disk_bytenr is skipped.
 * Returns the match or NULL.
 *
 * NOTE(review): this walks rec->backrefs as a list, while the live code in
 * this file iterates rec->backref_tree -- presumably stale; confirm before
 * re-enabling.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *pos;
        struct extent_backref *node;
        struct data_backref *back;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                node = to_extent_backref(pos);
                if (!node->is_data)
                        continue;
                back = to_data_backref(node);

                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                        continue;
                }

                if (node->full_backref)
                        continue;
                if (back->root != root || back->owner != owner ||
                    back->offset != offset)
                        continue;
                if (found_ref && node->found_ref &&
                    (back->bytes != bytes ||
                     back->disk_bytenr != disk_bytenr))
                        continue;
                return back;
        }
        return NULL;
}
#endif
7031
7032 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7033                                                 u64 parent, u64 root,
7034                                                 u64 owner, u64 offset,
7035                                                 u64 max_size)
7036 {
7037         struct data_backref *ref = malloc(sizeof(*ref));
7038
7039         if (!ref)
7040                 return NULL;
7041         memset(&ref->node, 0, sizeof(ref->node));
7042         ref->node.is_data = 1;
7043
7044         if (parent > 0) {
7045                 ref->parent = parent;
7046                 ref->owner = 0;
7047                 ref->offset = 0;
7048                 ref->node.full_backref = 1;
7049         } else {
7050                 ref->root = root;
7051                 ref->owner = owner;
7052                 ref->offset = offset;
7053                 ref->node.full_backref = 0;
7054         }
7055         ref->bytes = max_size;
7056         ref->found_ref = 0;
7057         ref->num_refs = 0;
7058         if (max_size > rec->max_size)
7059                 rec->max_size = max_size;
7060         return ref;
7061 }
7062
7063 /* Check if the type of extent matches with its chunk */
7064 static void check_extent_type(struct extent_record *rec)
7065 {
7066         struct btrfs_block_group_cache *bg_cache;
7067
7068         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7069         if (!bg_cache)
7070                 return;
7071
7072         /* data extent, check chunk directly*/
7073         if (!rec->metadata) {
7074                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7075                         rec->wrong_chunk_type = 1;
7076                 return;
7077         }
7078
7079         /* metadata extent, check the obvious case first */
7080         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7081                                  BTRFS_BLOCK_GROUP_METADATA))) {
7082                 rec->wrong_chunk_type = 1;
7083                 return;
7084         }
7085
7086         /*
7087          * Check SYSTEM extent, as it's also marked as metadata, we can only
7088          * make sure it's a SYSTEM extent by its backref
7089          */
7090         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7091                 struct extent_backref *node;
7092                 struct tree_backref *tback;
7093                 u64 bg_type;
7094
7095                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7096                 if (node->is_data) {
7097                         /* tree block shouldn't have data backref */
7098                         rec->wrong_chunk_type = 1;
7099                         return;
7100                 }
7101                 tback = container_of(node, struct tree_backref, node);
7102
7103                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7104                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7105                 else
7106                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7107                 if (!(bg_cache->flags & bg_type))
7108                         rec->wrong_chunk_type = 1;
7109         }
7110 }
7111
7112 /*
7113  * Allocate a new extent record, fill default values from @tmpl and insert int
7114  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7115  * the cache, otherwise it fails.
7116  */
7117 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7118                 struct extent_record *tmpl)
7119 {
7120         struct extent_record *rec;
7121         int ret = 0;
7122
7123         BUG_ON(tmpl->max_size == 0);
7124         rec = malloc(sizeof(*rec));
7125         if (!rec)
7126                 return -ENOMEM;
7127         rec->start = tmpl->start;
7128         rec->max_size = tmpl->max_size;
7129         rec->nr = max(tmpl->nr, tmpl->max_size);
7130         rec->found_rec = tmpl->found_rec;
7131         rec->content_checked = tmpl->content_checked;
7132         rec->owner_ref_checked = tmpl->owner_ref_checked;
7133         rec->num_duplicates = 0;
7134         rec->metadata = tmpl->metadata;
7135         rec->flag_block_full_backref = FLAG_UNSET;
7136         rec->bad_full_backref = 0;
7137         rec->crossing_stripes = 0;
7138         rec->wrong_chunk_type = 0;
7139         rec->is_root = tmpl->is_root;
7140         rec->refs = tmpl->refs;
7141         rec->extent_item_refs = tmpl->extent_item_refs;
7142         rec->parent_generation = tmpl->parent_generation;
7143         INIT_LIST_HEAD(&rec->backrefs);
7144         INIT_LIST_HEAD(&rec->dups);
7145         INIT_LIST_HEAD(&rec->list);
7146         rec->backref_tree = RB_ROOT;
7147         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7148         rec->cache.start = tmpl->start;
7149         rec->cache.size = tmpl->nr;
7150         ret = insert_cache_extent(extent_cache, &rec->cache);
7151         if (ret) {
7152                 free(rec);
7153                 return ret;
7154         }
7155         bytes_used += rec->nr;
7156
7157         if (tmpl->metadata)
7158                 rec->crossing_stripes = check_crossing_stripes(global_info,
7159                                 rec->start, global_info->nodesize);
7160         check_extent_type(rec);
7161         return ret;
7162 }
7163
7164 /*
7165  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7166  * some are hints:
7167  * - refs              - if found, increase refs
7168  * - is_root           - if found, set
7169  * - content_checked   - if found, set
7170  * - owner_ref_checked - if found, set
7171  *
7172  * If not found, create a new one, initialize and insert.
7173  */
7174 static int add_extent_rec(struct cache_tree *extent_cache,
7175                 struct extent_record *tmpl)
7176 {
7177         struct extent_record *rec;
7178         struct cache_extent *cache;
7179         int ret = 0;
7180         int dup = 0;
7181
7182         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7183         if (cache) {
7184                 rec = container_of(cache, struct extent_record, cache);
7185                 if (tmpl->refs)
7186                         rec->refs++;
7187                 if (rec->nr == 1)
7188                         rec->nr = max(tmpl->nr, tmpl->max_size);
7189
7190                 /*
7191                  * We need to make sure to reset nr to whatever the extent
7192                  * record says was the real size, this way we can compare it to
7193                  * the backrefs.
7194                  */
7195                 if (tmpl->found_rec) {
7196                         if (tmpl->start != rec->start || rec->found_rec) {
7197                                 struct extent_record *tmp;
7198
7199                                 dup = 1;
7200                                 if (list_empty(&rec->list))
7201                                         list_add_tail(&rec->list,
7202                                                       &duplicate_extents);
7203
7204                                 /*
7205                                  * We have to do this song and dance in case we
7206                                  * find an extent record that falls inside of
7207                                  * our current extent record but does not have
7208                                  * the same objectid.
7209                                  */
7210                                 tmp = malloc(sizeof(*tmp));
7211                                 if (!tmp)
7212                                         return -ENOMEM;
7213                                 tmp->start = tmpl->start;
7214                                 tmp->max_size = tmpl->max_size;
7215                                 tmp->nr = tmpl->nr;
7216                                 tmp->found_rec = 1;
7217                                 tmp->metadata = tmpl->metadata;
7218                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7219                                 INIT_LIST_HEAD(&tmp->list);
7220                                 list_add_tail(&tmp->list, &rec->dups);
7221                                 rec->num_duplicates++;
7222                         } else {
7223                                 rec->nr = tmpl->nr;
7224                                 rec->found_rec = 1;
7225                         }
7226                 }
7227
7228                 if (tmpl->extent_item_refs && !dup) {
7229                         if (rec->extent_item_refs) {
7230                                 fprintf(stderr, "block %llu rec "
7231                                         "extent_item_refs %llu, passed %llu\n",
7232                                         (unsigned long long)tmpl->start,
7233                                         (unsigned long long)
7234                                                         rec->extent_item_refs,
7235                                         (unsigned long long)tmpl->extent_item_refs);
7236                         }
7237                         rec->extent_item_refs = tmpl->extent_item_refs;
7238                 }
7239                 if (tmpl->is_root)
7240                         rec->is_root = 1;
7241                 if (tmpl->content_checked)
7242                         rec->content_checked = 1;
7243                 if (tmpl->owner_ref_checked)
7244                         rec->owner_ref_checked = 1;
7245                 memcpy(&rec->parent_key, &tmpl->parent_key,
7246                                 sizeof(tmpl->parent_key));
7247                 if (tmpl->parent_generation)
7248                         rec->parent_generation = tmpl->parent_generation;
7249                 if (rec->max_size < tmpl->max_size)
7250                         rec->max_size = tmpl->max_size;
7251
7252                 /*
7253                  * A metadata extent can't cross stripe_len boundary, otherwise
7254                  * kernel scrub won't be able to handle it.
7255                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7256                  * it.
7257                  */
7258                 if (tmpl->metadata)
7259                         rec->crossing_stripes = check_crossing_stripes(
7260                                         global_info, rec->start,
7261                                         global_info->nodesize);
7262                 check_extent_type(rec);
7263                 maybe_free_extent_rec(extent_cache, rec);
7264                 return ret;
7265         }
7266
7267         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7268
7269         return ret;
7270 }
7271
7272 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7273                             u64 parent, u64 root, int found_ref)
7274 {
7275         struct extent_record *rec;
7276         struct tree_backref *back;
7277         struct cache_extent *cache;
7278         int ret;
7279         bool insert = false;
7280
7281         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7282         if (!cache) {
7283                 struct extent_record tmpl;
7284
7285                 memset(&tmpl, 0, sizeof(tmpl));
7286                 tmpl.start = bytenr;
7287                 tmpl.nr = 1;
7288                 tmpl.metadata = 1;
7289                 tmpl.max_size = 1;
7290
7291                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7292                 if (ret)
7293                         return ret;
7294
7295                 /* really a bug in cache_extent implement now */
7296                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7297                 if (!cache)
7298                         return -ENOENT;
7299         }
7300
7301         rec = container_of(cache, struct extent_record, cache);
7302         if (rec->start != bytenr) {
7303                 /*
7304                  * Several cause, from unaligned bytenr to over lapping extents
7305                  */
7306                 return -EEXIST;
7307         }
7308
7309         back = find_tree_backref(rec, parent, root);
7310         if (!back) {
7311                 back = alloc_tree_backref(rec, parent, root);
7312                 if (!back)
7313                         return -ENOMEM;
7314                 insert = true;
7315         }
7316
7317         if (found_ref) {
7318                 if (back->node.found_ref) {
7319                         fprintf(stderr, "Extent back ref already exists "
7320                                 "for %llu parent %llu root %llu \n",
7321                                 (unsigned long long)bytenr,
7322                                 (unsigned long long)parent,
7323                                 (unsigned long long)root);
7324                 }
7325                 back->node.found_ref = 1;
7326         } else {
7327                 if (back->node.found_extent_tree) {
7328                         fprintf(stderr, "Extent back ref already exists "
7329                                 "for %llu parent %llu root %llu \n",
7330                                 (unsigned long long)bytenr,
7331                                 (unsigned long long)parent,
7332                                 (unsigned long long)root);
7333                 }
7334                 back->node.found_extent_tree = 1;
7335         }
7336         if (insert)
7337                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7338                         compare_extent_backref));
7339         check_extent_type(rec);
7340         maybe_free_extent_rec(extent_cache, rec);
7341         return 0;
7342 }
7343
7344 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7345                             u64 parent, u64 root, u64 owner, u64 offset,
7346                             u32 num_refs, int found_ref, u64 max_size)
7347 {
7348         struct extent_record *rec;
7349         struct data_backref *back;
7350         struct cache_extent *cache;
7351         int ret;
7352         bool insert = false;
7353
7354         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7355         if (!cache) {
7356                 struct extent_record tmpl;
7357
7358                 memset(&tmpl, 0, sizeof(tmpl));
7359                 tmpl.start = bytenr;
7360                 tmpl.nr = 1;
7361                 tmpl.max_size = max_size;
7362
7363                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7364                 if (ret)
7365                         return ret;
7366
7367                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7368                 if (!cache)
7369                         abort();
7370         }
7371
7372         rec = container_of(cache, struct extent_record, cache);
7373         if (rec->max_size < max_size)
7374                 rec->max_size = max_size;
7375
7376         /*
7377          * If found_ref is set then max_size is the real size and must match the
7378          * existing refs.  So if we have already found a ref then we need to
7379          * make sure that this ref matches the existing one, otherwise we need
7380          * to add a new backref so we can notice that the backrefs don't match
7381          * and we need to figure out who is telling the truth.  This is to
7382          * account for that awful fsync bug I introduced where we'd end up with
7383          * a btrfs_file_extent_item that would have its length include multiple
7384          * prealloc extents or point inside of a prealloc extent.
7385          */
7386         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7387                                  bytenr, max_size);
7388         if (!back) {
7389                 back = alloc_data_backref(rec, parent, root, owner, offset,
7390                                           max_size);
7391                 BUG_ON(!back);
7392                 insert = true;
7393         }
7394
7395         if (found_ref) {
7396                 BUG_ON(num_refs != 1);
7397                 if (back->node.found_ref)
7398                         BUG_ON(back->bytes != max_size);
7399                 back->node.found_ref = 1;
7400                 back->found_ref += 1;
7401                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7402                         back->bytes = max_size;
7403                         back->disk_bytenr = bytenr;
7404
7405                         /* Need to reinsert if not already in the tree */
7406                         if (!insert) {
7407                                 rb_erase(&back->node.node, &rec->backref_tree);
7408                                 insert = true;
7409                         }
7410                 }
7411                 rec->refs += 1;
7412                 rec->content_checked = 1;
7413                 rec->owner_ref_checked = 1;
7414         } else {
7415                 if (back->node.found_extent_tree) {
7416                         fprintf(stderr, "Extent back ref already exists "
7417                                 "for %llu parent %llu root %llu "
7418                                 "owner %llu offset %llu num_refs %lu\n",
7419                                 (unsigned long long)bytenr,
7420                                 (unsigned long long)parent,
7421                                 (unsigned long long)root,
7422                                 (unsigned long long)owner,
7423                                 (unsigned long long)offset,
7424                                 (unsigned long)num_refs);
7425                 }
7426                 back->num_refs = num_refs;
7427                 back->node.found_extent_tree = 1;
7428         }
7429         if (insert)
7430                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7431                         compare_extent_backref));
7432
7433         maybe_free_extent_rec(extent_cache, rec);
7434         return 0;
7435 }
7436
7437 static int add_pending(struct cache_tree *pending,
7438                        struct cache_tree *seen, u64 bytenr, u32 size)
7439 {
7440         int ret;
7441         ret = add_cache_extent(seen, bytenr, size);
7442         if (ret)
7443                 return ret;
7444         add_cache_extent(pending, bytenr, size);
7445         return 0;
7446 }
7447
7448 static int pick_next_pending(struct cache_tree *pending,
7449                         struct cache_tree *reada,
7450                         struct cache_tree *nodes,
7451                         u64 last, struct block_info *bits, int bits_nr,
7452                         int *reada_bits)
7453 {
7454         unsigned long node_start = last;
7455         struct cache_extent *cache;
7456         int ret;
7457
7458         cache = search_cache_extent(reada, 0);
7459         if (cache) {
7460                 bits[0].start = cache->start;
7461                 bits[0].size = cache->size;
7462                 *reada_bits = 1;
7463                 return 1;
7464         }
7465         *reada_bits = 0;
7466         if (node_start > 32768)
7467                 node_start -= 32768;
7468
7469         cache = search_cache_extent(nodes, node_start);
7470         if (!cache)
7471                 cache = search_cache_extent(nodes, 0);
7472
7473         if (!cache) {
7474                  cache = search_cache_extent(pending, 0);
7475                  if (!cache)
7476                          return 0;
7477                  ret = 0;
7478                  do {
7479                          bits[ret].start = cache->start;
7480                          bits[ret].size = cache->size;
7481                          cache = next_cache_extent(cache);
7482                          ret++;
7483                  } while (cache && ret < bits_nr);
7484                  return ret;
7485         }
7486
7487         ret = 0;
7488         do {
7489                 bits[ret].start = cache->start;
7490                 bits[ret].size = cache->size;
7491                 cache = next_cache_extent(cache);
7492                 ret++;
7493         } while (cache && ret < bits_nr);
7494
7495         if (bits_nr - ret > 8) {
7496                 u64 lookup = bits[0].start + bits[0].size;
7497                 struct cache_extent *next;
7498                 next = search_cache_extent(pending, lookup);
7499                 while(next) {
7500                         if (next->start - lookup > 32768)
7501                                 break;
7502                         bits[ret].start = next->start;
7503                         bits[ret].size = next->size;
7504                         lookup = next->start + next->size;
7505                         ret++;
7506                         if (ret == bits_nr)
7507                                 break;
7508                         next = next_cache_extent(next);
7509                         if (!next)
7510                                 break;
7511                 }
7512         }
7513         return ret;
7514 }
7515
7516 static void free_chunk_record(struct cache_extent *cache)
7517 {
7518         struct chunk_record *rec;
7519
7520         rec = container_of(cache, struct chunk_record, cache);
7521         list_del_init(&rec->list);
7522         list_del_init(&rec->dextents);
7523         free(rec);
7524 }
7525
7526 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7527 {
7528         cache_tree_free_extents(chunk_cache, free_chunk_record);
7529 }
7530
7531 static void free_device_record(struct rb_node *node)
7532 {
7533         struct device_record *rec;
7534
7535         rec = container_of(node, struct device_record, node);
7536         free(rec);
7537 }
7538
7539 FREE_RB_BASED_TREE(device_cache, free_device_record);
7540
7541 int insert_block_group_record(struct block_group_tree *tree,
7542                               struct block_group_record *bg_rec)
7543 {
7544         int ret;
7545
7546         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7547         if (ret)
7548                 return ret;
7549
7550         list_add_tail(&bg_rec->list, &tree->block_groups);
7551         return 0;
7552 }
7553
7554 static void free_block_group_record(struct cache_extent *cache)
7555 {
7556         struct block_group_record *rec;
7557
7558         rec = container_of(cache, struct block_group_record, cache);
7559         list_del_init(&rec->list);
7560         free(rec);
7561 }
7562
7563 void free_block_group_tree(struct block_group_tree *tree)
7564 {
7565         cache_tree_free_extents(&tree->tree, free_block_group_record);
7566 }
7567
7568 int insert_device_extent_record(struct device_extent_tree *tree,
7569                                 struct device_extent_record *de_rec)
7570 {
7571         int ret;
7572
7573         /*
7574          * Device extent is a bit different from the other extents, because
7575          * the extents which belong to the different devices may have the
7576          * same start and size, so we need use the special extent cache
7577          * search/insert functions.
7578          */
7579         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7580         if (ret)
7581                 return ret;
7582
7583         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7584         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7585         return 0;
7586 }
7587
7588 static void free_device_extent_record(struct cache_extent *cache)
7589 {
7590         struct device_extent_record *rec;
7591
7592         rec = container_of(cache, struct device_extent_record, cache);
7593         if (!list_empty(&rec->chunk_list))
7594                 list_del_init(&rec->chunk_list);
7595         if (!list_empty(&rec->device_list))
7596                 list_del_init(&rec->device_list);
7597         free(rec);
7598 }
7599
7600 void free_device_extent_tree(struct device_extent_tree *tree)
7601 {
7602         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7603 }
7604
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref at @slot of @leaf into a backref in @extent_cache.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the v0 ref count.
 * In both cases key.objectid is the extent bytenr and key.offset is passed
 * as the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
				key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
			0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
7625
7626 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7627                                             struct btrfs_key *key,
7628                                             int slot)
7629 {
7630         struct btrfs_chunk *ptr;
7631         struct chunk_record *rec;
7632         int num_stripes, i;
7633
7634         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7635         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7636
7637         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7638         if (!rec) {
7639                 fprintf(stderr, "memory allocation failed\n");
7640                 exit(-1);
7641         }
7642
7643         INIT_LIST_HEAD(&rec->list);
7644         INIT_LIST_HEAD(&rec->dextents);
7645         rec->bg_rec = NULL;
7646
7647         rec->cache.start = key->offset;
7648         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7649
7650         rec->generation = btrfs_header_generation(leaf);
7651
7652         rec->objectid = key->objectid;
7653         rec->type = key->type;
7654         rec->offset = key->offset;
7655
7656         rec->length = rec->cache.size;
7657         rec->owner = btrfs_chunk_owner(leaf, ptr);
7658         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7659         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7660         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7661         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7662         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7663         rec->num_stripes = num_stripes;
7664         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7665
7666         for (i = 0; i < rec->num_stripes; ++i) {
7667                 rec->stripes[i].devid =
7668                         btrfs_stripe_devid_nr(leaf, ptr, i);
7669                 rec->stripes[i].offset =
7670                         btrfs_stripe_offset_nr(leaf, ptr, i);
7671                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7672                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7673                                 BTRFS_UUID_SIZE);
7674         }
7675
7676         return rec;
7677 }
7678
7679 static int process_chunk_item(struct cache_tree *chunk_cache,
7680                               struct btrfs_key *key, struct extent_buffer *eb,
7681                               int slot)
7682 {
7683         struct chunk_record *rec;
7684         struct btrfs_chunk *chunk;
7685         int ret = 0;
7686
7687         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7688         /*
7689          * Do extra check for this chunk item,
7690          *
7691          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7692          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7693          * and owner<->key_type check.
7694          */
7695         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7696                                       key->offset);
7697         if (ret < 0) {
7698                 error("chunk(%llu, %llu) is not valid, ignore it",
7699                       key->offset, btrfs_chunk_length(eb, chunk));
7700                 return 0;
7701         }
7702         rec = btrfs_new_chunk_record(eb, key, slot);
7703         ret = insert_cache_extent(chunk_cache, &rec->cache);
7704         if (ret) {
7705                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7706                         rec->offset, rec->length);
7707                 free(rec);
7708         }
7709
7710         return ret;
7711 }
7712
7713 static int process_device_item(struct rb_root *dev_cache,
7714                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7715 {
7716         struct btrfs_dev_item *ptr;
7717         struct device_record *rec;
7718         int ret = 0;
7719
7720         ptr = btrfs_item_ptr(eb,
7721                 slot, struct btrfs_dev_item);
7722
7723         rec = malloc(sizeof(*rec));
7724         if (!rec) {
7725                 fprintf(stderr, "memory allocation failed\n");
7726                 return -ENOMEM;
7727         }
7728
7729         rec->devid = key->offset;
7730         rec->generation = btrfs_header_generation(eb);
7731
7732         rec->objectid = key->objectid;
7733         rec->type = key->type;
7734         rec->offset = key->offset;
7735
7736         rec->devid = btrfs_device_id(eb, ptr);
7737         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7738         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7739
7740         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7741         if (ret) {
7742                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7743                 free(rec);
7744         }
7745
7746         return ret;
7747 }
7748
7749 struct block_group_record *
7750 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7751                              int slot)
7752 {
7753         struct btrfs_block_group_item *ptr;
7754         struct block_group_record *rec;
7755
7756         rec = calloc(1, sizeof(*rec));
7757         if (!rec) {
7758                 fprintf(stderr, "memory allocation failed\n");
7759                 exit(-1);
7760         }
7761
7762         rec->cache.start = key->objectid;
7763         rec->cache.size = key->offset;
7764
7765         rec->generation = btrfs_header_generation(leaf);
7766
7767         rec->objectid = key->objectid;
7768         rec->type = key->type;
7769         rec->offset = key->offset;
7770
7771         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7772         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7773
7774         INIT_LIST_HEAD(&rec->list);
7775
7776         return rec;
7777 }
7778
7779 static int process_block_group_item(struct block_group_tree *block_group_cache,
7780                                     struct btrfs_key *key,
7781                                     struct extent_buffer *eb, int slot)
7782 {
7783         struct block_group_record *rec;
7784         int ret = 0;
7785
7786         rec = btrfs_new_block_group_record(eb, key, slot);
7787         ret = insert_block_group_record(block_group_cache, rec);
7788         if (ret) {
7789                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7790                         rec->objectid, rec->offset);
7791                 free(rec);
7792         }
7793
7794         return ret;
7795 }
7796
7797 struct device_extent_record *
7798 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7799                                struct btrfs_key *key, int slot)
7800 {
7801         struct device_extent_record *rec;
7802         struct btrfs_dev_extent *ptr;
7803
7804         rec = calloc(1, sizeof(*rec));
7805         if (!rec) {
7806                 fprintf(stderr, "memory allocation failed\n");
7807                 exit(-1);
7808         }
7809
7810         rec->cache.objectid = key->objectid;
7811         rec->cache.start = key->offset;
7812
7813         rec->generation = btrfs_header_generation(leaf);
7814
7815         rec->objectid = key->objectid;
7816         rec->type = key->type;
7817         rec->offset = key->offset;
7818
7819         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7820         rec->chunk_objecteid =
7821                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7822         rec->chunk_offset =
7823                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7824         rec->length = btrfs_dev_extent_length(leaf, ptr);
7825         rec->cache.size = rec->length;
7826
7827         INIT_LIST_HEAD(&rec->chunk_list);
7828         INIT_LIST_HEAD(&rec->device_list);
7829
7830         return rec;
7831 }
7832
7833 static int
7834 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7835                            struct btrfs_key *key, struct extent_buffer *eb,
7836                            int slot)
7837 {
7838         struct device_extent_record *rec;
7839         int ret;
7840
7841         rec = btrfs_new_device_extent_record(eb, key, slot);
7842         ret = insert_device_extent_record(dev_extent_cache, rec);
7843         if (ret) {
7844                 fprintf(stderr,
7845                         "Device extent[%llu, %llu, %llu] existed.\n",
7846                         rec->objectid, rec->offset, rec->length);
7847                 free(rec);
7848         }
7849
7850         return ret;
7851 }
7852
7853 static int process_extent_item(struct btrfs_root *root,
7854                                struct cache_tree *extent_cache,
7855                                struct extent_buffer *eb, int slot)
7856 {
7857         struct btrfs_extent_item *ei;
7858         struct btrfs_extent_inline_ref *iref;
7859         struct btrfs_extent_data_ref *dref;
7860         struct btrfs_shared_data_ref *sref;
7861         struct btrfs_key key;
7862         struct extent_record tmpl;
7863         unsigned long end;
7864         unsigned long ptr;
7865         int ret;
7866         int type;
7867         u32 item_size = btrfs_item_size_nr(eb, slot);
7868         u64 refs = 0;
7869         u64 offset;
7870         u64 num_bytes;
7871         int metadata = 0;
7872
7873         btrfs_item_key_to_cpu(eb, &key, slot);
7874
7875         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7876                 metadata = 1;
7877                 num_bytes = root->fs_info->nodesize;
7878         } else {
7879                 num_bytes = key.offset;
7880         }
7881
7882         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7883                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7884                       key.objectid, root->fs_info->sectorsize);
7885                 return -EIO;
7886         }
7887         if (item_size < sizeof(*ei)) {
7888 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7889                 struct btrfs_extent_item_v0 *ei0;
7890                 BUG_ON(item_size != sizeof(*ei0));
7891                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7892                 refs = btrfs_extent_refs_v0(eb, ei0);
7893 #else
7894                 BUG();
7895 #endif
7896                 memset(&tmpl, 0, sizeof(tmpl));
7897                 tmpl.start = key.objectid;
7898                 tmpl.nr = num_bytes;
7899                 tmpl.extent_item_refs = refs;
7900                 tmpl.metadata = metadata;
7901                 tmpl.found_rec = 1;
7902                 tmpl.max_size = num_bytes;
7903
7904                 return add_extent_rec(extent_cache, &tmpl);
7905         }
7906
7907         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7908         refs = btrfs_extent_refs(eb, ei);
7909         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7910                 metadata = 1;
7911         else
7912                 metadata = 0;
7913         if (metadata && num_bytes != root->fs_info->nodesize) {
7914                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7915                       num_bytes, root->fs_info->nodesize);
7916                 return -EIO;
7917         }
7918         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7919                 error("ignore invalid data extent, length %llu is not aligned to %u",
7920                       num_bytes, root->fs_info->sectorsize);
7921                 return -EIO;
7922         }
7923
7924         memset(&tmpl, 0, sizeof(tmpl));
7925         tmpl.start = key.objectid;
7926         tmpl.nr = num_bytes;
7927         tmpl.extent_item_refs = refs;
7928         tmpl.metadata = metadata;
7929         tmpl.found_rec = 1;
7930         tmpl.max_size = num_bytes;
7931         add_extent_rec(extent_cache, &tmpl);
7932
7933         ptr = (unsigned long)(ei + 1);
7934         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7935             key.type == BTRFS_EXTENT_ITEM_KEY)
7936                 ptr += sizeof(struct btrfs_tree_block_info);
7937
7938         end = (unsigned long)ei + item_size;
7939         while (ptr < end) {
7940                 iref = (struct btrfs_extent_inline_ref *)ptr;
7941                 type = btrfs_extent_inline_ref_type(eb, iref);
7942                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7943                 switch (type) {
7944                 case BTRFS_TREE_BLOCK_REF_KEY:
7945                         ret = add_tree_backref(extent_cache, key.objectid,
7946                                         0, offset, 0);
7947                         if (ret < 0)
7948                                 error(
7949                         "add_tree_backref failed (extent items tree block): %s",
7950                                       strerror(-ret));
7951                         break;
7952                 case BTRFS_SHARED_BLOCK_REF_KEY:
7953                         ret = add_tree_backref(extent_cache, key.objectid,
7954                                         offset, 0, 0);
7955                         if (ret < 0)
7956                                 error(
7957                         "add_tree_backref failed (extent items shared block): %s",
7958                                       strerror(-ret));
7959                         break;
7960                 case BTRFS_EXTENT_DATA_REF_KEY:
7961                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7962                         add_data_backref(extent_cache, key.objectid, 0,
7963                                         btrfs_extent_data_ref_root(eb, dref),
7964                                         btrfs_extent_data_ref_objectid(eb,
7965                                                                        dref),
7966                                         btrfs_extent_data_ref_offset(eb, dref),
7967                                         btrfs_extent_data_ref_count(eb, dref),
7968                                         0, num_bytes);
7969                         break;
7970                 case BTRFS_SHARED_DATA_REF_KEY:
7971                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7972                         add_data_backref(extent_cache, key.objectid, offset,
7973                                         0, 0, 0,
7974                                         btrfs_shared_data_ref_count(eb, sref),
7975                                         0, num_bytes);
7976                         break;
7977                 default:
7978                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7979                                 key.objectid, key.type, num_bytes);
7980                         goto out;
7981                 }
7982                 ptr += btrfs_extent_inline_ref_size(type);
7983         }
7984         WARN_ON(ptr > end);
7985 out:
7986         return 0;
7987 }
7988
7989 static int check_cache_range(struct btrfs_root *root,
7990                              struct btrfs_block_group_cache *cache,
7991                              u64 offset, u64 bytes)
7992 {
7993         struct btrfs_free_space *entry;
7994         u64 *logical;
7995         u64 bytenr;
7996         int stripe_len;
7997         int i, nr, ret;
7998
7999         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8000                 bytenr = btrfs_sb_offset(i);
8001                 ret = btrfs_rmap_block(root->fs_info,
8002                                        cache->key.objectid, bytenr, 0,
8003                                        &logical, &nr, &stripe_len);
8004                 if (ret)
8005                         return ret;
8006
8007                 while (nr--) {
8008                         if (logical[nr] + stripe_len <= offset)
8009                                 continue;
8010                         if (offset + bytes <= logical[nr])
8011                                 continue;
8012                         if (logical[nr] == offset) {
8013                                 if (stripe_len >= bytes) {
8014                                         free(logical);
8015                                         return 0;
8016                                 }
8017                                 bytes -= stripe_len;
8018                                 offset += stripe_len;
8019                         } else if (logical[nr] < offset) {
8020                                 if (logical[nr] + stripe_len >=
8021                                     offset + bytes) {
8022                                         free(logical);
8023                                         return 0;
8024                                 }
8025                                 bytes = (offset + bytes) -
8026                                         (logical[nr] + stripe_len);
8027                                 offset = logical[nr] + stripe_len;
8028                         } else {
8029                                 /*
8030                                  * Could be tricky, the super may land in the
8031                                  * middle of the area we're checking.  First
8032                                  * check the easiest case, it's at the end.
8033                                  */
8034                                 if (logical[nr] + stripe_len >=
8035                                     bytes + offset) {
8036                                         bytes = logical[nr] - offset;
8037                                         continue;
8038                                 }
8039
8040                                 /* Check the left side */
8041                                 ret = check_cache_range(root, cache,
8042                                                         offset,
8043                                                         logical[nr] - offset);
8044                                 if (ret) {
8045                                         free(logical);
8046                                         return ret;
8047                                 }
8048
8049                                 /* Now we continue with the right side */
8050                                 bytes = (offset + bytes) -
8051                                         (logical[nr] + stripe_len);
8052                                 offset = logical[nr] + stripe_len;
8053                         }
8054                 }
8055
8056                 free(logical);
8057         }
8058
8059         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8060         if (!entry) {
8061                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8062                         offset, offset+bytes);
8063                 return -EINVAL;
8064         }
8065
8066         if (entry->offset != offset) {
8067                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8068                         entry->offset);
8069                 return -EINVAL;
8070         }
8071
8072         if (entry->bytes != bytes) {
8073                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8074                         bytes, entry->bytes, offset);
8075                 return -EINVAL;
8076         }
8077
8078         unlink_free_space(cache->free_space_ctl, entry);
8079         free(entry);
8080         return 0;
8081 }
8082
8083 static int verify_space_cache(struct btrfs_root *root,
8084                               struct btrfs_block_group_cache *cache)
8085 {
8086         struct btrfs_path path;
8087         struct extent_buffer *leaf;
8088         struct btrfs_key key;
8089         u64 last;
8090         int ret = 0;
8091
8092         root = root->fs_info->extent_root;
8093
8094         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8095
8096         btrfs_init_path(&path);
8097         key.objectid = last;
8098         key.offset = 0;
8099         key.type = BTRFS_EXTENT_ITEM_KEY;
8100         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8101         if (ret < 0)
8102                 goto out;
8103         ret = 0;
8104         while (1) {
8105                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8106                         ret = btrfs_next_leaf(root, &path);
8107                         if (ret < 0)
8108                                 goto out;
8109                         if (ret > 0) {
8110                                 ret = 0;
8111                                 break;
8112                         }
8113                 }
8114                 leaf = path.nodes[0];
8115                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8116                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8117                         break;
8118                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8119                     key.type != BTRFS_METADATA_ITEM_KEY) {
8120                         path.slots[0]++;
8121                         continue;
8122                 }
8123
8124                 if (last == key.objectid) {
8125                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8126                                 last = key.objectid + key.offset;
8127                         else
8128                                 last = key.objectid + root->fs_info->nodesize;
8129                         path.slots[0]++;
8130                         continue;
8131                 }
8132
8133                 ret = check_cache_range(root, cache, last,
8134                                         key.objectid - last);
8135                 if (ret)
8136                         break;
8137                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8138                         last = key.objectid + key.offset;
8139                 else
8140                         last = key.objectid + root->fs_info->nodesize;
8141                 path.slots[0]++;
8142         }
8143
8144         if (last < cache->key.objectid + cache->key.offset)
8145                 ret = check_cache_range(root, cache, last,
8146                                         cache->key.objectid +
8147                                         cache->key.offset - last);
8148
8149 out:
8150         btrfs_release_path(&path);
8151
8152         if (!ret &&
8153             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8154                 fprintf(stderr, "There are still entries left in the space "
8155                         "cache\n");
8156                 ret = -EINVAL;
8157         }
8158
8159         return ret;
8160 }
8161
8162 static int check_space_cache(struct btrfs_root *root)
8163 {
8164         struct btrfs_block_group_cache *cache;
8165         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8166         int ret;
8167         int error = 0;
8168
8169         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8170             btrfs_super_generation(root->fs_info->super_copy) !=
8171             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8172                 printf("cache and super generation don't match, space cache "
8173                        "will be invalidated\n");
8174                 return 0;
8175         }
8176
8177         if (ctx.progress_enabled) {
8178                 ctx.tp = TASK_FREE_SPACE;
8179                 task_start(ctx.info);
8180         }
8181
8182         while (1) {
8183                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8184                 if (!cache)
8185                         break;
8186
8187                 start = cache->key.objectid + cache->key.offset;
8188                 if (!cache->free_space_ctl) {
8189                         if (btrfs_init_free_space_ctl(cache,
8190                                                 root->fs_info->sectorsize)) {
8191                                 ret = -ENOMEM;
8192                                 break;
8193                         }
8194                 } else {
8195                         btrfs_remove_free_space_cache(cache);
8196                 }
8197
8198                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8199                         ret = exclude_super_stripes(root, cache);
8200                         if (ret) {
8201                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8202                                         strerror(-ret));
8203                                 error++;
8204                                 continue;
8205                         }
8206                         ret = load_free_space_tree(root->fs_info, cache);
8207                         free_excluded_extents(root, cache);
8208                         if (ret < 0) {
8209                                 fprintf(stderr, "could not load free space tree: %s\n",
8210                                         strerror(-ret));
8211                                 error++;
8212                                 continue;
8213                         }
8214                         error += ret;
8215                 } else {
8216                         ret = load_free_space_cache(root->fs_info, cache);
8217                         if (!ret)
8218                                 continue;
8219                 }
8220
8221                 ret = verify_space_cache(root, cache);
8222                 if (ret) {
8223                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8224                                 cache->key.objectid);
8225                         error++;
8226                 }
8227         }
8228
8229         task_stop(ctx.info);
8230
8231         return error ? -EINVAL : 0;
8232 }
8233
8234 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8235                         u64 num_bytes, unsigned long leaf_offset,
8236                         struct extent_buffer *eb) {
8237
8238         struct btrfs_fs_info *fs_info = root->fs_info;
8239         u64 offset = 0;
8240         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8241         char *data;
8242         unsigned long csum_offset;
8243         u32 csum;
8244         u32 csum_expected;
8245         u64 read_len;
8246         u64 data_checked = 0;
8247         u64 tmp;
8248         int ret = 0;
8249         int mirror;
8250         int num_copies;
8251
8252         if (num_bytes % fs_info->sectorsize)
8253                 return -EINVAL;
8254
8255         data = malloc(num_bytes);
8256         if (!data)
8257                 return -ENOMEM;
8258
8259         while (offset < num_bytes) {
8260                 mirror = 0;
8261 again:
8262                 read_len = num_bytes - offset;
8263                 /* read as much space once a time */
8264                 ret = read_extent_data(fs_info, data + offset,
8265                                 bytenr + offset, &read_len, mirror);
8266                 if (ret)
8267                         goto out;
8268                 data_checked = 0;
8269                 /* verify every 4k data's checksum */
8270                 while (data_checked < read_len) {
8271                         csum = ~(u32)0;
8272                         tmp = offset + data_checked;
8273
8274                         csum = btrfs_csum_data((char *)data + tmp,
8275                                                csum, fs_info->sectorsize);
8276                         btrfs_csum_final(csum, (u8 *)&csum);
8277
8278                         csum_offset = leaf_offset +
8279                                  tmp / fs_info->sectorsize * csum_size;
8280                         read_extent_buffer(eb, (char *)&csum_expected,
8281                                            csum_offset, csum_size);
8282                         /* try another mirror */
8283                         if (csum != csum_expected) {
8284                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8285                                                 mirror, bytenr + tmp,
8286                                                 csum, csum_expected);
8287                                 num_copies = btrfs_num_copies(root->fs_info,
8288                                                 bytenr, num_bytes);
8289                                 if (mirror < num_copies - 1) {
8290                                         mirror += 1;
8291                                         goto again;
8292                                 }
8293                         }
8294                         data_checked += fs_info->sectorsize;
8295                 }
8296                 offset += read_len;
8297         }
8298 out:
8299         free(data);
8300         return ret;
8301 }
8302
8303 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8304                                u64 num_bytes)
8305 {
8306         struct btrfs_path path;
8307         struct extent_buffer *leaf;
8308         struct btrfs_key key;
8309         int ret;
8310
8311         btrfs_init_path(&path);
8312         key.objectid = bytenr;
8313         key.type = BTRFS_EXTENT_ITEM_KEY;
8314         key.offset = (u64)-1;
8315
8316 again:
8317         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8318                                 0, 0);
8319         if (ret < 0) {
8320                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8321                 btrfs_release_path(&path);
8322                 return ret;
8323         } else if (ret) {
8324                 if (path.slots[0] > 0) {
8325                         path.slots[0]--;
8326                 } else {
8327                         ret = btrfs_prev_leaf(root, &path);
8328                         if (ret < 0) {
8329                                 goto out;
8330                         } else if (ret > 0) {
8331                                 ret = 0;
8332                                 goto out;
8333                         }
8334                 }
8335         }
8336
8337         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8338
8339         /*
8340          * Block group items come before extent items if they have the same
8341          * bytenr, so walk back one more just in case.  Dear future traveller,
8342          * first congrats on mastering time travel.  Now if it's not too much
8343          * trouble could you go back to 2006 and tell Chris to make the
8344          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8345          * EXTENT_ITEM_KEY please?
8346          */
8347         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8348                 if (path.slots[0] > 0) {
8349                         path.slots[0]--;
8350                 } else {
8351                         ret = btrfs_prev_leaf(root, &path);
8352                         if (ret < 0) {
8353                                 goto out;
8354                         } else if (ret > 0) {
8355                                 ret = 0;
8356                                 goto out;
8357                         }
8358                 }
8359                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8360         }
8361
8362         while (num_bytes) {
8363                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8364                         ret = btrfs_next_leaf(root, &path);
8365                         if (ret < 0) {
8366                                 fprintf(stderr, "Error going to next leaf "
8367                                         "%d\n", ret);
8368                                 btrfs_release_path(&path);
8369                                 return ret;
8370                         } else if (ret) {
8371                                 break;
8372                         }
8373                 }
8374                 leaf = path.nodes[0];
8375                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8376                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8377                         path.slots[0]++;
8378                         continue;
8379                 }
8380                 if (key.objectid + key.offset < bytenr) {
8381                         path.slots[0]++;
8382                         continue;
8383                 }
8384                 if (key.objectid > bytenr + num_bytes)
8385                         break;
8386
8387                 if (key.objectid == bytenr) {
8388                         if (key.offset >= num_bytes) {
8389                                 num_bytes = 0;
8390                                 break;
8391                         }
8392                         num_bytes -= key.offset;
8393                         bytenr += key.offset;
8394                 } else if (key.objectid < bytenr) {
8395                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8396                                 num_bytes = 0;
8397                                 break;
8398                         }
8399                         num_bytes = (bytenr + num_bytes) -
8400                                 (key.objectid + key.offset);
8401                         bytenr = key.objectid + key.offset;
8402                 } else {
8403                         if (key.objectid + key.offset < bytenr + num_bytes) {
8404                                 u64 new_start = key.objectid + key.offset;
8405                                 u64 new_bytes = bytenr + num_bytes - new_start;
8406
8407                                 /*
8408                                  * Weird case, the extent is in the middle of
8409                                  * our range, we'll have to search one side
8410                                  * and then the other.  Not sure if this happens
8411                                  * in real life, but no harm in coding it up
8412                                  * anyway just in case.
8413                                  */
8414                                 btrfs_release_path(&path);
8415                                 ret = check_extent_exists(root, new_start,
8416                                                           new_bytes);
8417                                 if (ret) {
8418                                         fprintf(stderr, "Right section didn't "
8419                                                 "have a record\n");
8420                                         break;
8421                                 }
8422                                 num_bytes = key.objectid - bytenr;
8423                                 goto again;
8424                         }
8425                         num_bytes = key.objectid - bytenr;
8426                 }
8427                 path.slots[0]++;
8428         }
8429         ret = 0;
8430
8431 out:
8432         if (num_bytes && !ret) {
8433                 fprintf(stderr, "There are no extents for csum range "
8434                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8435                 ret = 1;
8436         }
8437
8438         btrfs_release_path(&path);
8439         return ret;
8440 }
8441
8442 static int check_csums(struct btrfs_root *root)
8443 {
8444         struct btrfs_path path;
8445         struct extent_buffer *leaf;
8446         struct btrfs_key key;
8447         u64 offset = 0, num_bytes = 0;
8448         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8449         int errors = 0;
8450         int ret;
8451         u64 data_len;
8452         unsigned long leaf_offset;
8453
8454         root = root->fs_info->csum_root;
8455         if (!extent_buffer_uptodate(root->node)) {
8456                 fprintf(stderr, "No valid csum tree found\n");
8457                 return -ENOENT;
8458         }
8459
8460         btrfs_init_path(&path);
8461         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8462         key.type = BTRFS_EXTENT_CSUM_KEY;
8463         key.offset = 0;
8464         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8465         if (ret < 0) {
8466                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8467                 btrfs_release_path(&path);
8468                 return ret;
8469         }
8470
8471         if (ret > 0 && path.slots[0])
8472                 path.slots[0]--;
8473         ret = 0;
8474
8475         while (1) {
8476                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8477                         ret = btrfs_next_leaf(root, &path);
8478                         if (ret < 0) {
8479                                 fprintf(stderr, "Error going to next leaf "
8480                                         "%d\n", ret);
8481                                 break;
8482                         }
8483                         if (ret)
8484                                 break;
8485                 }
8486                 leaf = path.nodes[0];
8487
8488                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8489                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8490                         path.slots[0]++;
8491                         continue;
8492                 }
8493
8494                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8495                               csum_size) * root->fs_info->sectorsize;
8496                 if (!check_data_csum)
8497                         goto skip_csum_check;
8498                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8499                 ret = check_extent_csums(root, key.offset, data_len,
8500                                          leaf_offset, leaf);
8501                 if (ret)
8502                         break;
8503 skip_csum_check:
8504                 if (!num_bytes) {
8505                         offset = key.offset;
8506                 } else if (key.offset != offset + num_bytes) {
8507                         ret = check_extent_exists(root, offset, num_bytes);
8508                         if (ret) {
8509                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8510                                         "there is no extent record\n",
8511                                         offset, offset+num_bytes);
8512                                 errors++;
8513                         }
8514                         offset = key.offset;
8515                         num_bytes = 0;
8516                 }
8517                 num_bytes += data_len;
8518                 path.slots[0]++;
8519         }
8520
8521         btrfs_release_path(&path);
8522         return errors;
8523 }
8524
8525 static int is_dropped_key(struct btrfs_key *key,
8526                           struct btrfs_key *drop_key) {
8527         if (key->objectid < drop_key->objectid)
8528                 return 1;
8529         else if (key->objectid == drop_key->objectid) {
8530                 if (key->type < drop_key->type)
8531                         return 1;
8532                 else if (key->type == drop_key->type) {
8533                         if (key->offset < drop_key->offset)
8534                                 return 1;
8535                 }
8536         }
8537         return 0;
8538 }
8539
8540 /*
8541  * Here are the rules for FULL_BACKREF.
8542  *
8543  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8544  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8545  *      FULL_BACKREF set.
8546  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8547  *    if it happened after the relocation occurred since we'll have dropped the
8548  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8549  *    have no real way to know for sure.
8550  *
8551  * We process the blocks one root at a time, and we start from the lowest root
8552  * objectid and go to the highest.  So we can just lookup the owner backref for
8553  * the record and if we don't find it then we know it doesn't exist and we have
8554  * a FULL BACKREF.
8555  *
8556  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8557  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8558  * be set or not and then we can check later once we've gathered all the refs.
8559  */
8560 static int calc_extent_flag(struct cache_tree *extent_cache,
8561                            struct extent_buffer *buf,
8562                            struct root_item_record *ri,
8563                            u64 *flags)
8564 {
8565         struct extent_record *rec;
8566         struct cache_extent *cache;
8567         struct tree_backref *tback;
8568         u64 owner = 0;
8569
8570         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8571         /* we have added this extent before */
8572         if (!cache)
8573                 return -ENOENT;
8574
8575         rec = container_of(cache, struct extent_record, cache);
8576
8577         /*
8578          * Except file/reloc tree, we can not have
8579          * FULL BACKREF MODE
8580          */
8581         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8582                 goto normal;
8583         /*
8584          * root node
8585          */
8586         if (buf->start == ri->bytenr)
8587                 goto normal;
8588
8589         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8590                 goto full_backref;
8591
8592         owner = btrfs_header_owner(buf);
8593         if (owner == ri->objectid)
8594                 goto normal;
8595
8596         tback = find_tree_backref(rec, 0, owner);
8597         if (!tback)
8598                 goto full_backref;
8599 normal:
8600         *flags = 0;
8601         if (rec->flag_block_full_backref != FLAG_UNSET &&
8602             rec->flag_block_full_backref != 0)
8603                 rec->bad_full_backref = 1;
8604         return 0;
8605 full_backref:
8606         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8607         if (rec->flag_block_full_backref != FLAG_UNSET &&
8608             rec->flag_block_full_backref != 1)
8609                 rec->bad_full_backref = 1;
8610         return 0;
8611 }
8612
8613 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8614 {
8615         fprintf(stderr, "Invalid key type(");
8616         print_key_type(stderr, 0, key_type);
8617         fprintf(stderr, ") found in root(");
8618         print_objectid(stderr, rootid, 0);
8619         fprintf(stderr, ")\n");
8620 }
8621
8622 /*
8623  * Check if the key is valid with its extent buffer.
8624  *
8625  * This is a early check in case invalid key exists in a extent buffer
8626  * This is not comprehensive yet, but should prevent wrong key/item passed
8627  * further
8628  */
8629 static int check_type_with_root(u64 rootid, u8 key_type)
8630 {
8631         switch (key_type) {
8632         /* Only valid in chunk tree */
8633         case BTRFS_DEV_ITEM_KEY:
8634         case BTRFS_CHUNK_ITEM_KEY:
8635                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8636                         goto err;
8637                 break;
8638         /* valid in csum and log tree */
8639         case BTRFS_CSUM_TREE_OBJECTID:
8640                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8641                       is_fstree(rootid)))
8642                         goto err;
8643                 break;
8644         case BTRFS_EXTENT_ITEM_KEY:
8645         case BTRFS_METADATA_ITEM_KEY:
8646         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8647                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8648                         goto err;
8649                 break;
8650         case BTRFS_ROOT_ITEM_KEY:
8651                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8652                         goto err;
8653                 break;
8654         case BTRFS_DEV_EXTENT_KEY:
8655                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8656                         goto err;
8657                 break;
8658         }
8659         return 0;
8660 err:
8661         report_mismatch_key_root(key_type, rootid);
8662         return -EINVAL;
8663 }
8664
8665 static int run_next_block(struct btrfs_root *root,
8666                           struct block_info *bits,
8667                           int bits_nr,
8668                           u64 *last,
8669                           struct cache_tree *pending,
8670                           struct cache_tree *seen,
8671                           struct cache_tree *reada,
8672                           struct cache_tree *nodes,
8673                           struct cache_tree *extent_cache,
8674                           struct cache_tree *chunk_cache,
8675                           struct rb_root *dev_cache,
8676                           struct block_group_tree *block_group_cache,
8677                           struct device_extent_tree *dev_extent_cache,
8678                           struct root_item_record *ri)
8679 {
8680         struct btrfs_fs_info *fs_info = root->fs_info;
8681         struct extent_buffer *buf;
8682         struct extent_record *rec = NULL;
8683         u64 bytenr;
8684         u32 size;
8685         u64 parent;
8686         u64 owner;
8687         u64 flags;
8688         u64 ptr;
8689         u64 gen = 0;
8690         int ret = 0;
8691         int i;
8692         int nritems;
8693         struct btrfs_key key;
8694         struct cache_extent *cache;
8695         int reada_bits;
8696
8697         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8698                                     bits_nr, &reada_bits);
8699         if (nritems == 0)
8700                 return 1;
8701
8702         if (!reada_bits) {
8703                 for(i = 0; i < nritems; i++) {
8704                         ret = add_cache_extent(reada, bits[i].start,
8705                                                bits[i].size);
8706                         if (ret == -EEXIST)
8707                                 continue;
8708
8709                         /* fixme, get the parent transid */
8710                         readahead_tree_block(fs_info, bits[i].start, 0);
8711                 }
8712         }
8713         *last = bits[0].start;
8714         bytenr = bits[0].start;
8715         size = bits[0].size;
8716
8717         cache = lookup_cache_extent(pending, bytenr, size);
8718         if (cache) {
8719                 remove_cache_extent(pending, cache);
8720                 free(cache);
8721         }
8722         cache = lookup_cache_extent(reada, bytenr, size);
8723         if (cache) {
8724                 remove_cache_extent(reada, cache);
8725                 free(cache);
8726         }
8727         cache = lookup_cache_extent(nodes, bytenr, size);
8728         if (cache) {
8729                 remove_cache_extent(nodes, cache);
8730                 free(cache);
8731         }
8732         cache = lookup_cache_extent(extent_cache, bytenr, size);
8733         if (cache) {
8734                 rec = container_of(cache, struct extent_record, cache);
8735                 gen = rec->parent_generation;
8736         }
8737
8738         /* fixme, get the real parent transid */
8739         buf = read_tree_block(root->fs_info, bytenr, gen);
8740         if (!extent_buffer_uptodate(buf)) {
8741                 record_bad_block_io(root->fs_info,
8742                                     extent_cache, bytenr, size);
8743                 goto out;
8744         }
8745
8746         nritems = btrfs_header_nritems(buf);
8747
8748         flags = 0;
8749         if (!init_extent_tree) {
8750                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8751                                        btrfs_header_level(buf), 1, NULL,
8752                                        &flags);
8753                 if (ret < 0) {
8754                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8755                         if (ret < 0) {
8756                                 fprintf(stderr, "Couldn't calc extent flags\n");
8757                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8758                         }
8759                 }
8760         } else {
8761                 flags = 0;
8762                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8763                 if (ret < 0) {
8764                         fprintf(stderr, "Couldn't calc extent flags\n");
8765                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8766                 }
8767         }
8768
8769         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8770                 if (ri != NULL &&
8771                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8772                     ri->objectid == btrfs_header_owner(buf)) {
8773                         /*
8774                          * Ok we got to this block from it's original owner and
8775                          * we have FULL_BACKREF set.  Relocation can leave
8776                          * converted blocks over so this is altogether possible,
8777                          * however it's not possible if the generation > the
8778                          * last snapshot, so check for this case.
8779                          */
8780                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8781                             btrfs_header_generation(buf) > ri->last_snapshot) {
8782                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8783                                 rec->bad_full_backref = 1;
8784                         }
8785                 }
8786         } else {
8787                 if (ri != NULL &&
8788                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8789                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8790                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8791                         rec->bad_full_backref = 1;
8792                 }
8793         }
8794
8795         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8796                 rec->flag_block_full_backref = 1;
8797                 parent = bytenr;
8798                 owner = 0;
8799         } else {
8800                 rec->flag_block_full_backref = 0;
8801                 parent = 0;
8802                 owner = btrfs_header_owner(buf);
8803         }
8804
8805         ret = check_block(root, extent_cache, buf, flags);
8806         if (ret)
8807                 goto out;
8808
8809         if (btrfs_is_leaf(buf)) {
8810                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8811                 for (i = 0; i < nritems; i++) {
8812                         struct btrfs_file_extent_item *fi;
8813                         btrfs_item_key_to_cpu(buf, &key, i);
8814                         /*
8815                          * Check key type against the leaf owner.
8816                          * Could filter quite a lot of early error if
8817                          * owner is correct
8818                          */
8819                         if (check_type_with_root(btrfs_header_owner(buf),
8820                                                  key.type)) {
8821                                 fprintf(stderr, "ignoring invalid key\n");
8822                                 continue;
8823                         }
8824                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8825                                 process_extent_item(root, extent_cache, buf,
8826                                                     i);
8827                                 continue;
8828                         }
8829                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8830                                 process_extent_item(root, extent_cache, buf,
8831                                                     i);
8832                                 continue;
8833                         }
8834                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8835                                 total_csum_bytes +=
8836                                         btrfs_item_size_nr(buf, i);
8837                                 continue;
8838                         }
8839                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8840                                 process_chunk_item(chunk_cache, &key, buf, i);
8841                                 continue;
8842                         }
8843                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8844                                 process_device_item(dev_cache, &key, buf, i);
8845                                 continue;
8846                         }
8847                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8848                                 process_block_group_item(block_group_cache,
8849                                         &key, buf, i);
8850                                 continue;
8851                         }
8852                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8853                                 process_device_extent_item(dev_extent_cache,
8854                                         &key, buf, i);
8855                                 continue;
8856
8857                         }
8858                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8859 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8860                                 process_extent_ref_v0(extent_cache, buf, i);
8861 #else
8862                                 BUG();
8863 #endif
8864                                 continue;
8865                         }
8866
8867                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8868                                 ret = add_tree_backref(extent_cache,
8869                                                 key.objectid, 0, key.offset, 0);
8870                                 if (ret < 0)
8871                                         error(
8872                                 "add_tree_backref failed (leaf tree block): %s",
8873                                               strerror(-ret));
8874                                 continue;
8875                         }
8876                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8877                                 ret = add_tree_backref(extent_cache,
8878                                                 key.objectid, key.offset, 0, 0);
8879                                 if (ret < 0)
8880                                         error(
8881                                 "add_tree_backref failed (leaf shared block): %s",
8882                                               strerror(-ret));
8883                                 continue;
8884                         }
8885                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8886                                 struct btrfs_extent_data_ref *ref;
8887                                 ref = btrfs_item_ptr(buf, i,
8888                                                 struct btrfs_extent_data_ref);
8889                                 add_data_backref(extent_cache,
8890                                         key.objectid, 0,
8891                                         btrfs_extent_data_ref_root(buf, ref),
8892                                         btrfs_extent_data_ref_objectid(buf,
8893                                                                        ref),
8894                                         btrfs_extent_data_ref_offset(buf, ref),
8895                                         btrfs_extent_data_ref_count(buf, ref),
8896                                         0, root->fs_info->sectorsize);
8897                                 continue;
8898                         }
8899                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8900                                 struct btrfs_shared_data_ref *ref;
8901                                 ref = btrfs_item_ptr(buf, i,
8902                                                 struct btrfs_shared_data_ref);
8903                                 add_data_backref(extent_cache,
8904                                         key.objectid, key.offset, 0, 0, 0,
8905                                         btrfs_shared_data_ref_count(buf, ref),
8906                                         0, root->fs_info->sectorsize);
8907                                 continue;
8908                         }
8909                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8910                                 struct bad_item *bad;
8911
8912                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8913                                         continue;
8914                                 if (!owner)
8915                                         continue;
8916                                 bad = malloc(sizeof(struct bad_item));
8917                                 if (!bad)
8918                                         continue;
8919                                 INIT_LIST_HEAD(&bad->list);
8920                                 memcpy(&bad->key, &key,
8921                                        sizeof(struct btrfs_key));
8922                                 bad->root_id = owner;
8923                                 list_add_tail(&bad->list, &delete_items);
8924                                 continue;
8925                         }
8926                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8927                                 continue;
8928                         fi = btrfs_item_ptr(buf, i,
8929                                             struct btrfs_file_extent_item);
8930                         if (btrfs_file_extent_type(buf, fi) ==
8931                             BTRFS_FILE_EXTENT_INLINE)
8932                                 continue;
8933                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8934                                 continue;
8935
8936                         data_bytes_allocated +=
8937                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8938                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8939                                 abort();
8940                         }
8941                         data_bytes_referenced +=
8942                                 btrfs_file_extent_num_bytes(buf, fi);
8943                         add_data_backref(extent_cache,
8944                                 btrfs_file_extent_disk_bytenr(buf, fi),
8945                                 parent, owner, key.objectid, key.offset -
8946                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8947                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8948                 }
8949         } else {
8950                 int level;
8951                 struct btrfs_key first_key;
8952
8953                 first_key.objectid = 0;
8954
8955                 if (nritems > 0)
8956                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8957                 level = btrfs_header_level(buf);
8958                 for (i = 0; i < nritems; i++) {
8959                         struct extent_record tmpl;
8960
8961                         ptr = btrfs_node_blockptr(buf, i);
8962                         size = root->fs_info->nodesize;
8963                         btrfs_node_key_to_cpu(buf, &key, i);
8964                         if (ri != NULL) {
8965                                 if ((level == ri->drop_level)
8966                                     && is_dropped_key(&key, &ri->drop_key)) {
8967                                         continue;
8968                                 }
8969                         }
8970
8971                         memset(&tmpl, 0, sizeof(tmpl));
8972                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8973                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8974                         tmpl.start = ptr;
8975                         tmpl.nr = size;
8976                         tmpl.refs = 1;
8977                         tmpl.metadata = 1;
8978                         tmpl.max_size = size;
8979                         ret = add_extent_rec(extent_cache, &tmpl);
8980                         if (ret < 0)
8981                                 goto out;
8982
8983                         ret = add_tree_backref(extent_cache, ptr, parent,
8984                                         owner, 1);
8985                         if (ret < 0) {
8986                                 error(
8987                                 "add_tree_backref failed (non-leaf block): %s",
8988                                       strerror(-ret));
8989                                 continue;
8990                         }
8991
8992                         if (level > 1) {
8993                                 add_pending(nodes, seen, ptr, size);
8994                         } else {
8995                                 add_pending(pending, seen, ptr, size);
8996                         }
8997                 }
8998                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8999                                       nritems) * sizeof(struct btrfs_key_ptr);
9000         }
9001         total_btree_bytes += buf->len;
9002         if (fs_root_objectid(btrfs_header_owner(buf)))
9003                 total_fs_tree_bytes += buf->len;
9004         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9005                 total_extent_tree_bytes += buf->len;
9006 out:
9007         free_extent_buffer(buf);
9008         return ret;
9009 }
9010
9011 static int add_root_to_pending(struct extent_buffer *buf,
9012                                struct cache_tree *extent_cache,
9013                                struct cache_tree *pending,
9014                                struct cache_tree *seen,
9015                                struct cache_tree *nodes,
9016                                u64 objectid)
9017 {
9018         struct extent_record tmpl;
9019         int ret;
9020
9021         if (btrfs_header_level(buf) > 0)
9022                 add_pending(nodes, seen, buf->start, buf->len);
9023         else
9024                 add_pending(pending, seen, buf->start, buf->len);
9025
9026         memset(&tmpl, 0, sizeof(tmpl));
9027         tmpl.start = buf->start;
9028         tmpl.nr = buf->len;
9029         tmpl.is_root = 1;
9030         tmpl.refs = 1;
9031         tmpl.metadata = 1;
9032         tmpl.max_size = buf->len;
9033         add_extent_rec(extent_cache, &tmpl);
9034
9035         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9036             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9037                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9038                                 0, 1);
9039         else
9040                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9041                                 1);
9042         return ret;
9043 }
9044
9045 /* as we fix the tree, we might be deleting blocks that
9046  * we're tracking for repair.  This hook makes sure we
9047  * remove any backrefs for blocks as we are fixing them.
9048  */
9049 static int free_extent_hook(struct btrfs_trans_handle *trans,
9050                             struct btrfs_root *root,
9051                             u64 bytenr, u64 num_bytes, u64 parent,
9052                             u64 root_objectid, u64 owner, u64 offset,
9053                             int refs_to_drop)
9054 {
9055         struct extent_record *rec;
9056         struct cache_extent *cache;
9057         int is_data;
9058         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9059
9060         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9061         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9062         if (!cache)
9063                 return 0;
9064
9065         rec = container_of(cache, struct extent_record, cache);
9066         if (is_data) {
9067                 struct data_backref *back;
9068                 back = find_data_backref(rec, parent, root_objectid, owner,
9069                                          offset, 1, bytenr, num_bytes);
9070                 if (!back)
9071                         goto out;
9072                 if (back->node.found_ref) {
9073                         back->found_ref -= refs_to_drop;
9074                         if (rec->refs)
9075                                 rec->refs -= refs_to_drop;
9076                 }
9077                 if (back->node.found_extent_tree) {
9078                         back->num_refs -= refs_to_drop;
9079                         if (rec->extent_item_refs)
9080                                 rec->extent_item_refs -= refs_to_drop;
9081                 }
9082                 if (back->found_ref == 0)
9083                         back->node.found_ref = 0;
9084                 if (back->num_refs == 0)
9085                         back->node.found_extent_tree = 0;
9086
9087                 if (!back->node.found_extent_tree && back->node.found_ref) {
9088                         rb_erase(&back->node.node, &rec->backref_tree);
9089                         free(back);
9090                 }
9091         } else {
9092                 struct tree_backref *back;
9093                 back = find_tree_backref(rec, parent, root_objectid);
9094                 if (!back)
9095                         goto out;
9096                 if (back->node.found_ref) {
9097                         if (rec->refs)
9098                                 rec->refs--;
9099                         back->node.found_ref = 0;
9100                 }
9101                 if (back->node.found_extent_tree) {
9102                         if (rec->extent_item_refs)
9103                                 rec->extent_item_refs--;
9104                         back->node.found_extent_tree = 0;
9105                 }
9106                 if (!back->node.found_extent_tree && back->node.found_ref) {
9107                         rb_erase(&back->node.node, &rec->backref_tree);
9108                         free(back);
9109                 }
9110         }
9111         maybe_free_extent_rec(extent_cache, rec);
9112 out:
9113         return 0;
9114 }
9115
9116 static int delete_extent_records(struct btrfs_trans_handle *trans,
9117                                  struct btrfs_root *root,
9118                                  struct btrfs_path *path,
9119                                  u64 bytenr)
9120 {
9121         struct btrfs_key key;
9122         struct btrfs_key found_key;
9123         struct extent_buffer *leaf;
9124         int ret;
9125         int slot;
9126
9127
9128         key.objectid = bytenr;
9129         key.type = (u8)-1;
9130         key.offset = (u64)-1;
9131
9132         while(1) {
9133                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9134                                         &key, path, 0, 1);
9135                 if (ret < 0)
9136                         break;
9137
9138                 if (ret > 0) {
9139                         ret = 0;
9140                         if (path->slots[0] == 0)
9141                                 break;
9142                         path->slots[0]--;
9143                 }
9144                 ret = 0;
9145
9146                 leaf = path->nodes[0];
9147                 slot = path->slots[0];
9148
9149                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9150                 if (found_key.objectid != bytenr)
9151                         break;
9152
9153                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9154                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9155                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9156                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9157                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9158                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9159                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9160                         btrfs_release_path(path);
9161                         if (found_key.type == 0) {
9162                                 if (found_key.offset == 0)
9163                                         break;
9164                                 key.offset = found_key.offset - 1;
9165                                 key.type = found_key.type;
9166                         }
9167                         key.type = found_key.type - 1;
9168                         key.offset = (u64)-1;
9169                         continue;
9170                 }
9171
9172                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9173                         found_key.objectid, found_key.type, found_key.offset);
9174
9175                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9176                 if (ret)
9177                         break;
9178                 btrfs_release_path(path);
9179
9180                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9181                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9182                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9183                                 found_key.offset : root->fs_info->nodesize;
9184
9185                         ret = btrfs_update_block_group(trans, root, bytenr,
9186                                                        bytes, 0, 0);
9187                         if (ret)
9188                                 break;
9189                 }
9190         }
9191
9192         btrfs_release_path(path);
9193         return ret;
9194 }
9195
9196 /*
9197  * for a single backref, this will allocate a new extent
9198  * and add the backref to it.
9199  */
9200 static int record_extent(struct btrfs_trans_handle *trans,
9201                          struct btrfs_fs_info *info,
9202                          struct btrfs_path *path,
9203                          struct extent_record *rec,
9204                          struct extent_backref *back,
9205                          int allocated, u64 flags)
9206 {
9207         int ret = 0;
9208         struct btrfs_root *extent_root = info->extent_root;
9209         struct extent_buffer *leaf;
9210         struct btrfs_key ins_key;
9211         struct btrfs_extent_item *ei;
9212         struct data_backref *dback;
9213         struct btrfs_tree_block_info *bi;
9214
9215         if (!back->is_data)
9216                 rec->max_size = max_t(u64, rec->max_size,
9217                                     info->nodesize);
9218
9219         if (!allocated) {
9220                 u32 item_size = sizeof(*ei);
9221
9222                 if (!back->is_data)
9223                         item_size += sizeof(*bi);
9224
9225                 ins_key.objectid = rec->start;
9226                 ins_key.offset = rec->max_size;
9227                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9228
9229                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9230                                         &ins_key, item_size);
9231                 if (ret)
9232                         goto fail;
9233
9234                 leaf = path->nodes[0];
9235                 ei = btrfs_item_ptr(leaf, path->slots[0],
9236                                     struct btrfs_extent_item);
9237
9238                 btrfs_set_extent_refs(leaf, ei, 0);
9239                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9240
9241                 if (back->is_data) {
9242                         btrfs_set_extent_flags(leaf, ei,
9243                                                BTRFS_EXTENT_FLAG_DATA);
9244                 } else {
9245                         struct btrfs_disk_key copy_key;;
9246
9247                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9248                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9249                                              sizeof(*bi));
9250
9251                         btrfs_set_disk_key_objectid(&copy_key,
9252                                                     rec->info_objectid);
9253                         btrfs_set_disk_key_type(&copy_key, 0);
9254                         btrfs_set_disk_key_offset(&copy_key, 0);
9255
9256                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9257                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9258
9259                         btrfs_set_extent_flags(leaf, ei,
9260                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9261                 }
9262
9263                 btrfs_mark_buffer_dirty(leaf);
9264                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9265                                                rec->max_size, 1, 0);
9266                 if (ret)
9267                         goto fail;
9268                 btrfs_release_path(path);
9269         }
9270
9271         if (back->is_data) {
9272                 u64 parent;
9273                 int i;
9274
9275                 dback = to_data_backref(back);
9276                 if (back->full_backref)
9277                         parent = dback->parent;
9278                 else
9279                         parent = 0;
9280
9281                 for (i = 0; i < dback->found_ref; i++) {
9282                         /* if parent != 0, we're doing a full backref
9283                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9284                          * just makes the backref allocator create a data
9285                          * backref
9286                          */
9287                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9288                                                    rec->start, rec->max_size,
9289                                                    parent,
9290                                                    dback->root,
9291                                                    parent ?
9292                                                    BTRFS_FIRST_FREE_OBJECTID :
9293                                                    dback->owner,
9294                                                    dback->offset);
9295                         if (ret)
9296                                 break;
9297                 }
9298                 fprintf(stderr, "adding new data backref"
9299                                 " on %llu %s %llu owner %llu"
9300                                 " offset %llu found %d\n",
9301                                 (unsigned long long)rec->start,
9302                                 back->full_backref ?
9303                                 "parent" : "root",
9304                                 back->full_backref ?
9305                                 (unsigned long long)parent :
9306                                 (unsigned long long)dback->root,
9307                                 (unsigned long long)dback->owner,
9308                                 (unsigned long long)dback->offset,
9309                                 dback->found_ref);
9310         } else {
9311                 u64 parent;
9312                 struct tree_backref *tback;
9313
9314                 tback = to_tree_backref(back);
9315                 if (back->full_backref)
9316                         parent = tback->parent;
9317                 else
9318                         parent = 0;
9319
9320                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9321                                            rec->start, rec->max_size,
9322                                            parent, tback->root, 0, 0);
9323                 fprintf(stderr, "adding new tree backref on "
9324                         "start %llu len %llu parent %llu root %llu\n",
9325                         rec->start, rec->max_size, parent, tback->root);
9326         }
9327 fail:
9328         btrfs_release_path(path);
9329         return ret;
9330 }
9331
9332 static struct extent_entry *find_entry(struct list_head *entries,
9333                                        u64 bytenr, u64 bytes)
9334 {
9335         struct extent_entry *entry = NULL;
9336
9337         list_for_each_entry(entry, entries, list) {
9338                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9339                         return entry;
9340         }
9341
9342         return NULL;
9343 }
9344
9345 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9346 {
9347         struct extent_entry *entry, *best = NULL, *prev = NULL;
9348
9349         list_for_each_entry(entry, entries, list) {
9350                 /*
9351                  * If there are as many broken entries as entries then we know
9352                  * not to trust this particular entry.
9353                  */
9354                 if (entry->broken == entry->count)
9355                         continue;
9356
9357                 /*
9358                  * Special case, when there are only two entries and 'best' is
9359                  * the first one
9360                  */
9361                 if (!prev) {
9362                         best = entry;
9363                         prev = entry;
9364                         continue;
9365                 }
9366
9367                 /*
9368                  * If our current entry == best then we can't be sure our best
9369                  * is really the best, so we need to keep searching.
9370                  */
9371                 if (best && best->count == entry->count) {
9372                         prev = entry;
9373                         best = NULL;
9374                         continue;
9375                 }
9376
9377                 /* Prev == entry, not good enough, have to keep searching */
9378                 if (!prev->broken && prev->count == entry->count)
9379                         continue;
9380
9381                 if (!best)
9382                         best = (prev->count > entry->count) ? prev : entry;
9383                 else if (best->count < entry->count)
9384                         best = entry;
9385                 prev = entry;
9386         }
9387
9388         return best;
9389 }
9390
9391 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9392                       struct data_backref *dback, struct extent_entry *entry)
9393 {
9394         struct btrfs_trans_handle *trans;
9395         struct btrfs_root *root;
9396         struct btrfs_file_extent_item *fi;
9397         struct extent_buffer *leaf;
9398         struct btrfs_key key;
9399         u64 bytenr, bytes;
9400         int ret, err;
9401
9402         key.objectid = dback->root;
9403         key.type = BTRFS_ROOT_ITEM_KEY;
9404         key.offset = (u64)-1;
9405         root = btrfs_read_fs_root(info, &key);
9406         if (IS_ERR(root)) {
9407                 fprintf(stderr, "Couldn't find root for our ref\n");
9408                 return -EINVAL;
9409         }
9410
9411         /*
9412          * The backref points to the original offset of the extent if it was
9413          * split, so we need to search down to the offset we have and then walk
9414          * forward until we find the backref we're looking for.
9415          */
9416         key.objectid = dback->owner;
9417         key.type = BTRFS_EXTENT_DATA_KEY;
9418         key.offset = dback->offset;
9419         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9420         if (ret < 0) {
9421                 fprintf(stderr, "Error looking up ref %d\n", ret);
9422                 return ret;
9423         }
9424
9425         while (1) {
9426                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9427                         ret = btrfs_next_leaf(root, path);
9428                         if (ret) {
9429                                 fprintf(stderr, "Couldn't find our ref, next\n");
9430                                 return -EINVAL;
9431                         }
9432                 }
9433                 leaf = path->nodes[0];
9434                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9435                 if (key.objectid != dback->owner ||
9436                     key.type != BTRFS_EXTENT_DATA_KEY) {
9437                         fprintf(stderr, "Couldn't find our ref, search\n");
9438                         return -EINVAL;
9439                 }
9440                 fi = btrfs_item_ptr(leaf, path->slots[0],
9441                                     struct btrfs_file_extent_item);
9442                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9443                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9444
9445                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9446                         break;
9447                 path->slots[0]++;
9448         }
9449
9450         btrfs_release_path(path);
9451
9452         trans = btrfs_start_transaction(root, 1);
9453         if (IS_ERR(trans))
9454                 return PTR_ERR(trans);
9455
9456         /*
9457          * Ok we have the key of the file extent we want to fix, now we can cow
9458          * down to the thing and fix it.
9459          */
9460         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9461         if (ret < 0) {
9462                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9463                         key.objectid, key.type, key.offset, ret);
9464                 goto out;
9465         }
9466         if (ret > 0) {
9467                 fprintf(stderr, "Well that's odd, we just found this key "
9468                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9469                         key.offset);
9470                 ret = -EINVAL;
9471                 goto out;
9472         }
9473         leaf = path->nodes[0];
9474         fi = btrfs_item_ptr(leaf, path->slots[0],
9475                             struct btrfs_file_extent_item);
9476
9477         if (btrfs_file_extent_compression(leaf, fi) &&
9478             dback->disk_bytenr != entry->bytenr) {
9479                 fprintf(stderr, "Ref doesn't match the record start and is "
9480                         "compressed, please take a btrfs-image of this file "
9481                         "system and send it to a btrfs developer so they can "
9482                         "complete this functionality for bytenr %Lu\n",
9483                         dback->disk_bytenr);
9484                 ret = -EINVAL;
9485                 goto out;
9486         }
9487
9488         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9489                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9490         } else if (dback->disk_bytenr > entry->bytenr) {
9491                 u64 off_diff, offset;
9492
9493                 off_diff = dback->disk_bytenr - entry->bytenr;
9494                 offset = btrfs_file_extent_offset(leaf, fi);
9495                 if (dback->disk_bytenr + offset +
9496                     btrfs_file_extent_num_bytes(leaf, fi) >
9497                     entry->bytenr + entry->bytes) {
9498                         fprintf(stderr, "Ref is past the entry end, please "
9499                                 "take a btrfs-image of this file system and "
9500                                 "send it to a btrfs developer, ref %Lu\n",
9501                                 dback->disk_bytenr);
9502                         ret = -EINVAL;
9503                         goto out;
9504                 }
9505                 offset += off_diff;
9506                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9507                 btrfs_set_file_extent_offset(leaf, fi, offset);
9508         } else if (dback->disk_bytenr < entry->bytenr) {
9509                 u64 offset;
9510
9511                 offset = btrfs_file_extent_offset(leaf, fi);
9512                 if (dback->disk_bytenr + offset < entry->bytenr) {
9513                         fprintf(stderr, "Ref is before the entry start, please"
9514                                 " take a btrfs-image of this file system and "
9515                                 "send it to a btrfs developer, ref %Lu\n",
9516                                 dback->disk_bytenr);
9517                         ret = -EINVAL;
9518                         goto out;
9519                 }
9520
9521                 offset += dback->disk_bytenr;
9522                 offset -= entry->bytenr;
9523                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9524                 btrfs_set_file_extent_offset(leaf, fi, offset);
9525         }
9526
9527         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9528
9529         /*
9530          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9531          * only do this if we aren't using compression, otherwise it's a
9532          * trickier case.
9533          */
9534         if (!btrfs_file_extent_compression(leaf, fi))
9535                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9536         else
9537                 printf("ram bytes may be wrong?\n");
9538         btrfs_mark_buffer_dirty(leaf);
9539 out:
9540         err = btrfs_commit_transaction(trans, root);
9541         btrfs_release_path(path);
9542         return ret ? ret : err;
9543 }
9544
9545 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9546                            struct extent_record *rec)
9547 {
9548         struct extent_backref *back, *tmp;
9549         struct data_backref *dback;
9550         struct extent_entry *entry, *best = NULL;
9551         LIST_HEAD(entries);
9552         int nr_entries = 0;
9553         int broken_entries = 0;
9554         int ret = 0;
9555         short mismatch = 0;
9556
9557         /*
9558          * Metadata is easy and the backrefs should always agree on bytenr and
9559          * size, if not we've got bigger issues.
9560          */
9561         if (rec->metadata)
9562                 return 0;
9563
9564         rbtree_postorder_for_each_entry_safe(back, tmp,
9565                                              &rec->backref_tree, node) {
9566                 if (back->full_backref || !back->is_data)
9567                         continue;
9568
9569                 dback = to_data_backref(back);
9570
9571                 /*
9572                  * We only pay attention to backrefs that we found a real
9573                  * backref for.
9574                  */
9575                 if (dback->found_ref == 0)
9576                         continue;
9577
9578                 /*
9579                  * For now we only catch when the bytes don't match, not the
9580                  * bytenr.  We can easily do this at the same time, but I want
9581                  * to have a fs image to test on before we just add repair
9582                  * functionality willy-nilly so we know we won't screw up the
9583                  * repair.
9584                  */
9585
9586                 entry = find_entry(&entries, dback->disk_bytenr,
9587                                    dback->bytes);
9588                 if (!entry) {
9589                         entry = malloc(sizeof(struct extent_entry));
9590                         if (!entry) {
9591                                 ret = -ENOMEM;
9592                                 goto out;
9593                         }
9594                         memset(entry, 0, sizeof(*entry));
9595                         entry->bytenr = dback->disk_bytenr;
9596                         entry->bytes = dback->bytes;
9597                         list_add_tail(&entry->list, &entries);
9598                         nr_entries++;
9599                 }
9600
9601                 /*
9602                  * If we only have on entry we may think the entries agree when
9603                  * in reality they don't so we have to do some extra checking.
9604                  */
9605                 if (dback->disk_bytenr != rec->start ||
9606                     dback->bytes != rec->nr || back->broken)
9607                         mismatch = 1;
9608
9609                 if (back->broken) {
9610                         entry->broken++;
9611                         broken_entries++;
9612                 }
9613
9614                 entry->count++;
9615         }
9616
9617         /* Yay all the backrefs agree, carry on good sir */
9618         if (nr_entries <= 1 && !mismatch)
9619                 goto out;
9620
9621         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9622                 "%Lu\n", rec->start);
9623
9624         /*
9625          * First we want to see if the backrefs can agree amongst themselves who
9626          * is right, so figure out which one of the entries has the highest
9627          * count.
9628          */
9629         best = find_most_right_entry(&entries);
9630
9631         /*
9632          * Ok so we may have an even split between what the backrefs think, so
9633          * this is where we use the extent ref to see what it thinks.
9634          */
9635         if (!best) {
9636                 entry = find_entry(&entries, rec->start, rec->nr);
9637                 if (!entry && (!broken_entries || !rec->found_rec)) {
9638                         fprintf(stderr, "Backrefs don't agree with each other "
9639                                 "and extent record doesn't agree with anybody,"
9640                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9641                                 rec->start, rec->nr);
9642                         ret = -EINVAL;
9643                         goto out;
9644                 } else if (!entry) {
9645                         /*
9646                          * Ok our backrefs were broken, we'll assume this is the
9647                          * correct value and add an entry for this range.
9648                          */
9649                         entry = malloc(sizeof(struct extent_entry));
9650                         if (!entry) {
9651                                 ret = -ENOMEM;
9652                                 goto out;
9653                         }
9654                         memset(entry, 0, sizeof(*entry));
9655                         entry->bytenr = rec->start;
9656                         entry->bytes = rec->nr;
9657                         list_add_tail(&entry->list, &entries);
9658                         nr_entries++;
9659                 }
9660                 entry->count++;
9661                 best = find_most_right_entry(&entries);
9662                 if (!best) {
9663                         fprintf(stderr, "Backrefs and extent record evenly "
9664                                 "split on who is right, this is going to "
9665                                 "require user input to fix bytenr %Lu bytes "
9666                                 "%Lu\n", rec->start, rec->nr);
9667                         ret = -EINVAL;
9668                         goto out;
9669                 }
9670         }
9671
9672         /*
9673          * I don't think this can happen currently as we'll abort() if we catch
9674          * this case higher up, but in case somebody removes that we still can't
9675          * deal with it properly here yet, so just bail out of that's the case.
9676          */
9677         if (best->bytenr != rec->start) {
9678                 fprintf(stderr, "Extent start and backref starts don't match, "
9679                         "please use btrfs-image on this file system and send "
9680                         "it to a btrfs developer so they can make fsck fix "
9681                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9682                         rec->start, rec->nr);
9683                 ret = -EINVAL;
9684                 goto out;
9685         }
9686
9687         /*
9688          * Ok great we all agreed on an extent record, let's go find the real
9689          * references and fix up the ones that don't match.
9690          */
9691         rbtree_postorder_for_each_entry_safe(back, tmp,
9692                                              &rec->backref_tree, node) {
9693                 if (back->full_backref || !back->is_data)
9694                         continue;
9695
9696                 dback = to_data_backref(back);
9697
9698                 /*
9699                  * Still ignoring backrefs that don't have a real ref attached
9700                  * to them.
9701                  */
9702                 if (dback->found_ref == 0)
9703                         continue;
9704
9705                 if (dback->bytes == best->bytes &&
9706                     dback->disk_bytenr == best->bytenr)
9707                         continue;
9708
9709                 ret = repair_ref(info, path, dback, best);
9710                 if (ret)
9711                         goto out;
9712         }
9713
9714         /*
9715          * Ok we messed with the actual refs, which means we need to drop our
9716          * entire cache and go back and rescan.  I know this is a huge pain and
9717          * adds a lot of extra work, but it's the only way to be safe.  Once all
9718          * the backrefs agree we may not need to do anything to the extent
9719          * record itself.
9720          */
9721         ret = -EAGAIN;
9722 out:
9723         while (!list_empty(&entries)) {
9724                 entry = list_entry(entries.next, struct extent_entry, list);
9725                 list_del_init(&entry->list);
9726                 free(entry);
9727         }
9728         return ret;
9729 }
9730
9731 static int process_duplicates(struct cache_tree *extent_cache,
9732                               struct extent_record *rec)
9733 {
9734         struct extent_record *good, *tmp;
9735         struct cache_extent *cache;
9736         int ret;
9737
9738         /*
9739          * If we found a extent record for this extent then return, or if we
9740          * have more than one duplicate we are likely going to need to delete
9741          * something.
9742          */
9743         if (rec->found_rec || rec->num_duplicates > 1)
9744                 return 0;
9745
9746         /* Shouldn't happen but just in case */
9747         BUG_ON(!rec->num_duplicates);
9748
9749         /*
9750          * So this happens if we end up with a backref that doesn't match the
9751          * actual extent entry.  So either the backref is bad or the extent
9752          * entry is bad.  Either way we want to have the extent_record actually
9753          * reflect what we found in the extent_tree, so we need to take the
9754          * duplicate out and use that as the extent_record since the only way we
9755          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9756          */
9757         remove_cache_extent(extent_cache, &rec->cache);
9758
9759         good = to_extent_record(rec->dups.next);
9760         list_del_init(&good->list);
9761         INIT_LIST_HEAD(&good->backrefs);
9762         INIT_LIST_HEAD(&good->dups);
9763         good->cache.start = good->start;
9764         good->cache.size = good->nr;
9765         good->content_checked = 0;
9766         good->owner_ref_checked = 0;
9767         good->num_duplicates = 0;
9768         good->refs = rec->refs;
9769         list_splice_init(&rec->backrefs, &good->backrefs);
9770         while (1) {
9771                 cache = lookup_cache_extent(extent_cache, good->start,
9772                                             good->nr);
9773                 if (!cache)
9774                         break;
9775                 tmp = container_of(cache, struct extent_record, cache);
9776
9777                 /*
9778                  * If we find another overlapping extent and it's found_rec is
9779                  * set then it's a duplicate and we need to try and delete
9780                  * something.
9781                  */
9782                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9783                         if (list_empty(&good->list))
9784                                 list_add_tail(&good->list,
9785                                               &duplicate_extents);
9786                         good->num_duplicates += tmp->num_duplicates + 1;
9787                         list_splice_init(&tmp->dups, &good->dups);
9788                         list_del_init(&tmp->list);
9789                         list_add_tail(&tmp->list, &good->dups);
9790                         remove_cache_extent(extent_cache, &tmp->cache);
9791                         continue;
9792                 }
9793
9794                 /*
9795                  * Ok we have another non extent item backed extent rec, so lets
9796                  * just add it to this extent and carry on like we did above.
9797                  */
9798                 good->refs += tmp->refs;
9799                 list_splice_init(&tmp->backrefs, &good->backrefs);
9800                 remove_cache_extent(extent_cache, &tmp->cache);
9801                 free(tmp);
9802         }
9803         ret = insert_cache_extent(extent_cache, &good->cache);
9804         BUG_ON(ret);
9805         free(rec);
9806         return good->num_duplicates ? 0 : 1;
9807 }
9808
9809 static int delete_duplicate_records(struct btrfs_root *root,
9810                                     struct extent_record *rec)
9811 {
9812         struct btrfs_trans_handle *trans;
9813         LIST_HEAD(delete_list);
9814         struct btrfs_path path;
9815         struct extent_record *tmp, *good, *n;
9816         int nr_del = 0;
9817         int ret = 0, err;
9818         struct btrfs_key key;
9819
9820         btrfs_init_path(&path);
9821
9822         good = rec;
9823         /* Find the record that covers all of the duplicates. */
9824         list_for_each_entry(tmp, &rec->dups, list) {
9825                 if (good->start < tmp->start)
9826                         continue;
9827                 if (good->nr > tmp->nr)
9828                         continue;
9829
9830                 if (tmp->start + tmp->nr < good->start + good->nr) {
9831                         fprintf(stderr, "Ok we have overlapping extents that "
9832                                 "aren't completely covered by each other, this "
9833                                 "is going to require more careful thought.  "
9834                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9835                                 tmp->start, tmp->nr, good->start, good->nr);
9836                         abort();
9837                 }
9838                 good = tmp;
9839         }
9840
9841         if (good != rec)
9842                 list_add_tail(&rec->list, &delete_list);
9843
9844         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9845                 if (tmp == good)
9846                         continue;
9847                 list_move_tail(&tmp->list, &delete_list);
9848         }
9849
9850         root = root->fs_info->extent_root;
9851         trans = btrfs_start_transaction(root, 1);
9852         if (IS_ERR(trans)) {
9853                 ret = PTR_ERR(trans);
9854                 goto out;
9855         }
9856
9857         list_for_each_entry(tmp, &delete_list, list) {
9858                 if (tmp->found_rec == 0)
9859                         continue;
9860                 key.objectid = tmp->start;
9861                 key.type = BTRFS_EXTENT_ITEM_KEY;
9862                 key.offset = tmp->nr;
9863
9864                 /* Shouldn't happen but just in case */
9865                 if (tmp->metadata) {
9866                         fprintf(stderr, "Well this shouldn't happen, extent "
9867                                 "record overlaps but is metadata? "
9868                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9869                         abort();
9870                 }
9871
9872                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9873                 if (ret) {
9874                         if (ret > 0)
9875                                 ret = -EINVAL;
9876                         break;
9877                 }
9878                 ret = btrfs_del_item(trans, root, &path);
9879                 if (ret)
9880                         break;
9881                 btrfs_release_path(&path);
9882                 nr_del++;
9883         }
9884         err = btrfs_commit_transaction(trans, root);
9885         if (err && !ret)
9886                 ret = err;
9887 out:
9888         while (!list_empty(&delete_list)) {
9889                 tmp = to_extent_record(delete_list.next);
9890                 list_del_init(&tmp->list);
9891                 if (tmp == rec)
9892                         continue;
9893                 free(tmp);
9894         }
9895
9896         while (!list_empty(&rec->dups)) {
9897                 tmp = to_extent_record(rec->dups.next);
9898                 list_del_init(&tmp->list);
9899                 free(tmp);
9900         }
9901
9902         btrfs_release_path(&path);
9903
9904         if (!ret && !nr_del)
9905                 rec->num_duplicates = 0;
9906
9907         return ret ? ret : nr_del;
9908 }
9909
9910 static int find_possible_backrefs(struct btrfs_fs_info *info,
9911                                   struct btrfs_path *path,
9912                                   struct cache_tree *extent_cache,
9913                                   struct extent_record *rec)
9914 {
9915         struct btrfs_root *root;
9916         struct extent_backref *back, *tmp;
9917         struct data_backref *dback;
9918         struct cache_extent *cache;
9919         struct btrfs_file_extent_item *fi;
9920         struct btrfs_key key;
9921         u64 bytenr, bytes;
9922         int ret;
9923
9924         rbtree_postorder_for_each_entry_safe(back, tmp,
9925                                              &rec->backref_tree, node) {
9926                 /* Don't care about full backrefs (poor unloved backrefs) */
9927                 if (back->full_backref || !back->is_data)
9928                         continue;
9929
9930                 dback = to_data_backref(back);
9931
9932                 /* We found this one, we don't need to do a lookup */
9933                 if (dback->found_ref)
9934                         continue;
9935
9936                 key.objectid = dback->root;
9937                 key.type = BTRFS_ROOT_ITEM_KEY;
9938                 key.offset = (u64)-1;
9939
9940                 root = btrfs_read_fs_root(info, &key);
9941
9942                 /* No root, definitely a bad ref, skip */
9943                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9944                         continue;
9945                 /* Other err, exit */
9946                 if (IS_ERR(root))
9947                         return PTR_ERR(root);
9948
9949                 key.objectid = dback->owner;
9950                 key.type = BTRFS_EXTENT_DATA_KEY;
9951                 key.offset = dback->offset;
9952                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9953                 if (ret) {
9954                         btrfs_release_path(path);
9955                         if (ret < 0)
9956                                 return ret;
9957                         /* Didn't find it, we can carry on */
9958                         ret = 0;
9959                         continue;
9960                 }
9961
9962                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9963                                     struct btrfs_file_extent_item);
9964                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9965                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9966                 btrfs_release_path(path);
9967                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9968                 if (cache) {
9969                         struct extent_record *tmp;
9970                         tmp = container_of(cache, struct extent_record, cache);
9971
9972                         /*
9973                          * If we found an extent record for the bytenr for this
9974                          * particular backref then we can't add it to our
9975                          * current extent record.  We only want to add backrefs
9976                          * that don't have a corresponding extent item in the
9977                          * extent tree since they likely belong to this record
9978                          * and we need to fix it if it doesn't match bytenrs.
9979                          */
9980                         if  (tmp->found_rec)
9981                                 continue;
9982                 }
9983
9984                 dback->found_ref += 1;
9985                 dback->disk_bytenr = bytenr;
9986                 dback->bytes = bytes;
9987
9988                 /*
9989                  * Set this so the verify backref code knows not to trust the
9990                  * values in this backref.
9991                  */
9992                 back->broken = 1;
9993         }
9994
9995         return 0;
9996 }
9997
9998 /*
9999  * Record orphan data ref into corresponding root.
10000  *
10001  * Return 0 if the extent item contains data ref and recorded.
10002  * Return 1 if the extent item contains no useful data ref
10003  *   On that case, it may contains only shared_dataref or metadata backref
10004  *   or the file extent exists(this should be handled by the extent bytenr
10005  *   recovery routine)
10006  * Return <0 if something goes wrong.
10007  */
10008 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10009                                       struct extent_record *rec)
10010 {
10011         struct btrfs_key key;
10012         struct btrfs_root *dest_root;
10013         struct extent_backref *back, *tmp;
10014         struct data_backref *dback;
10015         struct orphan_data_extent *orphan;
10016         struct btrfs_path path;
10017         int recorded_data_ref = 0;
10018         int ret = 0;
10019
10020         if (rec->metadata)
10021                 return 1;
10022         btrfs_init_path(&path);
10023         rbtree_postorder_for_each_entry_safe(back, tmp,
10024                                              &rec->backref_tree, node) {
10025                 if (back->full_backref || !back->is_data ||
10026                     !back->found_extent_tree)
10027                         continue;
10028                 dback = to_data_backref(back);
10029                 if (dback->found_ref)
10030                         continue;
10031                 key.objectid = dback->root;
10032                 key.type = BTRFS_ROOT_ITEM_KEY;
10033                 key.offset = (u64)-1;
10034
10035                 dest_root = btrfs_read_fs_root(fs_info, &key);
10036
10037                 /* For non-exist root we just skip it */
10038                 if (IS_ERR(dest_root) || !dest_root)
10039                         continue;
10040
10041                 key.objectid = dback->owner;
10042                 key.type = BTRFS_EXTENT_DATA_KEY;
10043                 key.offset = dback->offset;
10044
10045                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10046                 btrfs_release_path(&path);
10047                 /*
10048                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10049                  * we need to record it for inode/file extent rebuild.
10050                  * For ret > 0, we record it only for file extent rebuild.
10051                  * For ret == 0, the file extent exists but only bytenr
10052                  * mismatch, let the original bytenr fix routine to handle,
10053                  * don't record it.
10054                  */
10055                 if (ret == 0)
10056                         continue;
10057                 ret = 0;
10058                 orphan = malloc(sizeof(*orphan));
10059                 if (!orphan) {
10060                         ret = -ENOMEM;
10061                         goto out;
10062                 }
10063                 INIT_LIST_HEAD(&orphan->list);
10064                 orphan->root = dback->root;
10065                 orphan->objectid = dback->owner;
10066                 orphan->offset = dback->offset;
10067                 orphan->disk_bytenr = rec->cache.start;
10068                 orphan->disk_len = rec->cache.size;
10069                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10070                 recorded_data_ref = 1;
10071         }
10072 out:
10073         btrfs_release_path(&path);
10074         if (!ret)
10075                 return !recorded_data_ref;
10076         else
10077                 return ret;
10078 }
10079
10080 /*
10081  * when an incorrect extent item is found, this will delete
10082  * all of the existing entries for it and recreate them
10083  * based on what the tree scan found.
10084  */
10085 static int fixup_extent_refs(struct btrfs_fs_info *info,
10086                              struct cache_tree *extent_cache,
10087                              struct extent_record *rec)
10088 {
10089         struct btrfs_trans_handle *trans = NULL;
10090         int ret;
10091         struct btrfs_path path;
10092         struct cache_extent *cache;
10093         struct extent_backref *back, *tmp;
10094         int allocated = 0;
10095         u64 flags = 0;
10096
10097         if (rec->flag_block_full_backref)
10098                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10099
10100         btrfs_init_path(&path);
10101         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10102                 /*
10103                  * Sometimes the backrefs themselves are so broken they don't
10104                  * get attached to any meaningful rec, so first go back and
10105                  * check any of our backrefs that we couldn't find and throw
10106                  * them into the list if we find the backref so that
10107                  * verify_backrefs can figure out what to do.
10108                  */
10109                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10110                 if (ret < 0)
10111                         goto out;
10112         }
10113
10114         /* step one, make sure all of the backrefs agree */
10115         ret = verify_backrefs(info, &path, rec);
10116         if (ret < 0)
10117                 goto out;
10118
10119         trans = btrfs_start_transaction(info->extent_root, 1);
10120         if (IS_ERR(trans)) {
10121                 ret = PTR_ERR(trans);
10122                 goto out;
10123         }
10124
10125         /* step two, delete all the existing records */
10126         ret = delete_extent_records(trans, info->extent_root, &path,
10127                                     rec->start);
10128
10129         if (ret < 0)
10130                 goto out;
10131
10132         /* was this block corrupt?  If so, don't add references to it */
10133         cache = lookup_cache_extent(info->corrupt_blocks,
10134                                     rec->start, rec->max_size);
10135         if (cache) {
10136                 ret = 0;
10137                 goto out;
10138         }
10139
10140         /* step three, recreate all the refs we did find */
10141         rbtree_postorder_for_each_entry_safe(back, tmp,
10142                                              &rec->backref_tree, node) {
10143                 /*
10144                  * if we didn't find any references, don't create a
10145                  * new extent record
10146                  */
10147                 if (!back->found_ref)
10148                         continue;
10149
10150                 rec->bad_full_backref = 0;
10151                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10152                 allocated = 1;
10153
10154                 if (ret)
10155                         goto out;
10156         }
10157 out:
10158         if (trans) {
10159                 int err = btrfs_commit_transaction(trans, info->extent_root);
10160                 if (!ret)
10161                         ret = err;
10162         }
10163
10164         if (!ret)
10165                 fprintf(stderr, "Repaired extent references for %llu\n",
10166                                 (unsigned long long)rec->start);
10167
10168         btrfs_release_path(&path);
10169         return ret;
10170 }
10171
10172 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10173                               struct extent_record *rec)
10174 {
10175         struct btrfs_trans_handle *trans;
10176         struct btrfs_root *root = fs_info->extent_root;
10177         struct btrfs_path path;
10178         struct btrfs_extent_item *ei;
10179         struct btrfs_key key;
10180         u64 flags;
10181         int ret = 0;
10182
10183         key.objectid = rec->start;
10184         if (rec->metadata) {
10185                 key.type = BTRFS_METADATA_ITEM_KEY;
10186                 key.offset = rec->info_level;
10187         } else {
10188                 key.type = BTRFS_EXTENT_ITEM_KEY;
10189                 key.offset = rec->max_size;
10190         }
10191
10192         trans = btrfs_start_transaction(root, 0);
10193         if (IS_ERR(trans))
10194                 return PTR_ERR(trans);
10195
10196         btrfs_init_path(&path);
10197         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10198         if (ret < 0) {
10199                 btrfs_release_path(&path);
10200                 btrfs_commit_transaction(trans, root);
10201                 return ret;
10202         } else if (ret) {
10203                 fprintf(stderr, "Didn't find extent for %llu\n",
10204                         (unsigned long long)rec->start);
10205                 btrfs_release_path(&path);
10206                 btrfs_commit_transaction(trans, root);
10207                 return -ENOENT;
10208         }
10209
10210         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10211                             struct btrfs_extent_item);
10212         flags = btrfs_extent_flags(path.nodes[0], ei);
10213         if (rec->flag_block_full_backref) {
10214                 fprintf(stderr, "setting full backref on %llu\n",
10215                         (unsigned long long)key.objectid);
10216                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10217         } else {
10218                 fprintf(stderr, "clearing full backref on %llu\n",
10219                         (unsigned long long)key.objectid);
10220                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10221         }
10222         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10223         btrfs_mark_buffer_dirty(path.nodes[0]);
10224         btrfs_release_path(&path);
10225         ret = btrfs_commit_transaction(trans, root);
10226         if (!ret)
10227                 fprintf(stderr, "Repaired extent flags for %llu\n",
10228                                 (unsigned long long)rec->start);
10229
10230         return ret;
10231 }
10232
10233 /* right now we only prune from the extent allocation tree */
10234 static int prune_one_block(struct btrfs_trans_handle *trans,
10235                            struct btrfs_fs_info *info,
10236                            struct btrfs_corrupt_block *corrupt)
10237 {
10238         int ret;
10239         struct btrfs_path path;
10240         struct extent_buffer *eb;
10241         u64 found;
10242         int slot;
10243         int nritems;
10244         int level = corrupt->level + 1;
10245
10246         btrfs_init_path(&path);
10247 again:
10248         /* we want to stop at the parent to our busted block */
10249         path.lowest_level = level;
10250
10251         ret = btrfs_search_slot(trans, info->extent_root,
10252                                 &corrupt->key, &path, -1, 1);
10253
10254         if (ret < 0)
10255                 goto out;
10256
10257         eb = path.nodes[level];
10258         if (!eb) {
10259                 ret = -ENOENT;
10260                 goto out;
10261         }
10262
10263         /*
10264          * hopefully the search gave us the block we want to prune,
10265          * lets try that first
10266          */
10267         slot = path.slots[level];
10268         found =  btrfs_node_blockptr(eb, slot);
10269         if (found == corrupt->cache.start)
10270                 goto del_ptr;
10271
10272         nritems = btrfs_header_nritems(eb);
10273
10274         /* the search failed, lets scan this node and hope we find it */
10275         for (slot = 0; slot < nritems; slot++) {
10276                 found =  btrfs_node_blockptr(eb, slot);
10277                 if (found == corrupt->cache.start)
10278                         goto del_ptr;
10279         }
10280         /*
10281          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10282          * to this block
10283          */
10284         if (eb == info->extent_root->node) {
10285                 ret = -ENOENT;
10286                 goto out;
10287         } else {
10288                 level++;
10289                 btrfs_release_path(&path);
10290                 goto again;
10291         }
10292
10293 del_ptr:
10294         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10295         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10296
10297 out:
10298         btrfs_release_path(&path);
10299         return ret;
10300 }
10301
10302 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10303 {
10304         struct btrfs_trans_handle *trans = NULL;
10305         struct cache_extent *cache;
10306         struct btrfs_corrupt_block *corrupt;
10307
10308         while (1) {
10309                 cache = search_cache_extent(info->corrupt_blocks, 0);
10310                 if (!cache)
10311                         break;
10312                 if (!trans) {
10313                         trans = btrfs_start_transaction(info->extent_root, 1);
10314                         if (IS_ERR(trans))
10315                                 return PTR_ERR(trans);
10316                 }
10317                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10318                 prune_one_block(trans, info, corrupt);
10319                 remove_cache_extent(info->corrupt_blocks, cache);
10320         }
10321         if (trans)
10322                 return btrfs_commit_transaction(trans, info->extent_root);
10323         return 0;
10324 }
10325
10326 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10327 {
10328         struct btrfs_block_group_cache *cache;
10329         u64 start, end;
10330         int ret;
10331
10332         while (1) {
10333                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10334                                             &start, &end, EXTENT_DIRTY);
10335                 if (ret)
10336                         break;
10337                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10338         }
10339
10340         start = 0;
10341         while (1) {
10342                 cache = btrfs_lookup_first_block_group(fs_info, start);
10343                 if (!cache)
10344                         break;
10345                 if (cache->cached)
10346                         cache->cached = 0;
10347                 start = cache->key.objectid + cache->key.offset;
10348         }
10349 }
10350
10351 static int check_extent_refs(struct btrfs_root *root,
10352                              struct cache_tree *extent_cache)
10353 {
10354         struct extent_record *rec;
10355         struct cache_extent *cache;
10356         int ret = 0;
10357         int had_dups = 0;
10358
10359         if (repair) {
10360                 /*
10361                  * if we're doing a repair, we have to make sure
10362                  * we don't allocate from the problem extents.
10363                  * In the worst case, this will be all the
10364                  * extents in the FS
10365                  */
10366                 cache = search_cache_extent(extent_cache, 0);
10367                 while(cache) {
10368                         rec = container_of(cache, struct extent_record, cache);
10369                         set_extent_dirty(root->fs_info->excluded_extents,
10370                                          rec->start,
10371                                          rec->start + rec->max_size - 1);
10372                         cache = next_cache_extent(cache);
10373                 }
10374
10375                 /* pin down all the corrupted blocks too */
10376                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10377                 while(cache) {
10378                         set_extent_dirty(root->fs_info->excluded_extents,
10379                                          cache->start,
10380                                          cache->start + cache->size - 1);
10381                         cache = next_cache_extent(cache);
10382                 }
10383                 prune_corrupt_blocks(root->fs_info);
10384                 reset_cached_block_groups(root->fs_info);
10385         }
10386
10387         reset_cached_block_groups(root->fs_info);
10388
10389         /*
10390          * We need to delete any duplicate entries we find first otherwise we
10391          * could mess up the extent tree when we have backrefs that actually
10392          * belong to a different extent item and not the weird duplicate one.
10393          */
10394         while (repair && !list_empty(&duplicate_extents)) {
10395                 rec = to_extent_record(duplicate_extents.next);
10396                 list_del_init(&rec->list);
10397
10398                 /* Sometimes we can find a backref before we find an actual
10399                  * extent, so we need to process it a little bit to see if there
10400                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10401                  * if this is a backref screwup.  If we need to delete stuff
10402                  * process_duplicates() will return 0, otherwise it will return
10403                  * 1 and we
10404                  */
10405                 if (process_duplicates(extent_cache, rec))
10406                         continue;
10407                 ret = delete_duplicate_records(root, rec);
10408                 if (ret < 0)
10409                         return ret;
10410                 /*
10411                  * delete_duplicate_records will return the number of entries
10412                  * deleted, so if it's greater than 0 then we know we actually
10413                  * did something and we need to remove.
10414                  */
10415                 if (ret)
10416                         had_dups = 1;
10417         }
10418
10419         if (had_dups)
10420                 return -EAGAIN;
10421
10422         while(1) {
10423                 int cur_err = 0;
10424                 int fix = 0;
10425
10426                 cache = search_cache_extent(extent_cache, 0);
10427                 if (!cache)
10428                         break;
10429                 rec = container_of(cache, struct extent_record, cache);
10430                 if (rec->num_duplicates) {
10431                         fprintf(stderr, "extent item %llu has multiple extent "
10432                                 "items\n", (unsigned long long)rec->start);
10433                         cur_err = 1;
10434                 }
10435
10436                 if (rec->refs != rec->extent_item_refs) {
10437                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10438                                 (unsigned long long)rec->start,
10439                                 (unsigned long long)rec->nr);
10440                         fprintf(stderr, "extent item %llu, found %llu\n",
10441                                 (unsigned long long)rec->extent_item_refs,
10442                                 (unsigned long long)rec->refs);
10443                         ret = record_orphan_data_extents(root->fs_info, rec);
10444                         if (ret < 0)
10445                                 goto repair_abort;
10446                         fix = ret;
10447                         cur_err = 1;
10448                 }
10449                 if (all_backpointers_checked(rec, 1)) {
10450                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10451                                 (unsigned long long)rec->start,
10452                                 (unsigned long long)rec->nr);
10453                         fix = 1;
10454                         cur_err = 1;
10455                 }
10456                 if (!rec->owner_ref_checked) {
10457                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10458                                 (unsigned long long)rec->start,
10459                                 (unsigned long long)rec->nr);
10460                         fix = 1;
10461                         cur_err = 1;
10462                 }
10463
10464                 if (repair && fix) {
10465                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10466                         if (ret)
10467                                 goto repair_abort;
10468                 }
10469
10470
10471                 if (rec->bad_full_backref) {
10472                         fprintf(stderr, "bad full backref, on [%llu]\n",
10473                                 (unsigned long long)rec->start);
10474                         if (repair) {
10475                                 ret = fixup_extent_flags(root->fs_info, rec);
10476                                 if (ret)
10477                                         goto repair_abort;
10478                                 fix = 1;
10479                         }
10480                         cur_err = 1;
10481                 }
10482                 /*
10483                  * Although it's not a extent ref's problem, we reuse this
10484                  * routine for error reporting.
10485                  * No repair function yet.
10486                  */
10487                 if (rec->crossing_stripes) {
10488                         fprintf(stderr,
10489                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10490                                 rec->start, rec->start + rec->max_size);
10491                         cur_err = 1;
10492                 }
10493
10494                 if (rec->wrong_chunk_type) {
10495                         fprintf(stderr,
10496                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10497                                 rec->start, rec->start + rec->max_size);
10498                         cur_err = 1;
10499                 }
10500
10501                 remove_cache_extent(extent_cache, cache);
10502                 free_all_extent_backrefs(rec);
10503                 if (!init_extent_tree && repair && (!cur_err || fix))
10504                         clear_extent_dirty(root->fs_info->excluded_extents,
10505                                            rec->start,
10506                                            rec->start + rec->max_size - 1);
10507                 free(rec);
10508         }
10509 repair_abort:
10510         if (repair) {
10511                 if (ret && ret != -EAGAIN) {
10512                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10513                         exit(1);
10514                 } else if (!ret) {
10515                         struct btrfs_trans_handle *trans;
10516
10517                         root = root->fs_info->extent_root;
10518                         trans = btrfs_start_transaction(root, 1);
10519                         if (IS_ERR(trans)) {
10520                                 ret = PTR_ERR(trans);
10521                                 goto repair_abort;
10522                         }
10523
10524                         ret = btrfs_fix_block_accounting(trans, root);
10525                         if (ret)
10526                                 goto repair_abort;
10527                         ret = btrfs_commit_transaction(trans, root);
10528                         if (ret)
10529                                 goto repair_abort;
10530                 }
10531                 return ret;
10532         }
10533         return 0;
10534 }
10535
10536 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10537 {
10538         u64 stripe_size;
10539
10540         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10541                 stripe_size = length;
10542                 stripe_size /= num_stripes;
10543         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10544                 stripe_size = length * 2;
10545                 stripe_size /= num_stripes;
10546         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10547                 stripe_size = length;
10548                 stripe_size /= (num_stripes - 1);
10549         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10550                 stripe_size = length;
10551                 stripe_size /= (num_stripes - 2);
10552         } else {
10553                 stripe_size = length;
10554         }
10555         return stripe_size;
10556 }
10557
/*
 * Check the chunk with its block group/dev list ref:
 * Return 0 if all refs seem valid.
 * Return 1 if only part of the refs seem valid and a later check is needed
 * to rebuild the rest, e.g. a missing block group that requires searching
 * the extent tree to rebuild it.
 * Return -1 if essential refs are missing and cannot be rebuilt.
 */
10565 static int check_chunk_refs(struct chunk_record *chunk_rec,
10566                             struct block_group_tree *block_group_cache,
10567                             struct device_extent_tree *dev_extent_cache,
10568                             int silent)
10569 {
10570         struct cache_extent *block_group_item;
10571         struct block_group_record *block_group_rec;
10572         struct cache_extent *dev_extent_item;
10573         struct device_extent_record *dev_extent_rec;
10574         u64 devid;
10575         u64 offset;
10576         u64 length;
10577         int metadump_v2 = 0;
10578         int i;
10579         int ret = 0;
10580
10581         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10582                                                chunk_rec->offset,
10583                                                chunk_rec->length);
10584         if (block_group_item) {
10585                 block_group_rec = container_of(block_group_item,
10586                                                struct block_group_record,
10587                                                cache);
10588                 if (chunk_rec->length != block_group_rec->offset ||
10589                     chunk_rec->offset != block_group_rec->objectid ||
10590                     (!metadump_v2 &&
10591                      chunk_rec->type_flags != block_group_rec->flags)) {
10592                         if (!silent)
10593                                 fprintf(stderr,
10594                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10595                                         chunk_rec->objectid,
10596                                         chunk_rec->type,
10597                                         chunk_rec->offset,
10598                                         chunk_rec->length,
10599                                         chunk_rec->offset,
10600                                         chunk_rec->type_flags,
10601                                         block_group_rec->objectid,
10602                                         block_group_rec->type,
10603                                         block_group_rec->offset,
10604                                         block_group_rec->offset,
10605                                         block_group_rec->objectid,
10606                                         block_group_rec->flags);
10607                         ret = -1;
10608                 } else {
10609                         list_del_init(&block_group_rec->list);
10610                         chunk_rec->bg_rec = block_group_rec;
10611                 }
10612         } else {
10613                 if (!silent)
10614                         fprintf(stderr,
10615                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10616                                 chunk_rec->objectid,
10617                                 chunk_rec->type,
10618                                 chunk_rec->offset,
10619                                 chunk_rec->length,
10620                                 chunk_rec->offset,
10621                                 chunk_rec->type_flags);
10622                 ret = 1;
10623         }
10624
10625         if (metadump_v2)
10626                 return ret;
10627
10628         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10629                                     chunk_rec->num_stripes);
10630         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10631                 devid = chunk_rec->stripes[i].devid;
10632                 offset = chunk_rec->stripes[i].offset;
10633                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10634                                                        devid, offset, length);
10635                 if (dev_extent_item) {
10636                         dev_extent_rec = container_of(dev_extent_item,
10637                                                 struct device_extent_record,
10638                                                 cache);
10639                         if (dev_extent_rec->objectid != devid ||
10640                             dev_extent_rec->offset != offset ||
10641                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10642                             dev_extent_rec->length != length) {
10643                                 if (!silent)
10644                                         fprintf(stderr,
10645                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10646                                                 chunk_rec->objectid,
10647                                                 chunk_rec->type,
10648                                                 chunk_rec->offset,
10649                                                 chunk_rec->stripes[i].devid,
10650                                                 chunk_rec->stripes[i].offset,
10651                                                 dev_extent_rec->objectid,
10652                                                 dev_extent_rec->offset,
10653                                                 dev_extent_rec->length);
10654                                 ret = -1;
10655                         } else {
10656                                 list_move(&dev_extent_rec->chunk_list,
10657                                           &chunk_rec->dextents);
10658                         }
10659                 } else {
10660                         if (!silent)
10661                                 fprintf(stderr,
10662                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10663                                         chunk_rec->objectid,
10664                                         chunk_rec->type,
10665                                         chunk_rec->offset,
10666                                         chunk_rec->stripes[i].devid,
10667                                         chunk_rec->stripes[i].offset);
10668                         ret = -1;
10669                 }
10670         }
10671         return ret;
10672 }
10673
10674 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10675 int check_chunks(struct cache_tree *chunk_cache,
10676                  struct block_group_tree *block_group_cache,
10677                  struct device_extent_tree *dev_extent_cache,
10678                  struct list_head *good, struct list_head *bad,
10679                  struct list_head *rebuild, int silent)
10680 {
10681         struct cache_extent *chunk_item;
10682         struct chunk_record *chunk_rec;
10683         struct block_group_record *bg_rec;
10684         struct device_extent_record *dext_rec;
10685         int err;
10686         int ret = 0;
10687
10688         chunk_item = first_cache_extent(chunk_cache);
10689         while (chunk_item) {
10690                 chunk_rec = container_of(chunk_item, struct chunk_record,
10691                                          cache);
10692                 err = check_chunk_refs(chunk_rec, block_group_cache,
10693                                        dev_extent_cache, silent);
10694                 if (err < 0)
10695                         ret = err;
10696                 if (err == 0 && good)
10697                         list_add_tail(&chunk_rec->list, good);
10698                 if (err > 0 && rebuild)
10699                         list_add_tail(&chunk_rec->list, rebuild);
10700                 if (err < 0 && bad)
10701                         list_add_tail(&chunk_rec->list, bad);
10702                 chunk_item = next_cache_extent(chunk_item);
10703         }
10704
10705         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10706                 if (!silent)
10707                         fprintf(stderr,
10708                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10709                                 bg_rec->objectid,
10710                                 bg_rec->offset,
10711                                 bg_rec->flags);
10712                 if (!ret)
10713                         ret = 1;
10714         }
10715
10716         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10717                             chunk_list) {
10718                 if (!silent)
10719                         fprintf(stderr,
10720                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10721                                 dext_rec->objectid,
10722                                 dext_rec->offset,
10723                                 dext_rec->length);
10724                 if (!ret)
10725                         ret = 1;
10726         }
10727         return ret;
10728 }
10729
10730
10731 static int check_device_used(struct device_record *dev_rec,
10732                              struct device_extent_tree *dext_cache)
10733 {
10734         struct cache_extent *cache;
10735         struct device_extent_record *dev_extent_rec;
10736         u64 total_byte = 0;
10737
10738         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10739         while (cache) {
10740                 dev_extent_rec = container_of(cache,
10741                                               struct device_extent_record,
10742                                               cache);
10743                 if (dev_extent_rec->objectid != dev_rec->devid)
10744                         break;
10745
10746                 list_del_init(&dev_extent_rec->device_list);
10747                 total_byte += dev_extent_rec->length;
10748                 cache = next_cache_extent(cache);
10749         }
10750
10751         if (total_byte != dev_rec->byte_used) {
10752                 fprintf(stderr,
10753                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10754                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10755                         dev_rec->type, dev_rec->offset);
10756                 return -1;
10757         } else {
10758                 return 0;
10759         }
10760 }
10761
10762 /* check btrfs_dev_item -> btrfs_dev_extent */
10763 static int check_devices(struct rb_root *dev_cache,
10764                          struct device_extent_tree *dev_extent_cache)
10765 {
10766         struct rb_node *dev_node;
10767         struct device_record *dev_rec;
10768         struct device_extent_record *dext_rec;
10769         int err;
10770         int ret = 0;
10771
10772         dev_node = rb_first(dev_cache);
10773         while (dev_node) {
10774                 dev_rec = container_of(dev_node, struct device_record, node);
10775                 err = check_device_used(dev_rec, dev_extent_cache);
10776                 if (err)
10777                         ret = err;
10778
10779                 dev_node = rb_next(dev_node);
10780         }
10781         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10782                             device_list) {
10783                 fprintf(stderr,
10784                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10785                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10786                 if (!ret)
10787                         ret = 1;
10788         }
10789         return ret;
10790 }
10791
10792 static int add_root_item_to_list(struct list_head *head,
10793                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10794                                   u8 level, u8 drop_level,
10795                                   struct btrfs_key *drop_key)
10796 {
10797
10798         struct root_item_record *ri_rec;
10799         ri_rec = malloc(sizeof(*ri_rec));
10800         if (!ri_rec)
10801                 return -ENOMEM;
10802         ri_rec->bytenr = bytenr;
10803         ri_rec->objectid = objectid;
10804         ri_rec->level = level;
10805         ri_rec->drop_level = drop_level;
10806         ri_rec->last_snapshot = last_snapshot;
10807         if (drop_key)
10808                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10809         list_add_tail(&ri_rec->list, head);
10810
10811         return 0;
10812 }
10813
10814 static void free_root_item_list(struct list_head *list)
10815 {
10816         struct root_item_record *ri_rec;
10817
10818         while (!list_empty(list)) {
10819                 ri_rec = list_first_entry(list, struct root_item_record,
10820                                           list);
10821                 list_del_init(&ri_rec->list);
10822                 free(ri_rec);
10823         }
10824 }
10825
10826 static int deal_root_from_list(struct list_head *list,
10827                                struct btrfs_root *root,
10828                                struct block_info *bits,
10829                                int bits_nr,
10830                                struct cache_tree *pending,
10831                                struct cache_tree *seen,
10832                                struct cache_tree *reada,
10833                                struct cache_tree *nodes,
10834                                struct cache_tree *extent_cache,
10835                                struct cache_tree *chunk_cache,
10836                                struct rb_root *dev_cache,
10837                                struct block_group_tree *block_group_cache,
10838                                struct device_extent_tree *dev_extent_cache)
10839 {
10840         int ret = 0;
10841         u64 last;
10842
10843         while (!list_empty(list)) {
10844                 struct root_item_record *rec;
10845                 struct extent_buffer *buf;
10846                 rec = list_entry(list->next,
10847                                  struct root_item_record, list);
10848                 last = 0;
10849                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10850                 if (!extent_buffer_uptodate(buf)) {
10851                         free_extent_buffer(buf);
10852                         ret = -EIO;
10853                         break;
10854                 }
10855                 ret = add_root_to_pending(buf, extent_cache, pending,
10856                                     seen, nodes, rec->objectid);
10857                 if (ret < 0)
10858                         break;
10859                 /*
10860                  * To rebuild extent tree, we need deal with snapshot
10861                  * one by one, otherwise we deal with node firstly which
10862                  * can maximize readahead.
10863                  */
10864                 while (1) {
10865                         ret = run_next_block(root, bits, bits_nr, &last,
10866                                              pending, seen, reada, nodes,
10867                                              extent_cache, chunk_cache,
10868                                              dev_cache, block_group_cache,
10869                                              dev_extent_cache, rec);
10870                         if (ret != 0)
10871                                 break;
10872                 }
10873                 free_extent_buffer(buf);
10874                 list_del(&rec->list);
10875                 free(rec);
10876                 if (ret < 0)
10877                         break;
10878         }
10879         while (ret >= 0) {
10880                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10881                                      reada, nodes, extent_cache, chunk_cache,
10882                                      dev_cache, block_group_cache,
10883                                      dev_extent_cache, NULL);
10884                 if (ret != 0) {
10885                         if (ret > 0)
10886                                 ret = 0;
10887                         break;
10888                 }
10889         }
10890         return ret;
10891 }
10892
10893 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10894 {
10895         struct rb_root dev_cache;
10896         struct cache_tree chunk_cache;
10897         struct block_group_tree block_group_cache;
10898         struct device_extent_tree dev_extent_cache;
10899         struct cache_tree extent_cache;
10900         struct cache_tree seen;
10901         struct cache_tree pending;
10902         struct cache_tree reada;
10903         struct cache_tree nodes;
10904         struct extent_io_tree excluded_extents;
10905         struct cache_tree corrupt_blocks;
10906         struct btrfs_path path;
10907         struct btrfs_key key;
10908         struct btrfs_key found_key;
10909         int ret, err = 0;
10910         struct block_info *bits;
10911         int bits_nr;
10912         struct extent_buffer *leaf;
10913         int slot;
10914         struct btrfs_root_item ri;
10915         struct list_head dropping_trees;
10916         struct list_head normal_trees;
10917         struct btrfs_root *root1;
10918         struct btrfs_root *root;
10919         u64 objectid;
10920         u8 level;
10921
10922         root = fs_info->fs_root;
10923         dev_cache = RB_ROOT;
10924         cache_tree_init(&chunk_cache);
10925         block_group_tree_init(&block_group_cache);
10926         device_extent_tree_init(&dev_extent_cache);
10927
10928         cache_tree_init(&extent_cache);
10929         cache_tree_init(&seen);
10930         cache_tree_init(&pending);
10931         cache_tree_init(&nodes);
10932         cache_tree_init(&reada);
10933         cache_tree_init(&corrupt_blocks);
10934         extent_io_tree_init(&excluded_extents);
10935         INIT_LIST_HEAD(&dropping_trees);
10936         INIT_LIST_HEAD(&normal_trees);
10937
10938         if (repair) {
10939                 fs_info->excluded_extents = &excluded_extents;
10940                 fs_info->fsck_extent_cache = &extent_cache;
10941                 fs_info->free_extent_hook = free_extent_hook;
10942                 fs_info->corrupt_blocks = &corrupt_blocks;
10943         }
10944
10945         bits_nr = 1024;
10946         bits = malloc(bits_nr * sizeof(struct block_info));
10947         if (!bits) {
10948                 perror("malloc");
10949                 exit(1);
10950         }
10951
10952         if (ctx.progress_enabled) {
10953                 ctx.tp = TASK_EXTENTS;
10954                 task_start(ctx.info);
10955         }
10956
10957 again:
10958         root1 = fs_info->tree_root;
10959         level = btrfs_header_level(root1->node);
10960         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10961                                     root1->node->start, 0, level, 0, NULL);
10962         if (ret < 0)
10963                 goto out;
10964         root1 = fs_info->chunk_root;
10965         level = btrfs_header_level(root1->node);
10966         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10967                                     root1->node->start, 0, level, 0, NULL);
10968         if (ret < 0)
10969                 goto out;
10970         btrfs_init_path(&path);
10971         key.offset = 0;
10972         key.objectid = 0;
10973         key.type = BTRFS_ROOT_ITEM_KEY;
10974         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10975         if (ret < 0)
10976                 goto out;
10977         while(1) {
10978                 leaf = path.nodes[0];
10979                 slot = path.slots[0];
10980                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10981                         ret = btrfs_next_leaf(root, &path);
10982                         if (ret != 0)
10983                                 break;
10984                         leaf = path.nodes[0];
10985                         slot = path.slots[0];
10986                 }
10987                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10988                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10989                         unsigned long offset;
10990                         u64 last_snapshot;
10991
10992                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10993                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10994                         last_snapshot = btrfs_root_last_snapshot(&ri);
10995                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10996                                 level = btrfs_root_level(&ri);
10997                                 ret = add_root_item_to_list(&normal_trees,
10998                                                 found_key.objectid,
10999                                                 btrfs_root_bytenr(&ri),
11000                                                 last_snapshot, level,
11001                                                 0, NULL);
11002                                 if (ret < 0)
11003                                         goto out;
11004                         } else {
11005                                 level = btrfs_root_level(&ri);
11006                                 objectid = found_key.objectid;
11007                                 btrfs_disk_key_to_cpu(&found_key,
11008                                                       &ri.drop_progress);
11009                                 ret = add_root_item_to_list(&dropping_trees,
11010                                                 objectid,
11011                                                 btrfs_root_bytenr(&ri),
11012                                                 last_snapshot, level,
11013                                                 ri.drop_level, &found_key);
11014                                 if (ret < 0)
11015                                         goto out;
11016                         }
11017                 }
11018                 path.slots[0]++;
11019         }
11020         btrfs_release_path(&path);
11021
11022         /*
11023          * check_block can return -EAGAIN if it fixes something, please keep
11024          * this in mind when dealing with return values from these functions, if
11025          * we get -EAGAIN we want to fall through and restart the loop.
11026          */
11027         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11028                                   &seen, &reada, &nodes, &extent_cache,
11029                                   &chunk_cache, &dev_cache, &block_group_cache,
11030                                   &dev_extent_cache);
11031         if (ret < 0) {
11032                 if (ret == -EAGAIN)
11033                         goto loop;
11034                 goto out;
11035         }
11036         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11037                                   &pending, &seen, &reada, &nodes,
11038                                   &extent_cache, &chunk_cache, &dev_cache,
11039                                   &block_group_cache, &dev_extent_cache);
11040         if (ret < 0) {
11041                 if (ret == -EAGAIN)
11042                         goto loop;
11043                 goto out;
11044         }
11045
11046         ret = check_chunks(&chunk_cache, &block_group_cache,
11047                            &dev_extent_cache, NULL, NULL, NULL, 0);
11048         if (ret) {
11049                 if (ret == -EAGAIN)
11050                         goto loop;
11051                 err = ret;
11052         }
11053
11054         ret = check_extent_refs(root, &extent_cache);
11055         if (ret < 0) {
11056                 if (ret == -EAGAIN)
11057                         goto loop;
11058                 goto out;
11059         }
11060
11061         ret = check_devices(&dev_cache, &dev_extent_cache);
11062         if (ret && err)
11063                 ret = err;
11064
11065 out:
11066         task_stop(ctx.info);
11067         if (repair) {
11068                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11069                 extent_io_tree_cleanup(&excluded_extents);
11070                 fs_info->fsck_extent_cache = NULL;
11071                 fs_info->free_extent_hook = NULL;
11072                 fs_info->corrupt_blocks = NULL;
11073                 fs_info->excluded_extents = NULL;
11074         }
11075         free(bits);
11076         free_chunk_cache_tree(&chunk_cache);
11077         free_device_cache_tree(&dev_cache);
11078         free_block_group_tree(&block_group_cache);
11079         free_device_extent_tree(&dev_extent_cache);
11080         free_extent_cache_tree(&seen);
11081         free_extent_cache_tree(&pending);
11082         free_extent_cache_tree(&reada);
11083         free_extent_cache_tree(&nodes);
11084         free_root_item_list(&normal_trees);
11085         free_root_item_list(&dropping_trees);
11086         return ret;
11087 loop:
11088         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11089         free_extent_cache_tree(&seen);
11090         free_extent_cache_tree(&pending);
11091         free_extent_cache_tree(&reada);
11092         free_extent_cache_tree(&nodes);
11093         free_chunk_cache_tree(&chunk_cache);
11094         free_block_group_tree(&block_group_cache);
11095         free_device_cache_tree(&dev_cache);
11096         free_device_extent_tree(&dev_extent_cache);
11097         free_extent_record_cache(&extent_cache);
11098         free_root_item_list(&normal_trees);
11099         free_root_item_list(&dropping_trees);
11100         extent_io_tree_cleanup(&excluded_extents);
11101         goto again;
11102 }
11103
11104 /*
11105  * Check backrefs of a tree block given by @bytenr or @eb.
11106  *
11107  * @root:       the root containing the @bytenr or @eb
11108  * @eb:         tree block extent buffer, can be NULL
11109  * @bytenr:     bytenr of the tree block to search
11110  * @level:      tree level of the tree block
11111  * @owner:      owner of the tree block
11112  *
11113  * Return >0 for any error found and output error message
11114  * Return 0 for no error found
11115  */
11116 static int check_tree_block_ref(struct btrfs_root *root,
11117                                 struct extent_buffer *eb, u64 bytenr,
11118                                 int level, u64 owner)
11119 {
11120         struct btrfs_key key;
11121         struct btrfs_root *extent_root = root->fs_info->extent_root;
11122         struct btrfs_path path;
11123         struct btrfs_extent_item *ei;
11124         struct btrfs_extent_inline_ref *iref;
11125         struct extent_buffer *leaf;
11126         unsigned long end;
11127         unsigned long ptr;
11128         int slot;
11129         int skinny_level;
11130         int type;
11131         u32 nodesize = root->fs_info->nodesize;
11132         u32 item_size;
11133         u64 offset;
11134         int tree_reloc_root = 0;
11135         int found_ref = 0;
11136         int err = 0;
11137         int ret;
11138
11139         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11140             btrfs_header_bytenr(root->node) == bytenr)
11141                 tree_reloc_root = 1;
11142
11143         btrfs_init_path(&path);
11144         key.objectid = bytenr;
11145         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11146                 key.type = BTRFS_METADATA_ITEM_KEY;
11147         else
11148                 key.type = BTRFS_EXTENT_ITEM_KEY;
11149         key.offset = (u64)-1;
11150
11151         /* Search for the backref in extent tree */
11152         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11153         if (ret < 0) {
11154                 err |= BACKREF_MISSING;
11155                 goto out;
11156         }
11157         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11158         if (ret) {
11159                 err |= BACKREF_MISSING;
11160                 goto out;
11161         }
11162
11163         leaf = path.nodes[0];
11164         slot = path.slots[0];
11165         btrfs_item_key_to_cpu(leaf, &key, slot);
11166
11167         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11168
11169         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11170                 skinny_level = (int)key.offset;
11171                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11172         } else {
11173                 struct btrfs_tree_block_info *info;
11174
11175                 info = (struct btrfs_tree_block_info *)(ei + 1);
11176                 skinny_level = btrfs_tree_block_level(leaf, info);
11177                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11178         }
11179
11180         if (eb) {
11181                 u64 header_gen;
11182                 u64 extent_gen;
11183
11184                 if (!(btrfs_extent_flags(leaf, ei) &
11185                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11186                         error(
11187                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11188                                 key.objectid, nodesize,
11189                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11190                         err = BACKREF_MISMATCH;
11191                 }
11192                 header_gen = btrfs_header_generation(eb);
11193                 extent_gen = btrfs_extent_generation(leaf, ei);
11194                 if (header_gen != extent_gen) {
11195                         error(
11196         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11197                                 key.objectid, nodesize, header_gen,
11198                                 extent_gen);
11199                         err = BACKREF_MISMATCH;
11200                 }
11201                 if (level != skinny_level) {
11202                         error(
11203                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11204                                 key.objectid, nodesize, level, skinny_level);
11205                         err = BACKREF_MISMATCH;
11206                 }
11207                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11208                         error(
11209                         "extent[%llu %u] is referred by other roots than %llu",
11210                                 key.objectid, nodesize, root->objectid);
11211                         err = BACKREF_MISMATCH;
11212                 }
11213         }
11214
11215         /*
11216          * Iterate the extent/metadata item to find the exact backref
11217          */
11218         item_size = btrfs_item_size_nr(leaf, slot);
11219         ptr = (unsigned long)iref;
11220         end = (unsigned long)ei + item_size;
11221         while (ptr < end) {
11222                 iref = (struct btrfs_extent_inline_ref *)ptr;
11223                 type = btrfs_extent_inline_ref_type(leaf, iref);
11224                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11225
11226                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11227                         (offset == root->objectid || offset == owner)) {
11228                         found_ref = 1;
11229                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11230                         /*
11231                          * Backref of tree reloc root points to itself, no need
11232                          * to check backref any more.
11233                          */
11234                         if (tree_reloc_root)
11235                                 found_ref = 1;
11236                         else
11237                         /* Check if the backref points to valid referencer */
11238                                 found_ref = !check_tree_block_ref(root, NULL,
11239                                                 offset, level + 1, owner);
11240                 }
11241
11242                 if (found_ref)
11243                         break;
11244                 ptr += btrfs_extent_inline_ref_size(type);
11245         }
11246
11247         /*
11248          * Inlined extent item doesn't have what we need, check
11249          * TREE_BLOCK_REF_KEY
11250          */
11251         if (!found_ref) {
11252                 btrfs_release_path(&path);
11253                 key.objectid = bytenr;
11254                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11255                 key.offset = root->objectid;
11256
11257                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11258                 if (!ret)
11259                         found_ref = 1;
11260         }
11261         if (!found_ref)
11262                 err |= BACKREF_MISSING;
11263 out:
11264         btrfs_release_path(&path);
11265         if (eb && (err & BACKREF_MISSING))
11266                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
11267                         bytenr, nodesize, owner, level);
11268         return err;
11269 }
11270
11271 /*
11272  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11273  *
11274  * Return >0 any error found and output error message
11275  * Return 0 for no error found
11276  */
11277 static int check_extent_data_item(struct btrfs_root *root,
11278                                   struct extent_buffer *eb, int slot)
11279 {
11280         struct btrfs_file_extent_item *fi;
11281         struct btrfs_path path;
11282         struct btrfs_root *extent_root = root->fs_info->extent_root;
11283         struct btrfs_key fi_key;
11284         struct btrfs_key dbref_key;
11285         struct extent_buffer *leaf;
11286         struct btrfs_extent_item *ei;
11287         struct btrfs_extent_inline_ref *iref;
11288         struct btrfs_extent_data_ref *dref;
11289         u64 owner;
11290         u64 disk_bytenr;
11291         u64 disk_num_bytes;
11292         u64 extent_num_bytes;
11293         u64 extent_flags;
11294         u32 item_size;
11295         unsigned long end;
11296         unsigned long ptr;
11297         int type;
11298         u64 ref_root;
11299         int found_dbackref = 0;
11300         int err = 0;
11301         int ret;
11302
11303         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11304         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11305
11306         /* Nothing to check for hole and inline data extents */
11307         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11308             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11309                 return 0;
11310
11311         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11312         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11313         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11314
11315         /* Check unaligned disk_num_bytes and num_bytes */
11316         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11317                 error(
11318 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11319                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11320                         root->fs_info->sectorsize);
11321                 err |= BYTES_UNALIGNED;
11322         } else {
11323                 data_bytes_allocated += disk_num_bytes;
11324         }
11325         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11326                 error(
11327 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11328                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11329                         root->fs_info->sectorsize);
11330                 err |= BYTES_UNALIGNED;
11331         } else {
11332                 data_bytes_referenced += extent_num_bytes;
11333         }
11334         owner = btrfs_header_owner(eb);
11335
11336         /* Check the extent item of the file extent in extent tree */
11337         btrfs_init_path(&path);
11338         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11339         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11340         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11341
11342         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11343         if (ret)
11344                 goto out;
11345
11346         leaf = path.nodes[0];
11347         slot = path.slots[0];
11348         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11349
11350         extent_flags = btrfs_extent_flags(leaf, ei);
11351
11352         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11353                 error(
11354                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11355                     disk_bytenr, disk_num_bytes,
11356                     BTRFS_EXTENT_FLAG_DATA);
11357                 err |= BACKREF_MISMATCH;
11358         }
11359
11360         /* Check data backref inside that extent item */
11361         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11362         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11363         ptr = (unsigned long)iref;
11364         end = (unsigned long)ei + item_size;
11365         while (ptr < end) {
11366                 iref = (struct btrfs_extent_inline_ref *)ptr;
11367                 type = btrfs_extent_inline_ref_type(leaf, iref);
11368                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11369
11370                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11371                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11372                         if (ref_root == owner || ref_root == root->objectid)
11373                                 found_dbackref = 1;
11374                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11375                         found_dbackref = !check_tree_block_ref(root, NULL,
11376                                 btrfs_extent_inline_ref_offset(leaf, iref),
11377                                 0, owner);
11378                 }
11379
11380                 if (found_dbackref)
11381                         break;
11382                 ptr += btrfs_extent_inline_ref_size(type);
11383         }
11384
11385         if (!found_dbackref) {
11386                 btrfs_release_path(&path);
11387
11388                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11389                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11390                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11391                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11392                                 fi_key.objectid, fi_key.offset);
11393
11394                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11395                                         &dbref_key, &path, 0, 0);
11396                 if (!ret) {
11397                         found_dbackref = 1;
11398                         goto out;
11399                 }
11400
11401                 btrfs_release_path(&path);
11402
11403                 /*
11404                  * Neither inlined nor EXTENT_DATA_REF found, try
11405                  * SHARED_DATA_REF as last chance.
11406                  */
11407                 dbref_key.objectid = disk_bytenr;
11408                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11409                 dbref_key.offset = eb->start;
11410
11411                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11412                                         &dbref_key, &path, 0, 0);
11413                 if (!ret) {
11414                         found_dbackref = 1;
11415                         goto out;
11416                 }
11417         }
11418
11419 out:
11420         if (!found_dbackref)
11421                 err |= BACKREF_MISSING;
11422         btrfs_release_path(&path);
11423         if (err & BACKREF_MISSING) {
11424                 error("data extent[%llu %llu] backref lost",
11425                       disk_bytenr, disk_num_bytes);
11426         }
11427         return err;
11428 }
11429
11430 /*
11431  * Get real tree block level for the case like shared block
11432  * Return >= 0 as tree level
11433  * Return <0 for error
11434  */
11435 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11436 {
11437         struct extent_buffer *eb;
11438         struct btrfs_path path;
11439         struct btrfs_key key;
11440         struct btrfs_extent_item *ei;
11441         u64 flags;
11442         u64 transid;
11443         u8 backref_level;
11444         u8 header_level;
11445         int ret;
11446
11447         /* Search extent tree for extent generation and level */
11448         key.objectid = bytenr;
11449         key.type = BTRFS_METADATA_ITEM_KEY;
11450         key.offset = (u64)-1;
11451
11452         btrfs_init_path(&path);
11453         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11454         if (ret < 0)
11455                 goto release_out;
11456         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11457         if (ret < 0)
11458                 goto release_out;
11459         if (ret > 0) {
11460                 ret = -ENOENT;
11461                 goto release_out;
11462         }
11463
11464         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11465         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11466                             struct btrfs_extent_item);
11467         flags = btrfs_extent_flags(path.nodes[0], ei);
11468         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11469                 ret = -ENOENT;
11470                 goto release_out;
11471         }
11472
11473         /* Get transid for later read_tree_block() check */
11474         transid = btrfs_extent_generation(path.nodes[0], ei);
11475
11476         /* Get backref level as one source */
11477         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11478                 backref_level = key.offset;
11479         } else {
11480                 struct btrfs_tree_block_info *info;
11481
11482                 info = (struct btrfs_tree_block_info *)(ei + 1);
11483                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11484         }
11485         btrfs_release_path(&path);
11486
11487         /* Get level from tree block as an alternative source */
11488         eb = read_tree_block(fs_info, bytenr, transid);
11489         if (!extent_buffer_uptodate(eb)) {
11490                 free_extent_buffer(eb);
11491                 return -EIO;
11492         }
11493         header_level = btrfs_header_level(eb);
11494         free_extent_buffer(eb);
11495
11496         if (header_level != backref_level)
11497                 return -EIO;
11498         return header_level;
11499
11500 release_out:
11501         btrfs_release_path(&path);
11502         return ret;
11503 }
11504
11505 /*
11506  * Check if a tree block backref is valid (points to a valid tree block)
11507  * if level == -1, level will be resolved
11508  * Return >0 for any error found and print error message
11509  */
11510 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11511                                     u64 bytenr, int level)
11512 {
11513         struct btrfs_root *root;
11514         struct btrfs_key key;
11515         struct btrfs_path path;
11516         struct extent_buffer *eb;
11517         struct extent_buffer *node;
11518         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11519         int err = 0;
11520         int ret;
11521
11522         /* Query level for level == -1 special case */
11523         if (level == -1)
11524                 level = query_tree_block_level(fs_info, bytenr);
11525         if (level < 0) {
11526                 err |= REFERENCER_MISSING;
11527                 goto out;
11528         }
11529
11530         key.objectid = root_id;
11531         key.type = BTRFS_ROOT_ITEM_KEY;
11532         key.offset = (u64)-1;
11533
11534         root = btrfs_read_fs_root(fs_info, &key);
11535         if (IS_ERR(root)) {
11536                 err |= REFERENCER_MISSING;
11537                 goto out;
11538         }
11539
11540         /* Read out the tree block to get item/node key */
11541         eb = read_tree_block(fs_info, bytenr, 0);
11542         if (!extent_buffer_uptodate(eb)) {
11543                 err |= REFERENCER_MISSING;
11544                 free_extent_buffer(eb);
11545                 goto out;
11546         }
11547
11548         /* Empty tree, no need to check key */
11549         if (!btrfs_header_nritems(eb) && !level) {
11550                 free_extent_buffer(eb);
11551                 goto out;
11552         }
11553
11554         if (level)
11555                 btrfs_node_key_to_cpu(eb, &key, 0);
11556         else
11557                 btrfs_item_key_to_cpu(eb, &key, 0);
11558
11559         free_extent_buffer(eb);
11560
11561         btrfs_init_path(&path);
11562         path.lowest_level = level;
11563         /* Search with the first key, to ensure we can reach it */
11564         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11565         if (ret < 0) {
11566                 err |= REFERENCER_MISSING;
11567                 goto release_out;
11568         }
11569
11570         node = path.nodes[level];
11571         if (btrfs_header_bytenr(node) != bytenr) {
11572                 error(
11573         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11574                         bytenr, nodesize, bytenr,
11575                         btrfs_header_bytenr(node));
11576                 err |= REFERENCER_MISMATCH;
11577         }
11578         if (btrfs_header_level(node) != level) {
11579                 error(
11580         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11581                         bytenr, nodesize, level,
11582                         btrfs_header_level(node));
11583                 err |= REFERENCER_MISMATCH;
11584         }
11585
11586 release_out:
11587         btrfs_release_path(&path);
11588 out:
11589         if (err & REFERENCER_MISSING) {
11590                 if (level < 0)
11591                         error("extent [%llu %d] lost referencer (owner: %llu)",
11592                                 bytenr, nodesize, root_id);
11593                 else
11594                         error(
11595                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11596                                 bytenr, nodesize, root_id, level);
11597         }
11598
11599         return err;
11600 }
11601
11602 /*
11603  * Check if tree block @eb is tree reloc root.
11604  * Return 0 if it's not or any problem happens
11605  * Return 1 if it's a tree reloc root
11606  */
11607 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11608                                  struct extent_buffer *eb)
11609 {
11610         struct btrfs_root *tree_reloc_root;
11611         struct btrfs_key key;
11612         u64 bytenr = btrfs_header_bytenr(eb);
11613         u64 owner = btrfs_header_owner(eb);
11614         int ret = 0;
11615
11616         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11617         key.offset = owner;
11618         key.type = BTRFS_ROOT_ITEM_KEY;
11619
11620         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11621         if (IS_ERR(tree_reloc_root))
11622                 return 0;
11623
11624         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11625                 ret = 1;
11626         btrfs_free_fs_root(tree_reloc_root);
11627         return ret;
11628 }
11629
11630 /*
11631  * Check referencer for shared block backref
11632  * If level == -1, this function will resolve the level.
11633  */
11634 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11635                                      u64 parent, u64 bytenr, int level)
11636 {
11637         struct extent_buffer *eb;
11638         u32 nr;
11639         int found_parent = 0;
11640         int i;
11641
11642         eb = read_tree_block(fs_info, parent, 0);
11643         if (!extent_buffer_uptodate(eb))
11644                 goto out;
11645
11646         if (level == -1)
11647                 level = query_tree_block_level(fs_info, bytenr);
11648         if (level < 0)
11649                 goto out;
11650
11651         /* It's possible it's a tree reloc root */
11652         if (parent == bytenr) {
11653                 if (is_tree_reloc_root(fs_info, eb))
11654                         found_parent = 1;
11655                 goto out;
11656         }
11657
11658         if (level + 1 != btrfs_header_level(eb))
11659                 goto out;
11660
11661         nr = btrfs_header_nritems(eb);
11662         for (i = 0; i < nr; i++) {
11663                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11664                         found_parent = 1;
11665                         break;
11666                 }
11667         }
11668 out:
11669         free_extent_buffer(eb);
11670         if (!found_parent) {
11671                 error(
11672         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11673                         bytenr, fs_info->nodesize, parent, level);
11674                 return REFERENCER_MISSING;
11675         }
11676         return 0;
11677 }
11678
11679 /*
11680  * Check referencer for normal (inlined) data ref
11681  * If len == 0, it will be resolved by searching in extent tree
11682  */
11683 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11684                                      u64 root_id, u64 objectid, u64 offset,
11685                                      u64 bytenr, u64 len, u32 count)
11686 {
11687         struct btrfs_root *root;
11688         struct btrfs_root *extent_root = fs_info->extent_root;
11689         struct btrfs_key key;
11690         struct btrfs_path path;
11691         struct extent_buffer *leaf;
11692         struct btrfs_file_extent_item *fi;
11693         u32 found_count = 0;
11694         int slot;
11695         int ret = 0;
11696
11697         if (!len) {
11698                 key.objectid = bytenr;
11699                 key.type = BTRFS_EXTENT_ITEM_KEY;
11700                 key.offset = (u64)-1;
11701
11702                 btrfs_init_path(&path);
11703                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11704                 if (ret < 0)
11705                         goto out;
11706                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11707                 if (ret)
11708                         goto out;
11709                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11710                 if (key.objectid != bytenr ||
11711                     key.type != BTRFS_EXTENT_ITEM_KEY)
11712                         goto out;
11713                 len = key.offset;
11714                 btrfs_release_path(&path);
11715         }
11716         key.objectid = root_id;
11717         key.type = BTRFS_ROOT_ITEM_KEY;
11718         key.offset = (u64)-1;
11719         btrfs_init_path(&path);
11720
11721         root = btrfs_read_fs_root(fs_info, &key);
11722         if (IS_ERR(root))
11723                 goto out;
11724
11725         key.objectid = objectid;
11726         key.type = BTRFS_EXTENT_DATA_KEY;
11727         /*
11728          * It can be nasty as data backref offset is
11729          * file offset - file extent offset, which is smaller or
11730          * equal to original backref offset.  The only special case is
11731          * overflow.  So we need to special check and do further search.
11732          */
11733         key.offset = offset & (1ULL << 63) ? 0 : offset;
11734
11735         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11736         if (ret < 0)
11737                 goto out;
11738
11739         /*
11740          * Search afterwards to get correct one
11741          * NOTE: As we must do a comprehensive check on the data backref to
11742          * make sure the dref count also matches, we must iterate all file
11743          * extents for that inode.
11744          */
11745         while (1) {
11746                 leaf = path.nodes[0];
11747                 slot = path.slots[0];
11748
11749                 if (slot >= btrfs_header_nritems(leaf))
11750                         goto next;
11751                 btrfs_item_key_to_cpu(leaf, &key, slot);
11752                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11753                         break;
11754                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11755                 /*
11756                  * Except normal disk bytenr and disk num bytes, we still
11757                  * need to do extra check on dbackref offset as
11758                  * dbackref offset = file_offset - file_extent_offset
11759                  */
11760                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11761                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11762                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11763                     offset)
11764                         found_count++;
11765
11766 next:
11767                 ret = btrfs_next_item(root, &path);
11768                 if (ret)
11769                         break;
11770         }
11771 out:
11772         btrfs_release_path(&path);
11773         if (found_count != count) {
11774                 error(
11775 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11776                         bytenr, len, root_id, objectid, offset, count, found_count);
11777                 return REFERENCER_MISSING;
11778         }
11779         return 0;
11780 }
11781
11782 /*
11783  * Check if the referencer of a shared data backref exists
11784  */
11785 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11786                                      u64 parent, u64 bytenr)
11787 {
11788         struct extent_buffer *eb;
11789         struct btrfs_key key;
11790         struct btrfs_file_extent_item *fi;
11791         u32 nr;
11792         int found_parent = 0;
11793         int i;
11794
11795         eb = read_tree_block(fs_info, parent, 0);
11796         if (!extent_buffer_uptodate(eb))
11797                 goto out;
11798
11799         nr = btrfs_header_nritems(eb);
11800         for (i = 0; i < nr; i++) {
11801                 btrfs_item_key_to_cpu(eb, &key, i);
11802                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11803                         continue;
11804
11805                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11806                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11807                         continue;
11808
11809                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11810                         found_parent = 1;
11811                         break;
11812                 }
11813         }
11814
11815 out:
11816         free_extent_buffer(eb);
11817         if (!found_parent) {
11818                 error("shared extent %llu referencer lost (parent: %llu)",
11819                         bytenr, parent);
11820                 return REFERENCER_MISSING;
11821         }
11822         return 0;
11823 }
11824
11825 /*
11826  * This function will check a given extent item, including its backref and
11827  * itself (like crossing stripe boundary and type)
11828  *
11829  * Since we don't use extent_record anymore, introduce new error bit
11830  */
11831 static int check_extent_item(struct btrfs_fs_info *fs_info,
11832                              struct extent_buffer *eb, int slot)
11833 {
11834         struct btrfs_extent_item *ei;
11835         struct btrfs_extent_inline_ref *iref;
11836         struct btrfs_extent_data_ref *dref;
11837         unsigned long end;
11838         unsigned long ptr;
11839         int type;
11840         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11841         u32 item_size = btrfs_item_size_nr(eb, slot);
11842         u64 flags;
11843         u64 offset;
11844         int metadata = 0;
11845         int level;
11846         struct btrfs_key key;
11847         int ret;
11848         int err = 0;
11849
11850         btrfs_item_key_to_cpu(eb, &key, slot);
11851         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11852                 bytes_used += key.offset;
11853         else
11854                 bytes_used += nodesize;
11855
11856         if (item_size < sizeof(*ei)) {
11857                 /*
11858                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11859                  * old thing when on disk format is still un-determined.
11860                  * No need to care about it anymore
11861                  */
11862                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11863                 return -ENOTTY;
11864         }
11865
11866         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11867         flags = btrfs_extent_flags(eb, ei);
11868
11869         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11870                 metadata = 1;
11871         if (metadata && check_crossing_stripes(global_info, key.objectid,
11872                                                eb->len)) {
11873                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11874                       key.objectid, key.objectid + nodesize);
11875                 err |= CROSSING_STRIPE_BOUNDARY;
11876         }
11877
11878         ptr = (unsigned long)(ei + 1);
11879
11880         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11881                 /* Old EXTENT_ITEM metadata */
11882                 struct btrfs_tree_block_info *info;
11883
11884                 info = (struct btrfs_tree_block_info *)ptr;
11885                 level = btrfs_tree_block_level(eb, info);
11886                 ptr += sizeof(struct btrfs_tree_block_info);
11887         } else {
11888                 /* New METADATA_ITEM */
11889                 level = key.offset;
11890         }
11891         end = (unsigned long)ei + item_size;
11892
11893 next:
11894         /* Reached extent item end normally */
11895         if (ptr == end)
11896                 goto out;
11897
11898         /* Beyond extent item end, wrong item size */
11899         if (ptr > end) {
11900                 err |= ITEM_SIZE_MISMATCH;
11901                 error("extent item at bytenr %llu slot %d has wrong size",
11902                         eb->start, slot);
11903                 goto out;
11904         }
11905
11906         /* Now check every backref in this extent item */
11907         iref = (struct btrfs_extent_inline_ref *)ptr;
11908         type = btrfs_extent_inline_ref_type(eb, iref);
11909         offset = btrfs_extent_inline_ref_offset(eb, iref);
11910         switch (type) {
11911         case BTRFS_TREE_BLOCK_REF_KEY:
11912                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11913                                                level);
11914                 err |= ret;
11915                 break;
11916         case BTRFS_SHARED_BLOCK_REF_KEY:
11917                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11918                                                  level);
11919                 err |= ret;
11920                 break;
11921         case BTRFS_EXTENT_DATA_REF_KEY:
11922                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11923                 ret = check_extent_data_backref(fs_info,
11924                                 btrfs_extent_data_ref_root(eb, dref),
11925                                 btrfs_extent_data_ref_objectid(eb, dref),
11926                                 btrfs_extent_data_ref_offset(eb, dref),
11927                                 key.objectid, key.offset,
11928                                 btrfs_extent_data_ref_count(eb, dref));
11929                 err |= ret;
11930                 break;
11931         case BTRFS_SHARED_DATA_REF_KEY:
11932                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11933                 err |= ret;
11934                 break;
11935         default:
11936                 error("extent[%llu %d %llu] has unknown ref type: %d",
11937                         key.objectid, key.type, key.offset, type);
11938                 err |= UNKNOWN_TYPE;
11939                 goto out;
11940         }
11941
11942         ptr += btrfs_extent_inline_ref_size(type);
11943         goto next;
11944
11945 out:
11946         return err;
11947 }
11948
11949 /*
11950  * Check if a dev extent item is referred correctly by its chunk
11951  */
11952 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11953                                  struct extent_buffer *eb, int slot)
11954 {
11955         struct btrfs_root *chunk_root = fs_info->chunk_root;
11956         struct btrfs_dev_extent *ptr;
11957         struct btrfs_path path;
11958         struct btrfs_key chunk_key;
11959         struct btrfs_key devext_key;
11960         struct btrfs_chunk *chunk;
11961         struct extent_buffer *l;
11962         int num_stripes;
11963         u64 length;
11964         int i;
11965         int found_chunk = 0;
11966         int ret;
11967
11968         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11969         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11970         length = btrfs_dev_extent_length(eb, ptr);
11971
11972         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11973         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11974         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11975
11976         btrfs_init_path(&path);
11977         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11978         if (ret)
11979                 goto out;
11980
11981         l = path.nodes[0];
11982         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11983         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11984                                       chunk_key.offset);
11985         if (ret < 0)
11986                 goto out;
11987
11988         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11989                 goto out;
11990
11991         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11992         for (i = 0; i < num_stripes; i++) {
11993                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11994                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11995
11996                 if (devid == devext_key.objectid &&
11997                     offset == devext_key.offset) {
11998                         found_chunk = 1;
11999                         break;
12000                 }
12001         }
12002 out:
12003         btrfs_release_path(&path);
12004         if (!found_chunk) {
12005                 error(
12006                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12007                         devext_key.objectid, devext_key.offset, length);
12008                 return REFERENCER_MISSING;
12009         }
12010         return 0;
12011 }
12012
12013 /*
12014  * Check if the used space is correct with the dev item
12015  */
12016 static int check_dev_item(struct btrfs_fs_info *fs_info,
12017                           struct extent_buffer *eb, int slot)
12018 {
12019         struct btrfs_root *dev_root = fs_info->dev_root;
12020         struct btrfs_dev_item *dev_item;
12021         struct btrfs_path path;
12022         struct btrfs_key key;
12023         struct btrfs_dev_extent *ptr;
12024         u64 dev_id;
12025         u64 used;
12026         u64 total = 0;
12027         int ret;
12028
12029         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12030         dev_id = btrfs_device_id(eb, dev_item);
12031         used = btrfs_device_bytes_used(eb, dev_item);
12032
12033         key.objectid = dev_id;
12034         key.type = BTRFS_DEV_EXTENT_KEY;
12035         key.offset = 0;
12036
12037         btrfs_init_path(&path);
12038         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12039         if (ret < 0) {
12040                 btrfs_item_key_to_cpu(eb, &key, slot);
12041                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12042                         key.objectid, key.type, key.offset);
12043                 btrfs_release_path(&path);
12044                 return REFERENCER_MISSING;
12045         }
12046
12047         /* Iterate dev_extents to calculate the used space of a device */
12048         while (1) {
12049                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12050                         goto next;
12051
12052                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12053                 if (key.objectid > dev_id)
12054                         break;
12055                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12056                         goto next;
12057
12058                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12059                                      struct btrfs_dev_extent);
12060                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12061 next:
12062                 ret = btrfs_next_item(dev_root, &path);
12063                 if (ret)
12064                         break;
12065         }
12066         btrfs_release_path(&path);
12067
12068         if (used != total) {
12069                 btrfs_item_key_to_cpu(eb, &key, slot);
12070                 error(
12071 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12072                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12073                         BTRFS_DEV_EXTENT_KEY, dev_id);
12074                 return ACCOUNTING_MISMATCH;
12075         }
12076         return 0;
12077 }
12078
12079 /*
12080  * Check a block group item with its referencer (chunk) and its used space
12081  * with extent/metadata item
12082  */
12083 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12084                                   struct extent_buffer *eb, int slot)
12085 {
12086         struct btrfs_root *extent_root = fs_info->extent_root;
12087         struct btrfs_root *chunk_root = fs_info->chunk_root;
12088         struct btrfs_block_group_item *bi;
12089         struct btrfs_block_group_item bg_item;
12090         struct btrfs_path path;
12091         struct btrfs_key bg_key;
12092         struct btrfs_key chunk_key;
12093         struct btrfs_key extent_key;
12094         struct btrfs_chunk *chunk;
12095         struct extent_buffer *leaf;
12096         struct btrfs_extent_item *ei;
12097         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12098         u64 flags;
12099         u64 bg_flags;
12100         u64 used;
12101         u64 total = 0;
12102         int ret;
12103         int err = 0;
12104
12105         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12106         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12107         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12108         used = btrfs_block_group_used(&bg_item);
12109         bg_flags = btrfs_block_group_flags(&bg_item);
12110
12111         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12112         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12113         chunk_key.offset = bg_key.objectid;
12114
12115         btrfs_init_path(&path);
12116         /* Search for the referencer chunk */
12117         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12118         if (ret) {
12119                 error(
12120                 "block group[%llu %llu] did not find the related chunk item",
12121                         bg_key.objectid, bg_key.offset);
12122                 err |= REFERENCER_MISSING;
12123         } else {
12124                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12125                                         struct btrfs_chunk);
12126                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12127                                                 bg_key.offset) {
12128                         error(
12129         "block group[%llu %llu] related chunk item length does not match",
12130                                 bg_key.objectid, bg_key.offset);
12131                         err |= REFERENCER_MISMATCH;
12132                 }
12133         }
12134         btrfs_release_path(&path);
12135
12136         /* Search from the block group bytenr */
12137         extent_key.objectid = bg_key.objectid;
12138         extent_key.type = 0;
12139         extent_key.offset = 0;
12140
12141         btrfs_init_path(&path);
12142         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12143         if (ret < 0)
12144                 goto out;
12145
12146         /* Iterate extent tree to account used space */
12147         while (1) {
12148                 leaf = path.nodes[0];
12149
12150                 /* Search slot can point to the last item beyond leaf nritems */
12151                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12152                         goto next;
12153
12154                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12155                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12156                         break;
12157
12158                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12159                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12160                         goto next;
12161                 if (extent_key.objectid < bg_key.objectid)
12162                         goto next;
12163
12164                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12165                         total += nodesize;
12166                 else
12167                         total += extent_key.offset;
12168
12169                 ei = btrfs_item_ptr(leaf, path.slots[0],
12170                                     struct btrfs_extent_item);
12171                 flags = btrfs_extent_flags(leaf, ei);
12172                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12173                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12174                                 error(
12175                         "bad extent[%llu, %llu) type mismatch with chunk",
12176                                         extent_key.objectid,
12177                                         extent_key.objectid + extent_key.offset);
12178                                 err |= CHUNK_TYPE_MISMATCH;
12179                         }
12180                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12181                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12182                                     BTRFS_BLOCK_GROUP_METADATA))) {
12183                                 error(
12184                         "bad extent[%llu, %llu) type mismatch with chunk",
12185                                         extent_key.objectid,
12186                                         extent_key.objectid + nodesize);
12187                                 err |= CHUNK_TYPE_MISMATCH;
12188                         }
12189                 }
12190 next:
12191                 ret = btrfs_next_item(extent_root, &path);
12192                 if (ret)
12193                         break;
12194         }
12195
12196 out:
12197         btrfs_release_path(&path);
12198
12199         if (total != used) {
12200                 error(
12201                 "block group[%llu %llu] used %llu but extent items used %llu",
12202                         bg_key.objectid, bg_key.offset, used, total);
12203                 err |= ACCOUNTING_MISMATCH;
12204         }
12205         return err;
12206 }
12207
12208 /*
12209  * Check a chunk item.
12210  * Including checking all referred dev_extents and block group
12211  */
12212 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12213                             struct extent_buffer *eb, int slot)
12214 {
12215         struct btrfs_root *extent_root = fs_info->extent_root;
12216         struct btrfs_root *dev_root = fs_info->dev_root;
12217         struct btrfs_path path;
12218         struct btrfs_key chunk_key;
12219         struct btrfs_key bg_key;
12220         struct btrfs_key devext_key;
12221         struct btrfs_chunk *chunk;
12222         struct extent_buffer *leaf;
12223         struct btrfs_block_group_item *bi;
12224         struct btrfs_block_group_item bg_item;
12225         struct btrfs_dev_extent *ptr;
12226         u64 length;
12227         u64 chunk_end;
12228         u64 stripe_len;
12229         u64 type;
12230         int num_stripes;
12231         u64 offset;
12232         u64 objectid;
12233         int i;
12234         int ret;
12235         int err = 0;
12236
12237         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12238         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12239         length = btrfs_chunk_length(eb, chunk);
12240         chunk_end = chunk_key.offset + length;
12241         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12242                                       chunk_key.offset);
12243         if (ret < 0) {
12244                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12245                         chunk_end);
12246                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12247                 goto out;
12248         }
12249         type = btrfs_chunk_type(eb, chunk);
12250
12251         bg_key.objectid = chunk_key.offset;
12252         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12253         bg_key.offset = length;
12254
12255         btrfs_init_path(&path);
12256         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12257         if (ret) {
12258                 error(
12259                 "chunk[%llu %llu) did not find the related block group item",
12260                         chunk_key.offset, chunk_end);
12261                 err |= REFERENCER_MISSING;
12262         } else{
12263                 leaf = path.nodes[0];
12264                 bi = btrfs_item_ptr(leaf, path.slots[0],
12265                                     struct btrfs_block_group_item);
12266                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12267                                    sizeof(bg_item));
12268                 if (btrfs_block_group_flags(&bg_item) != type) {
12269                         error(
12270 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12271                                 chunk_key.offset, chunk_end, type,
12272                                 btrfs_block_group_flags(&bg_item));
12273                         err |= REFERENCER_MISSING;
12274                 }
12275         }
12276
12277         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12278         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12279         for (i = 0; i < num_stripes; i++) {
12280                 btrfs_release_path(&path);
12281                 btrfs_init_path(&path);
12282                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12283                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12284                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12285
12286                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12287                                         0, 0);
12288                 if (ret)
12289                         goto not_match_dev;
12290
12291                 leaf = path.nodes[0];
12292                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12293                                      struct btrfs_dev_extent);
12294                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12295                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12296                 if (objectid != chunk_key.objectid ||
12297                     offset != chunk_key.offset ||
12298                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12299                         goto not_match_dev;
12300                 continue;
12301 not_match_dev:
12302                 err |= BACKREF_MISSING;
12303                 error(
12304                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12305                         chunk_key.objectid, chunk_end, i);
12306                 continue;
12307         }
12308         btrfs_release_path(&path);
12309 out:
12310         return err;
12311 }
12312
12313 /*
12314  * Main entry function to check known items and update related accounting info
12315  */
12316 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
12317 {
12318         struct btrfs_fs_info *fs_info = root->fs_info;
12319         struct btrfs_key key;
12320         int slot = 0;
12321         int type;
12322         struct btrfs_extent_data_ref *dref;
12323         int ret;
12324         int err = 0;
12325
12326 next:
12327         btrfs_item_key_to_cpu(eb, &key, slot);
12328         type = key.type;
12329
12330         switch (type) {
12331         case BTRFS_EXTENT_DATA_KEY:
12332                 ret = check_extent_data_item(root, eb, slot);
12333                 err |= ret;
12334                 break;
12335         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12336                 ret = check_block_group_item(fs_info, eb, slot);
12337                 err |= ret;
12338                 break;
12339         case BTRFS_DEV_ITEM_KEY:
12340                 ret = check_dev_item(fs_info, eb, slot);
12341                 err |= ret;
12342                 break;
12343         case BTRFS_CHUNK_ITEM_KEY:
12344                 ret = check_chunk_item(fs_info, eb, slot);
12345                 err |= ret;
12346                 break;
12347         case BTRFS_DEV_EXTENT_KEY:
12348                 ret = check_dev_extent_item(fs_info, eb, slot);
12349                 err |= ret;
12350                 break;
12351         case BTRFS_EXTENT_ITEM_KEY:
12352         case BTRFS_METADATA_ITEM_KEY:
12353                 ret = check_extent_item(fs_info, eb, slot);
12354                 err |= ret;
12355                 break;
12356         case BTRFS_EXTENT_CSUM_KEY:
12357                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12358                 break;
12359         case BTRFS_TREE_BLOCK_REF_KEY:
12360                 ret = check_tree_block_backref(fs_info, key.offset,
12361                                                key.objectid, -1);
12362                 err |= ret;
12363                 break;
12364         case BTRFS_EXTENT_DATA_REF_KEY:
12365                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12366                 ret = check_extent_data_backref(fs_info,
12367                                 btrfs_extent_data_ref_root(eb, dref),
12368                                 btrfs_extent_data_ref_objectid(eb, dref),
12369                                 btrfs_extent_data_ref_offset(eb, dref),
12370                                 key.objectid, 0,
12371                                 btrfs_extent_data_ref_count(eb, dref));
12372                 err |= ret;
12373                 break;
12374         case BTRFS_SHARED_BLOCK_REF_KEY:
12375                 ret = check_shared_block_backref(fs_info, key.offset,
12376                                                  key.objectid, -1);
12377                 err |= ret;
12378                 break;
12379         case BTRFS_SHARED_DATA_REF_KEY:
12380                 ret = check_shared_data_backref(fs_info, key.offset,
12381                                                 key.objectid);
12382                 err |= ret;
12383                 break;
12384         default:
12385                 break;
12386         }
12387
12388         if (++slot < btrfs_header_nritems(eb))
12389                 goto next;
12390
12391         return err;
12392 }
12393
12394 /*
12395  * Helper function for later fs/subvol tree check.  To determine if a tree
12396  * block should be checked.
12397  * This function will ensure only the direct referencer with lowest rootid to
12398  * check a fs/subvolume tree block.
12399  *
12400  * Backref check at extent tree would detect errors like missing subvolume
12401  * tree, so we can do aggressive check to reduce duplicated checks.
12402  */
12403 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
12404 {
12405         struct btrfs_root *extent_root = root->fs_info->extent_root;
12406         struct btrfs_key key;
12407         struct btrfs_path path;
12408         struct extent_buffer *leaf;
12409         int slot;
12410         struct btrfs_extent_item *ei;
12411         unsigned long ptr;
12412         unsigned long end;
12413         int type;
12414         u32 item_size;
12415         u64 offset;
12416         struct btrfs_extent_inline_ref *iref;
12417         int ret;
12418
12419         btrfs_init_path(&path);
12420         key.objectid = btrfs_header_bytenr(eb);
12421         key.type = BTRFS_METADATA_ITEM_KEY;
12422         key.offset = (u64)-1;
12423
12424         /*
12425          * Any failure in backref resolving means we can't determine
12426          * whom the tree block belongs to.
12427          * So in that case, we need to check that tree block
12428          */
12429         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12430         if (ret < 0)
12431                 goto need_check;
12432
12433         ret = btrfs_previous_extent_item(extent_root, &path,
12434                                          btrfs_header_bytenr(eb));
12435         if (ret)
12436                 goto need_check;
12437
12438         leaf = path.nodes[0];
12439         slot = path.slots[0];
12440         btrfs_item_key_to_cpu(leaf, &key, slot);
12441         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12442
12443         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12444                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12445         } else {
12446                 struct btrfs_tree_block_info *info;
12447
12448                 info = (struct btrfs_tree_block_info *)(ei + 1);
12449                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
12450         }
12451
12452         item_size = btrfs_item_size_nr(leaf, slot);
12453         ptr = (unsigned long)iref;
12454         end = (unsigned long)ei + item_size;
12455         while (ptr < end) {
12456                 iref = (struct btrfs_extent_inline_ref *)ptr;
12457                 type = btrfs_extent_inline_ref_type(leaf, iref);
12458                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12459
12460                 /*
12461                  * We only check the tree block if current root is
12462                  * the lowest referencer of it.
12463                  */
12464                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12465                     offset < root->objectid) {
12466                         btrfs_release_path(&path);
12467                         return 0;
12468                 }
12469
12470                 ptr += btrfs_extent_inline_ref_size(type);
12471         }
12472         /*
12473          * Normally we should also check keyed tree block ref, but that may be
12474          * very time consuming.  Inlined ref should already make us skip a lot
12475          * of refs now.  So skip search keyed tree block ref.
12476          */
12477
12478 need_check:
12479         btrfs_release_path(&path);
12480         return 1;
12481 }
12482
12483 /*
12484  * Traversal function for tree block. We will do:
12485  * 1) Skip shared fs/subvolume tree blocks
12486  * 2) Update related bytes accounting
12487  * 3) Pre-order traversal
12488  */
12489 static int traverse_tree_block(struct btrfs_root *root,
12490                                 struct extent_buffer *node)
12491 {
12492         struct extent_buffer *eb;
12493         struct btrfs_key key;
12494         struct btrfs_key drop_key;
12495         int level;
12496         u64 nr;
12497         int i;
12498         int err = 0;
12499         int ret;
12500
12501         /*
12502          * Skip shared fs/subvolume tree block, in that case they will
12503          * be checked by referencer with lowest rootid
12504          */
12505         if (is_fstree(root->objectid) && !should_check(root, node))
12506                 return 0;
12507
12508         /* Update bytes accounting */
12509         total_btree_bytes += node->len;
12510         if (fs_root_objectid(btrfs_header_owner(node)))
12511                 total_fs_tree_bytes += node->len;
12512         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12513                 total_extent_tree_bytes += node->len;
12514
12515         /* pre-order tranversal, check itself first */
12516         level = btrfs_header_level(node);
12517         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12518                                    btrfs_header_level(node),
12519                                    btrfs_header_owner(node));
12520         err |= ret;
12521         if (err)
12522                 error(
12523         "check %s failed root %llu bytenr %llu level %d, force continue check",
12524                         level ? "node":"leaf", root->objectid,
12525                         btrfs_header_bytenr(node), btrfs_header_level(node));
12526
12527         if (!level) {
12528                 btree_space_waste += btrfs_leaf_free_space(root, node);
12529                 ret = check_leaf_items(root, node);
12530                 err |= ret;
12531                 return err;
12532         }
12533
12534         nr = btrfs_header_nritems(node);
12535         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12536         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12537                 sizeof(struct btrfs_key_ptr);
12538
12539         /* Then check all its children */
12540         for (i = 0; i < nr; i++) {
12541                 u64 blocknr = btrfs_node_blockptr(node, i);
12542
12543                 btrfs_node_key_to_cpu(node, &key, i);
12544                 if (level == root->root_item.drop_level &&
12545                     is_dropped_key(&key, &drop_key))
12546                         continue;
12547
12548                 /*
12549                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12550                  * to call the function itself.
12551                  */
12552                 eb = read_tree_block(root->fs_info, blocknr, 0);
12553                 if (extent_buffer_uptodate(eb)) {
12554                         ret = traverse_tree_block(root, eb);
12555                         err |= ret;
12556                 }
12557                 free_extent_buffer(eb);
12558         }
12559
12560         return err;
12561 }
12562
12563 /*
12564  * Low memory usage version check_chunks_and_extents.
12565  */
12566 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12567 {
12568         struct btrfs_path path;
12569         struct btrfs_key key;
12570         struct btrfs_root *root1;
12571         struct btrfs_root *root;
12572         struct btrfs_root *cur_root;
12573         int err = 0;
12574         int ret;
12575
12576         root = fs_info->fs_root;
12577
12578         root1 = root->fs_info->chunk_root;
12579         ret = traverse_tree_block(root1, root1->node);
12580         err |= ret;
12581
12582         root1 = root->fs_info->tree_root;
12583         ret = traverse_tree_block(root1, root1->node);
12584         err |= ret;
12585
12586         btrfs_init_path(&path);
12587         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12588         key.offset = 0;
12589         key.type = BTRFS_ROOT_ITEM_KEY;
12590
12591         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12592         if (ret) {
12593                 error("cannot find extent treet in tree_root");
12594                 goto out;
12595         }
12596
12597         while (1) {
12598                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12599                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12600                         goto next;
12601                 key.offset = (u64)-1;
12602
12603                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12604                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12605                                         &key);
12606                 else
12607                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12608                 if (IS_ERR(cur_root) || !cur_root) {
12609                         error("failed to read tree: %lld", key.objectid);
12610                         goto next;
12611                 }
12612
12613                 ret = traverse_tree_block(cur_root, cur_root->node);
12614                 err |= ret;
12615
12616                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12617                         btrfs_free_fs_root(cur_root);
12618 next:
12619                 ret = btrfs_next_item(root1, &path);
12620                 if (ret)
12621                         goto out;
12622         }
12623
12624 out:
12625         btrfs_release_path(&path);
12626         return err;
12627 }
12628
12629 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12630 {
12631         int ret;
12632
12633         if (!ctx.progress_enabled)
12634                 fprintf(stderr, "checking extents\n");
12635         if (check_mode == CHECK_MODE_LOWMEM)
12636                 ret = check_chunks_and_extents_v2(fs_info);
12637         else
12638                 ret = check_chunks_and_extents(fs_info);
12639
12640         return ret;
12641 }
12642
12643 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12644                            struct btrfs_root *root, int overwrite)
12645 {
12646         struct extent_buffer *c;
12647         struct extent_buffer *old = root->node;
12648         int level;
12649         int ret;
12650         struct btrfs_disk_key disk_key = {0,0,0};
12651
12652         level = 0;
12653
12654         if (overwrite) {
12655                 c = old;
12656                 extent_buffer_get(c);
12657                 goto init;
12658         }
12659         c = btrfs_alloc_free_block(trans, root,
12660                                    root->fs_info->nodesize,
12661                                    root->root_key.objectid,
12662                                    &disk_key, level, 0, 0);
12663         if (IS_ERR(c)) {
12664                 c = old;
12665                 extent_buffer_get(c);
12666                 overwrite = 1;
12667         }
12668 init:
12669         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12670         btrfs_set_header_level(c, level);
12671         btrfs_set_header_bytenr(c, c->start);
12672         btrfs_set_header_generation(c, trans->transid);
12673         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12674         btrfs_set_header_owner(c, root->root_key.objectid);
12675
12676         write_extent_buffer(c, root->fs_info->fsid,
12677                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12678
12679         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12680                             btrfs_header_chunk_tree_uuid(c),
12681                             BTRFS_UUID_SIZE);
12682
12683         btrfs_mark_buffer_dirty(c);
12684         /*
12685          * this case can happen in the following case:
12686          *
12687          * 1.overwrite previous root.
12688          *
12689          * 2.reinit reloc data root, this is because we skip pin
12690          * down reloc data tree before which means we can allocate
12691          * same block bytenr here.
12692          */
12693         if (old->start == c->start) {
12694                 btrfs_set_root_generation(&root->root_item,
12695                                           trans->transid);
12696                 root->root_item.level = btrfs_header_level(root->node);
12697                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12698                                         &root->root_key, &root->root_item);
12699                 if (ret) {
12700                         free_extent_buffer(c);
12701                         return ret;
12702                 }
12703         }
12704         free_extent_buffer(old);
12705         root->node = c;
12706         add_root_to_dirty_list(root);
12707         return 0;
12708 }
12709
12710 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12711                                 struct extent_buffer *eb, int tree_root)
12712 {
12713         struct extent_buffer *tmp;
12714         struct btrfs_root_item *ri;
12715         struct btrfs_key key;
12716         u64 bytenr;
12717         int level = btrfs_header_level(eb);
12718         int nritems;
12719         int ret;
12720         int i;
12721
12722         /*
12723          * If we have pinned this block before, don't pin it again.
12724          * This can not only avoid forever loop with broken filesystem
12725          * but also give us some speedups.
12726          */
12727         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12728                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12729                 return 0;
12730
12731         btrfs_pin_extent(fs_info, eb->start, eb->len);
12732
12733         nritems = btrfs_header_nritems(eb);
12734         for (i = 0; i < nritems; i++) {
12735                 if (level == 0) {
12736                         btrfs_item_key_to_cpu(eb, &key, i);
12737                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12738                                 continue;
12739                         /* Skip the extent root and reloc roots */
12740                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12741                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12742                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12743                                 continue;
12744                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12745                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12746
12747                         /*
12748                          * If at any point we start needing the real root we
12749                          * will have to build a stump root for the root we are
12750                          * in, but for now this doesn't actually use the root so
12751                          * just pass in extent_root.
12752                          */
12753                         tmp = read_tree_block(fs_info, bytenr, 0);
12754                         if (!extent_buffer_uptodate(tmp)) {
12755                                 fprintf(stderr, "Error reading root block\n");
12756                                 return -EIO;
12757                         }
12758                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12759                         free_extent_buffer(tmp);
12760                         if (ret)
12761                                 return ret;
12762                 } else {
12763                         bytenr = btrfs_node_blockptr(eb, i);
12764
12765                         /* If we aren't the tree root don't read the block */
12766                         if (level == 1 && !tree_root) {
12767                                 btrfs_pin_extent(fs_info, bytenr,
12768                                                 fs_info->nodesize);
12769                                 continue;
12770                         }
12771
12772                         tmp = read_tree_block(fs_info, bytenr, 0);
12773                         if (!extent_buffer_uptodate(tmp)) {
12774                                 fprintf(stderr, "Error reading tree block\n");
12775                                 return -EIO;
12776                         }
12777                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12778                         free_extent_buffer(tmp);
12779                         if (ret)
12780                                 return ret;
12781                 }
12782         }
12783
12784         return 0;
12785 }
12786
12787 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12788 {
12789         int ret;
12790
12791         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12792         if (ret)
12793                 return ret;
12794
12795         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12796 }
12797
12798 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12799 {
12800         struct btrfs_block_group_cache *cache;
12801         struct btrfs_path path;
12802         struct extent_buffer *leaf;
12803         struct btrfs_chunk *chunk;
12804         struct btrfs_key key;
12805         int ret;
12806         u64 start;
12807
12808         btrfs_init_path(&path);
12809         key.objectid = 0;
12810         key.type = BTRFS_CHUNK_ITEM_KEY;
12811         key.offset = 0;
12812         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12813         if (ret < 0) {
12814                 btrfs_release_path(&path);
12815                 return ret;
12816         }
12817
12818         /*
12819          * We do this in case the block groups were screwed up and had alloc
12820          * bits that aren't actually set on the chunks.  This happens with
12821          * restored images every time and could happen in real life I guess.
12822          */
12823         fs_info->avail_data_alloc_bits = 0;
12824         fs_info->avail_metadata_alloc_bits = 0;
12825         fs_info->avail_system_alloc_bits = 0;
12826
12827         /* First we need to create the in-memory block groups */
12828         while (1) {
12829                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12830                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12831                         if (ret < 0) {
12832                                 btrfs_release_path(&path);
12833                                 return ret;
12834                         }
12835                         if (ret) {
12836                                 ret = 0;
12837                                 break;
12838                         }
12839                 }
12840                 leaf = path.nodes[0];
12841                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12842                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12843                         path.slots[0]++;
12844                         continue;
12845                 }
12846
12847                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12848                 btrfs_add_block_group(fs_info, 0,
12849                                       btrfs_chunk_type(leaf, chunk),
12850                                       key.objectid, key.offset,
12851                                       btrfs_chunk_length(leaf, chunk));
12852                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12853                                  key.offset + btrfs_chunk_length(leaf, chunk));
12854                 path.slots[0]++;
12855         }
12856         start = 0;
12857         while (1) {
12858                 cache = btrfs_lookup_first_block_group(fs_info, start);
12859                 if (!cache)
12860                         break;
12861                 cache->cached = 1;
12862                 start = cache->key.objectid + cache->key.offset;
12863         }
12864
12865         btrfs_release_path(&path);
12866         return 0;
12867 }
12868
12869 static int reset_balance(struct btrfs_trans_handle *trans,
12870                          struct btrfs_fs_info *fs_info)
12871 {
12872         struct btrfs_root *root = fs_info->tree_root;
12873         struct btrfs_path path;
12874         struct extent_buffer *leaf;
12875         struct btrfs_key key;
12876         int del_slot, del_nr = 0;
12877         int ret;
12878         int found = 0;
12879
12880         btrfs_init_path(&path);
12881         key.objectid = BTRFS_BALANCE_OBJECTID;
12882         key.type = BTRFS_BALANCE_ITEM_KEY;
12883         key.offset = 0;
12884         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12885         if (ret) {
12886                 if (ret > 0)
12887                         ret = 0;
12888                 if (!ret)
12889                         goto reinit_data_reloc;
12890                 else
12891                         goto out;
12892         }
12893
12894         ret = btrfs_del_item(trans, root, &path);
12895         if (ret)
12896                 goto out;
12897         btrfs_release_path(&path);
12898
12899         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12900         key.type = BTRFS_ROOT_ITEM_KEY;
12901         key.offset = 0;
12902         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12903         if (ret < 0)
12904                 goto out;
12905         while (1) {
12906                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12907                         if (!found)
12908                                 break;
12909
12910                         if (del_nr) {
12911                                 ret = btrfs_del_items(trans, root, &path,
12912                                                       del_slot, del_nr);
12913                                 del_nr = 0;
12914                                 if (ret)
12915                                         goto out;
12916                         }
12917                         key.offset++;
12918                         btrfs_release_path(&path);
12919
12920                         found = 0;
12921                         ret = btrfs_search_slot(trans, root, &key, &path,
12922                                                 -1, 1);
12923                         if (ret < 0)
12924                                 goto out;
12925                         continue;
12926                 }
12927                 found = 1;
12928                 leaf = path.nodes[0];
12929                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12930                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12931                         break;
12932                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12933                         path.slots[0]++;
12934                         continue;
12935                 }
12936                 if (!del_nr) {
12937                         del_slot = path.slots[0];
12938                         del_nr = 1;
12939                 } else {
12940                         del_nr++;
12941                 }
12942                 path.slots[0]++;
12943         }
12944
12945         if (del_nr) {
12946                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12947                 if (ret)
12948                         goto out;
12949         }
12950         btrfs_release_path(&path);
12951
12952 reinit_data_reloc:
12953         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12954         key.type = BTRFS_ROOT_ITEM_KEY;
12955         key.offset = (u64)-1;
12956         root = btrfs_read_fs_root(fs_info, &key);
12957         if (IS_ERR(root)) {
12958                 fprintf(stderr, "Error reading data reloc tree\n");
12959                 ret = PTR_ERR(root);
12960                 goto out;
12961         }
12962         record_root_in_trans(trans, root);
12963         ret = btrfs_fsck_reinit_root(trans, root, 0);
12964         if (ret)
12965                 goto out;
12966         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12967 out:
12968         btrfs_release_path(&path);
12969         return ret;
12970 }
12971
12972 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12973                               struct btrfs_fs_info *fs_info)
12974 {
12975         u64 start = 0;
12976         int ret;
12977
12978         /*
12979          * The only reason we don't do this is because right now we're just
12980          * walking the trees we find and pinning down their bytes, we don't look
12981          * at any of the leaves.  In order to do mixed groups we'd have to check
12982          * the leaves of any fs roots and pin down the bytes for any file
12983          * extents we find.  Not hard but why do it if we don't have to?
12984          */
12985         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12986                 fprintf(stderr, "We don't support re-initing the extent tree "
12987                         "for mixed block groups yet, please notify a btrfs "
12988                         "developer you want to do this so they can add this "
12989                         "functionality.\n");
12990                 return -EINVAL;
12991         }
12992
12993         /*
12994          * first we need to walk all of the trees except the extent tree and pin
12995          * down the bytes that are in use so we don't overwrite any existing
12996          * metadata.
12997          */
12998         ret = pin_metadata_blocks(fs_info);
12999         if (ret) {
13000                 fprintf(stderr, "error pinning down used bytes\n");
13001                 return ret;
13002         }
13003
13004         /*
13005          * Need to drop all the block groups since we're going to recreate all
13006          * of them again.
13007          */
13008         btrfs_free_block_groups(fs_info);
13009         ret = reset_block_groups(fs_info);
13010         if (ret) {
13011                 fprintf(stderr, "error resetting the block groups\n");
13012                 return ret;
13013         }
13014
13015         /* Ok we can allocate now, reinit the extent root */
13016         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13017         if (ret) {
13018                 fprintf(stderr, "extent root initialization failed\n");
13019                 /*
13020                  * When the transaction code is updated we should end the
13021                  * transaction, but for now progs only knows about commit so
13022                  * just return an error.
13023                  */
13024                 return ret;
13025         }
13026
13027         /*
13028          * Now we have all the in-memory block groups setup so we can make
13029          * allocations properly, and the metadata we care about is safe since we
13030          * pinned all of it above.
13031          */
13032         while (1) {
13033                 struct btrfs_block_group_cache *cache;
13034
13035                 cache = btrfs_lookup_first_block_group(fs_info, start);
13036                 if (!cache)
13037                         break;
13038                 start = cache->key.objectid + cache->key.offset;
13039                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13040                                         &cache->key, &cache->item,
13041                                         sizeof(cache->item));
13042                 if (ret) {
13043                         fprintf(stderr, "Error adding block group\n");
13044                         return ret;
13045                 }
13046                 btrfs_extent_post_op(trans, fs_info->extent_root);
13047         }
13048
13049         ret = reset_balance(trans, fs_info);
13050         if (ret)
13051                 fprintf(stderr, "error resetting the pending balance\n");
13052
13053         return ret;
13054 }
13055
13056 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13057 {
13058         struct btrfs_path path;
13059         struct btrfs_trans_handle *trans;
13060         struct btrfs_key key;
13061         int ret;
13062
13063         printf("Recowing metadata block %llu\n", eb->start);
13064         key.objectid = btrfs_header_owner(eb);
13065         key.type = BTRFS_ROOT_ITEM_KEY;
13066         key.offset = (u64)-1;
13067
13068         root = btrfs_read_fs_root(root->fs_info, &key);
13069         if (IS_ERR(root)) {
13070                 fprintf(stderr, "Couldn't find owner root %llu\n",
13071                         key.objectid);
13072                 return PTR_ERR(root);
13073         }
13074
13075         trans = btrfs_start_transaction(root, 1);
13076         if (IS_ERR(trans))
13077                 return PTR_ERR(trans);
13078
13079         btrfs_init_path(&path);
13080         path.lowest_level = btrfs_header_level(eb);
13081         if (path.lowest_level)
13082                 btrfs_node_key_to_cpu(eb, &key, 0);
13083         else
13084                 btrfs_item_key_to_cpu(eb, &key, 0);
13085
13086         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13087         btrfs_commit_transaction(trans, root);
13088         btrfs_release_path(&path);
13089         return ret;
13090 }
13091
13092 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13093 {
13094         struct btrfs_path path;
13095         struct btrfs_trans_handle *trans;
13096         struct btrfs_key key;
13097         int ret;
13098
13099         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13100                bad->key.type, bad->key.offset);
13101         key.objectid = bad->root_id;
13102         key.type = BTRFS_ROOT_ITEM_KEY;
13103         key.offset = (u64)-1;
13104
13105         root = btrfs_read_fs_root(root->fs_info, &key);
13106         if (IS_ERR(root)) {
13107                 fprintf(stderr, "Couldn't find owner root %llu\n",
13108                         key.objectid);
13109                 return PTR_ERR(root);
13110         }
13111
13112         trans = btrfs_start_transaction(root, 1);
13113         if (IS_ERR(trans))
13114                 return PTR_ERR(trans);
13115
13116         btrfs_init_path(&path);
13117         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13118         if (ret) {
13119                 if (ret > 0)
13120                         ret = 0;
13121                 goto out;
13122         }
13123         ret = btrfs_del_item(trans, root, &path);
13124 out:
13125         btrfs_commit_transaction(trans, root);
13126         btrfs_release_path(&path);
13127         return ret;
13128 }
13129
13130 static int zero_log_tree(struct btrfs_root *root)
13131 {
13132         struct btrfs_trans_handle *trans;
13133         int ret;
13134
13135         trans = btrfs_start_transaction(root, 1);
13136         if (IS_ERR(trans)) {
13137                 ret = PTR_ERR(trans);
13138                 return ret;
13139         }
13140         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13141         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13142         ret = btrfs_commit_transaction(trans, root);
13143         return ret;
13144 }
13145
13146 static int populate_csum(struct btrfs_trans_handle *trans,
13147                          struct btrfs_root *csum_root, char *buf, u64 start,
13148                          u64 len)
13149 {
13150         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13151         u64 offset = 0;
13152         u64 sectorsize;
13153         int ret = 0;
13154
13155         while (offset < len) {
13156                 sectorsize = fs_info->sectorsize;
13157                 ret = read_extent_data(fs_info, buf, start + offset,
13158                                        &sectorsize, 0);
13159                 if (ret)
13160                         break;
13161                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13162                                             start + offset, buf, sectorsize);
13163                 if (ret)
13164                         break;
13165                 offset += sectorsize;
13166         }
13167         return ret;
13168 }
13169
13170 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13171                                       struct btrfs_root *csum_root,
13172                                       struct btrfs_root *cur_root)
13173 {
13174         struct btrfs_path path;
13175         struct btrfs_key key;
13176         struct extent_buffer *node;
13177         struct btrfs_file_extent_item *fi;
13178         char *buf = NULL;
13179         u64 start = 0;
13180         u64 len = 0;
13181         int slot = 0;
13182         int ret = 0;
13183
13184         buf = malloc(cur_root->fs_info->sectorsize);
13185         if (!buf)
13186                 return -ENOMEM;
13187
13188         btrfs_init_path(&path);
13189         key.objectid = 0;
13190         key.offset = 0;
13191         key.type = 0;
13192         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13193         if (ret < 0)
13194                 goto out;
13195         /* Iterate all regular file extents and fill its csum */
13196         while (1) {
13197                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13198
13199                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13200                         goto next;
13201                 node = path.nodes[0];
13202                 slot = path.slots[0];
13203                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13204                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13205                         goto next;
13206                 start = btrfs_file_extent_disk_bytenr(node, fi);
13207                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13208
13209                 ret = populate_csum(trans, csum_root, buf, start, len);
13210                 if (ret == -EEXIST)
13211                         ret = 0;
13212                 if (ret < 0)
13213                         goto out;
13214 next:
13215                 /*
13216                  * TODO: if next leaf is corrupted, jump to nearest next valid
13217                  * leaf.
13218                  */
13219                 ret = btrfs_next_item(cur_root, &path);
13220                 if (ret < 0)
13221                         goto out;
13222                 if (ret > 0) {
13223                         ret = 0;
13224                         goto out;
13225                 }
13226         }
13227
13228 out:
13229         btrfs_release_path(&path);
13230         free(buf);
13231         return ret;
13232 }
13233
13234 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13235                                   struct btrfs_root *csum_root)
13236 {
13237         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13238         struct btrfs_path path;
13239         struct btrfs_root *tree_root = fs_info->tree_root;
13240         struct btrfs_root *cur_root;
13241         struct extent_buffer *node;
13242         struct btrfs_key key;
13243         int slot = 0;
13244         int ret = 0;
13245
13246         btrfs_init_path(&path);
13247         key.objectid = BTRFS_FS_TREE_OBJECTID;
13248         key.offset = 0;
13249         key.type = BTRFS_ROOT_ITEM_KEY;
13250         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13251         if (ret < 0)
13252                 goto out;
13253         if (ret > 0) {
13254                 ret = -ENOENT;
13255                 goto out;
13256         }
13257
13258         while (1) {
13259                 node = path.nodes[0];
13260                 slot = path.slots[0];
13261                 btrfs_item_key_to_cpu(node, &key, slot);
13262                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13263                         goto out;
13264                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13265                         goto next;
13266                 if (!is_fstree(key.objectid))
13267                         goto next;
13268                 key.offset = (u64)-1;
13269
13270                 cur_root = btrfs_read_fs_root(fs_info, &key);
13271                 if (IS_ERR(cur_root) || !cur_root) {
13272                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13273                                 key.objectid);
13274                         goto out;
13275                 }
13276                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13277                                 cur_root);
13278                 if (ret < 0)
13279                         goto out;
13280 next:
13281                 ret = btrfs_next_item(tree_root, &path);
13282                 if (ret > 0) {
13283                         ret = 0;
13284                         goto out;
13285                 }
13286                 if (ret < 0)
13287                         goto out;
13288         }
13289
13290 out:
13291         btrfs_release_path(&path);
13292         return ret;
13293 }
13294
13295 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13296                                       struct btrfs_root *csum_root)
13297 {
13298         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13299         struct btrfs_path path;
13300         struct btrfs_extent_item *ei;
13301         struct extent_buffer *leaf;
13302         char *buf;
13303         struct btrfs_key key;
13304         int ret;
13305
13306         btrfs_init_path(&path);
13307         key.objectid = 0;
13308         key.type = BTRFS_EXTENT_ITEM_KEY;
13309         key.offset = 0;
13310         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13311         if (ret < 0) {
13312                 btrfs_release_path(&path);
13313                 return ret;
13314         }
13315
13316         buf = malloc(csum_root->fs_info->sectorsize);
13317         if (!buf) {
13318                 btrfs_release_path(&path);
13319                 return -ENOMEM;
13320         }
13321
13322         while (1) {
13323                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13324                         ret = btrfs_next_leaf(extent_root, &path);
13325                         if (ret < 0)
13326                                 break;
13327                         if (ret) {
13328                                 ret = 0;
13329                                 break;
13330                         }
13331                 }
13332                 leaf = path.nodes[0];
13333
13334                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13335                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13336                         path.slots[0]++;
13337                         continue;
13338                 }
13339
13340                 ei = btrfs_item_ptr(leaf, path.slots[0],
13341                                     struct btrfs_extent_item);
13342                 if (!(btrfs_extent_flags(leaf, ei) &
13343                       BTRFS_EXTENT_FLAG_DATA)) {
13344                         path.slots[0]++;
13345                         continue;
13346                 }
13347
13348                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13349                                     key.offset);
13350                 if (ret)
13351                         break;
13352                 path.slots[0]++;
13353         }
13354
13355         btrfs_release_path(&path);
13356         free(buf);
13357         return ret;
13358 }
13359
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * The extent tree is the normal source of the data extents.  After an extent
 * tree init that information has been wiped, so the caller sets
 * @search_fs_tree to collect the extents from the fs/subvolume trees instead.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root) :
				fill_csum_tree_from_extent(trans, csum_root);
}
13376
13377 static void free_roots_info_cache(void)
13378 {
13379         if (!roots_info_cache)
13380                 return;
13381
13382         while (!cache_tree_empty(roots_info_cache)) {
13383                 struct cache_extent *entry;
13384                 struct root_item_info *rii;
13385
13386                 entry = first_cache_extent(roots_info_cache);
13387                 if (!entry)
13388                         break;
13389                 remove_cache_extent(roots_info_cache, entry);
13390                 rii = container_of(entry, struct root_item_info, cache_extent);
13391                 free(rii);
13392         }
13393
13394         free(roots_info_cache);
13395         roots_info_cache = NULL;
13396 }
13397
13398 static int build_roots_info_cache(struct btrfs_fs_info *info)
13399 {
13400         int ret = 0;
13401         struct btrfs_key key;
13402         struct extent_buffer *leaf;
13403         struct btrfs_path path;
13404
13405         if (!roots_info_cache) {
13406                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13407                 if (!roots_info_cache)
13408                         return -ENOMEM;
13409                 cache_tree_init(roots_info_cache);
13410         }
13411
13412         btrfs_init_path(&path);
13413         key.objectid = 0;
13414         key.type = BTRFS_EXTENT_ITEM_KEY;
13415         key.offset = 0;
13416         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13417         if (ret < 0)
13418                 goto out;
13419         leaf = path.nodes[0];
13420
13421         while (1) {
13422                 struct btrfs_key found_key;
13423                 struct btrfs_extent_item *ei;
13424                 struct btrfs_extent_inline_ref *iref;
13425                 int slot = path.slots[0];
13426                 int type;
13427                 u64 flags;
13428                 u64 root_id;
13429                 u8 level;
13430                 struct cache_extent *entry;
13431                 struct root_item_info *rii;
13432
13433                 if (slot >= btrfs_header_nritems(leaf)) {
13434                         ret = btrfs_next_leaf(info->extent_root, &path);
13435                         if (ret < 0) {
13436                                 break;
13437                         } else if (ret) {
13438                                 ret = 0;
13439                                 break;
13440                         }
13441                         leaf = path.nodes[0];
13442                         slot = path.slots[0];
13443                 }
13444
13445                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13446
13447                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13448                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13449                         goto next;
13450
13451                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13452                 flags = btrfs_extent_flags(leaf, ei);
13453
13454                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13455                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13456                         goto next;
13457
13458                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13459                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13460                         level = found_key.offset;
13461                 } else {
13462                         struct btrfs_tree_block_info *binfo;
13463
13464                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13465                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13466                         level = btrfs_tree_block_level(leaf, binfo);
13467                 }
13468
13469                 /*
13470                  * For a root extent, it must be of the following type and the
13471                  * first (and only one) iref in the item.
13472                  */
13473                 type = btrfs_extent_inline_ref_type(leaf, iref);
13474                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13475                         goto next;
13476
13477                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13478                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13479                 if (!entry) {
13480                         rii = malloc(sizeof(struct root_item_info));
13481                         if (!rii) {
13482                                 ret = -ENOMEM;
13483                                 goto out;
13484                         }
13485                         rii->cache_extent.start = root_id;
13486                         rii->cache_extent.size = 1;
13487                         rii->level = (u8)-1;
13488                         entry = &rii->cache_extent;
13489                         ret = insert_cache_extent(roots_info_cache, entry);
13490                         ASSERT(ret == 0);
13491                 } else {
13492                         rii = container_of(entry, struct root_item_info,
13493                                            cache_extent);
13494                 }
13495
13496                 ASSERT(rii->cache_extent.start == root_id);
13497                 ASSERT(rii->cache_extent.size == 1);
13498
13499                 if (level > rii->level || rii->level == (u8)-1) {
13500                         rii->level = level;
13501                         rii->bytenr = found_key.objectid;
13502                         rii->gen = btrfs_extent_generation(leaf, ei);
13503                         rii->node_count = 1;
13504                 } else if (level == rii->level) {
13505                         rii->node_count++;
13506                 }
13507 next:
13508                 path.slots[0]++;
13509         }
13510
13511 out:
13512         btrfs_release_path(&path);
13513
13514         return ret;
13515 }
13516
13517 static int maybe_repair_root_item(struct btrfs_path *path,
13518                                   const struct btrfs_key *root_key,
13519                                   const int read_only_mode)
13520 {
13521         const u64 root_id = root_key->objectid;
13522         struct cache_extent *entry;
13523         struct root_item_info *rii;
13524         struct btrfs_root_item ri;
13525         unsigned long offset;
13526
13527         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13528         if (!entry) {
13529                 fprintf(stderr,
13530                         "Error: could not find extent items for root %llu\n",
13531                         root_key->objectid);
13532                 return -ENOENT;
13533         }
13534
13535         rii = container_of(entry, struct root_item_info, cache_extent);
13536         ASSERT(rii->cache_extent.start == root_id);
13537         ASSERT(rii->cache_extent.size == 1);
13538
13539         if (rii->node_count != 1) {
13540                 fprintf(stderr,
13541                         "Error: could not find btree root extent for root %llu\n",
13542                         root_id);
13543                 return -ENOENT;
13544         }
13545
13546         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13547         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13548
13549         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13550             btrfs_root_level(&ri) != rii->level ||
13551             btrfs_root_generation(&ri) != rii->gen) {
13552
13553                 /*
13554                  * If we're in repair mode but our caller told us to not update
13555                  * the root item, i.e. just check if it needs to be updated, don't
13556                  * print this message, since the caller will call us again shortly
13557                  * for the same root item without read only mode (the caller will
13558                  * open a transaction first).
13559                  */
13560                 if (!(read_only_mode && repair))
13561                         fprintf(stderr,
13562                                 "%sroot item for root %llu,"
13563                                 " current bytenr %llu, current gen %llu, current level %u,"
13564                                 " new bytenr %llu, new gen %llu, new level %u\n",
13565                                 (read_only_mode ? "" : "fixing "),
13566                                 root_id,
13567                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13568                                 btrfs_root_level(&ri),
13569                                 rii->bytenr, rii->gen, rii->level);
13570
13571                 if (btrfs_root_generation(&ri) > rii->gen) {
13572                         fprintf(stderr,
13573                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13574                                 root_id, btrfs_root_generation(&ri), rii->gen);
13575                         return -EINVAL;
13576                 }
13577
13578                 if (!read_only_mode) {
13579                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13580                         btrfs_set_root_level(&ri, rii->level);
13581                         btrfs_set_root_generation(&ri, rii->gen);
13582                         write_extent_buffer(path->nodes[0], &ri,
13583                                             offset, sizeof(ri));
13584                 }
13585
13586                 return 1;
13587         }
13588
13589         return 0;
13590 }
13591
13592 /*
13593  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13594  * caused read-only snapshots to be corrupted if they were created at a moment
13595  * when the source subvolume/snapshot had orphan items. The issue was that the
13596  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13597  * node instead of the post orphan cleanup root node.
13598  * So this function, and its callees, just detects and fixes those cases. Even
13599  * though the regression was for read-only snapshots, this function applies to
13600  * any snapshot/subvolume root.
13601  * This must be run before any other repair code - not doing it so, makes other
13602  * repair code delete or modify backrefs in the extent tree for example, which
13603  * will result in an inconsistent fs after repairing the root items.
13604  */
13605 static int repair_root_items(struct btrfs_fs_info *info)
13606 {
13607         struct btrfs_path path;
13608         struct btrfs_key key;
13609         struct extent_buffer *leaf;
13610         struct btrfs_trans_handle *trans = NULL;
13611         int ret = 0;
13612         int bad_roots = 0;
13613         int need_trans = 0;
13614
13615         btrfs_init_path(&path);
13616
13617         ret = build_roots_info_cache(info);
13618         if (ret)
13619                 goto out;
13620
13621         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13622         key.type = BTRFS_ROOT_ITEM_KEY;
13623         key.offset = 0;
13624
13625 again:
13626         /*
13627          * Avoid opening and committing transactions if a leaf doesn't have
13628          * any root items that need to be fixed, so that we avoid rotating
13629          * backup roots unnecessarily.
13630          */
13631         if (need_trans) {
13632                 trans = btrfs_start_transaction(info->tree_root, 1);
13633                 if (IS_ERR(trans)) {
13634                         ret = PTR_ERR(trans);
13635                         goto out;
13636                 }
13637         }
13638
13639         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13640                                 0, trans ? 1 : 0);
13641         if (ret < 0)
13642                 goto out;
13643         leaf = path.nodes[0];
13644
13645         while (1) {
13646                 struct btrfs_key found_key;
13647
13648                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13649                         int no_more_keys = find_next_key(&path, &key);
13650
13651                         btrfs_release_path(&path);
13652                         if (trans) {
13653                                 ret = btrfs_commit_transaction(trans,
13654                                                                info->tree_root);
13655                                 trans = NULL;
13656                                 if (ret < 0)
13657                                         goto out;
13658                         }
13659                         need_trans = 0;
13660                         if (no_more_keys)
13661                                 break;
13662                         goto again;
13663                 }
13664
13665                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13666
13667                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13668                         goto next;
13669                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13670                         goto next;
13671
13672                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13673                 if (ret < 0)
13674                         goto out;
13675                 if (ret) {
13676                         if (!trans && repair) {
13677                                 need_trans = 1;
13678                                 key = found_key;
13679                                 btrfs_release_path(&path);
13680                                 goto again;
13681                         }
13682                         bad_roots++;
13683                 }
13684 next:
13685                 path.slots[0]++;
13686         }
13687         ret = 0;
13688 out:
13689         free_roots_info_cache();
13690         btrfs_release_path(&path);
13691         if (trans)
13692                 btrfs_commit_transaction(trans, info->tree_root);
13693         if (ret < 0)
13694                 return ret;
13695
13696         return bad_roots;
13697 }
13698
13699 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13700 {
13701         struct btrfs_trans_handle *trans;
13702         struct btrfs_block_group_cache *bg_cache;
13703         u64 current = 0;
13704         int ret = 0;
13705
13706         /* Clear all free space cache inodes and its extent data */
13707         while (1) {
13708                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13709                 if (!bg_cache)
13710                         break;
13711                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13712                 if (ret < 0)
13713                         return ret;
13714                 current = bg_cache->key.objectid + bg_cache->key.offset;
13715         }
13716
13717         /* Don't forget to set cache_generation to -1 */
13718         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13719         if (IS_ERR(trans)) {
13720                 error("failed to update super block cache generation");
13721                 return PTR_ERR(trans);
13722         }
13723         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13724         btrfs_commit_transaction(trans, fs_info->tree_root);
13725
13726         return ret;
13727 }
13728
13729 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13730                 int clear_version)
13731 {
13732         int ret = 0;
13733
13734         if (clear_version == 1) {
13735                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13736                         error(
13737                 "free space cache v2 detected, use --clear-space-cache v2");
13738                         ret = 1;
13739                         goto close_out;
13740                 }
13741                 printf("Clearing free space cache\n");
13742                 ret = clear_free_space_cache(fs_info);
13743                 if (ret) {
13744                         error("failed to clear free space cache");
13745                         ret = 1;
13746                 } else {
13747                         printf("Free space cache cleared\n");
13748                 }
13749         } else if (clear_version == 2) {
13750                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13751                         printf("no free space cache v2 to clear\n");
13752                         ret = 0;
13753                         goto close_out;
13754                 }
13755                 printf("Clear free space cache v2\n");
13756                 ret = btrfs_clear_free_space_tree(fs_info);
13757                 if (ret) {
13758                         error("failed to clear free space cache v2: %d", ret);
13759                         ret = 1;
13760                 } else {
13761                         printf("free space cache v2 cleared\n");
13762                 }
13763         }
13764 close_out:
13765         return ret;
13766 }
13767
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 *
 * NULL-terminated list of lines: the synopsis comes first, followed by the
 * description, an empty string and then one or more lines per option.
 * The options listed here correspond to the long_options table and the
 * getopt string in cmd_check(); keep them in sync when adding an option.
 * The short option 'a' is accepted (and ignored) by cmd_check() but is
 * intentionally not listed here.
 */
13768 const char * const cmd_check_usage[] = {
13769         "btrfs check [options] <device>",
13770         "Check structural integrity of a filesystem (unmounted).",
13771         "Check structural integrity of an unmounted filesystem. Verify internal",
13772         "trees' consistency and item connectivity. In the repair mode try to",
13773         "fix the problems found. ",
13774         "WARNING: the repair mode is considered dangerous",
13775         "",
13776         "-s|--super <superblock>     use this superblock copy",
13777         "-b|--backup                 use the first valid backup root copy",
13778         "--force                     skip mount checks, repair is not possible",
13779         "--repair                    try to repair the filesystem",
13780         "--readonly                  run in read-only mode (default)",
13781         "--init-csum-tree            create a new CRC tree",
13782         "--init-extent-tree          create a new extent tree",
13783         "--mode <MODE>               allows choice of memory/IO trade-offs",
13784         "                            where MODE is one of:",
13785         "                            original - read inodes and extents to memory (requires",
13786         "                                       more memory, does less IO)",
13787         "                            lowmem   - try to use less memory but read blocks again",
13788         "                                       when needed",
13789         "--check-data-csum           verify checksums of data blocks",
13790         "-Q|--qgroup-report          print a report on qgroup consistency",
13791         "-E|--subvol-extents <subvolid>",
13792         "                            print subvolume extents and sharing state",
13793         "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
13794         "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
13795         "-p|--progress               indicate progress",
13796         "--clear-space-cache v1|v2   clear space cache for v1 or v2",
13797         NULL
13798 };
13799
13800 int cmd_check(int argc, char **argv)
13801 {
13802         struct cache_tree root_cache;
13803         struct btrfs_root *root;
13804         struct btrfs_fs_info *info;
13805         u64 bytenr = 0;
13806         u64 subvolid = 0;
13807         u64 tree_root_bytenr = 0;
13808         u64 chunk_root_bytenr = 0;
13809         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13810         int ret = 0;
13811         int err = 0;
13812         u64 num;
13813         int init_csum_tree = 0;
13814         int readonly = 0;
13815         int clear_space_cache = 0;
13816         int qgroup_report = 0;
13817         int qgroups_repaired = 0;
13818         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13819         int force = 0;
13820
13821         while(1) {
13822                 int c;
13823                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13824                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13825                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13826                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13827                         GETOPT_VAL_FORCE };
13828                 static const struct option long_options[] = {
13829                         { "super", required_argument, NULL, 's' },
13830                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13831                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13832                         { "init-csum-tree", no_argument, NULL,
13833                                 GETOPT_VAL_INIT_CSUM },
13834                         { "init-extent-tree", no_argument, NULL,
13835                                 GETOPT_VAL_INIT_EXTENT },
13836                         { "check-data-csum", no_argument, NULL,
13837                                 GETOPT_VAL_CHECK_CSUM },
13838                         { "backup", no_argument, NULL, 'b' },
13839                         { "subvol-extents", required_argument, NULL, 'E' },
13840                         { "qgroup-report", no_argument, NULL, 'Q' },
13841                         { "tree-root", required_argument, NULL, 'r' },
13842                         { "chunk-root", required_argument, NULL,
13843                                 GETOPT_VAL_CHUNK_TREE },
13844                         { "progress", no_argument, NULL, 'p' },
13845                         { "mode", required_argument, NULL,
13846                                 GETOPT_VAL_MODE },
13847                         { "clear-space-cache", required_argument, NULL,
13848                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13849                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13850                         { NULL, 0, NULL, 0}
13851                 };
13852
13853                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13854                 if (c < 0)
13855                         break;
13856                 switch(c) {
13857                         case 'a': /* ignored */ break;
13858                         case 'b':
13859                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13860                                 break;
13861                         case 's':
13862                                 num = arg_strtou64(optarg);
13863                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13864                                         error(
13865                                         "super mirror should be less than %d",
13866                                                 BTRFS_SUPER_MIRROR_MAX);
13867                                         exit(1);
13868                                 }
13869                                 bytenr = btrfs_sb_offset(((int)num));
13870                                 printf("using SB copy %llu, bytenr %llu\n", num,
13871                                        (unsigned long long)bytenr);
13872                                 break;
13873                         case 'Q':
13874                                 qgroup_report = 1;
13875                                 break;
13876                         case 'E':
13877                                 subvolid = arg_strtou64(optarg);
13878                                 break;
13879                         case 'r':
13880                                 tree_root_bytenr = arg_strtou64(optarg);
13881                                 break;
13882                         case GETOPT_VAL_CHUNK_TREE:
13883                                 chunk_root_bytenr = arg_strtou64(optarg);
13884                                 break;
13885                         case 'p':
13886                                 ctx.progress_enabled = true;
13887                                 break;
13888                         case '?':
13889                         case 'h':
13890                                 usage(cmd_check_usage);
13891                         case GETOPT_VAL_REPAIR:
13892                                 printf("enabling repair mode\n");
13893                                 repair = 1;
13894                                 ctree_flags |= OPEN_CTREE_WRITES;
13895                                 break;
13896                         case GETOPT_VAL_READONLY:
13897                                 readonly = 1;
13898                                 break;
13899                         case GETOPT_VAL_INIT_CSUM:
13900                                 printf("Creating a new CRC tree\n");
13901                                 init_csum_tree = 1;
13902                                 repair = 1;
13903                                 ctree_flags |= OPEN_CTREE_WRITES;
13904                                 break;
13905                         case GETOPT_VAL_INIT_EXTENT:
13906                                 init_extent_tree = 1;
13907                                 ctree_flags |= (OPEN_CTREE_WRITES |
13908                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13909                                 repair = 1;
13910                                 break;
13911                         case GETOPT_VAL_CHECK_CSUM:
13912                                 check_data_csum = 1;
13913                                 break;
13914                         case GETOPT_VAL_MODE:
13915                                 check_mode = parse_check_mode(optarg);
13916                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13917                                         error("unknown mode: %s", optarg);
13918                                         exit(1);
13919                                 }
13920                                 break;
13921                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13922                                 if (strcmp(optarg, "v1") == 0) {
13923                                         clear_space_cache = 1;
13924                                 } else if (strcmp(optarg, "v2") == 0) {
13925                                         clear_space_cache = 2;
13926                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13927                                 } else {
13928                                         error(
13929                 "invalid argument to --clear-space-cache, must be v1 or v2");
13930                                         exit(1);
13931                                 }
13932                                 ctree_flags |= OPEN_CTREE_WRITES;
13933                                 break;
13934                         case GETOPT_VAL_FORCE:
13935                                 force = 1;
13936                                 break;
13937                 }
13938         }
13939
13940         if (check_argc_exact(argc - optind, 1))
13941                 usage(cmd_check_usage);
13942
13943         if (ctx.progress_enabled) {
13944                 ctx.tp = TASK_NOTHING;
13945                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13946         }
13947
13948         /* This check is the only reason for --readonly to exist */
13949         if (readonly && repair) {
13950                 error("repair options are not compatible with --readonly");
13951                 exit(1);
13952         }
13953
13954         /*
13955          * experimental and dangerous
13956          */
13957         if (repair && check_mode == CHECK_MODE_LOWMEM)
13958                 warning("low-memory mode repair support is only partial");
13959
13960         radix_tree_init();
13961         cache_tree_init(&root_cache);
13962
13963         ret = check_mounted(argv[optind]);
13964         if (!force) {
13965                 if (ret < 0) {
13966                         error("could not check mount status: %s",
13967                                         strerror(-ret));
13968                         err |= !!ret;
13969                         goto err_out;
13970                 } else if (ret) {
13971                         error(
13972 "%s is currently mounted, use --force if you really intend to check the filesystem",
13973                                 argv[optind]);
13974                         ret = -EBUSY;
13975                         err |= !!ret;
13976                         goto err_out;
13977                 }
13978         } else {
13979                 if (repair) {
13980                         error("repair and --force is not yet supported");
13981                         ret = 1;
13982                         err |= !!ret;
13983                         goto err_out;
13984                 }
13985                 if (ret < 0) {
13986                         warning(
13987 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13988                                 argv[optind]);
13989                 } else if (ret) {
13990                         warning(
13991                         "filesystem mounted, continuing because of --force");
13992                 }
13993                 /* A block device is mounted in exclusive mode by kernel */
13994                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13995         }
13996
13997         /* only allow partial opening under repair mode */
13998         if (repair)
13999                 ctree_flags |= OPEN_CTREE_PARTIAL;
14000
14001         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14002                                   chunk_root_bytenr, ctree_flags);
14003         if (!info) {
14004                 error("cannot open file system");
14005                 ret = -EIO;
14006                 err |= !!ret;
14007                 goto err_out;
14008         }
14009
14010         global_info = info;
14011         root = info->fs_root;
14012         uuid_unparse(info->super_copy->fsid, uuidbuf);
14013
14014         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14015
14016         /*
14017          * Check the bare minimum before starting anything else that could rely
14018          * on it, namely the tree roots, any local consistency checks
14019          */
14020         if (!extent_buffer_uptodate(info->tree_root->node) ||
14021             !extent_buffer_uptodate(info->dev_root->node) ||
14022             !extent_buffer_uptodate(info->chunk_root->node)) {
14023                 error("critical roots corrupted, unable to check the filesystem");
14024                 err |= !!ret;
14025                 ret = -EIO;
14026                 goto close_out;
14027         }
14028
14029         if (clear_space_cache) {
14030                 ret = do_clear_free_space_cache(info, clear_space_cache);
14031                 err |= !!ret;
14032                 goto close_out;
14033         }
14034
14035         /*
14036          * repair mode will force us to commit transaction which
14037          * will make us fail to load log tree when mounting.
14038          */
14039         if (repair && btrfs_super_log_root(info->super_copy)) {
14040                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14041                 if (!ret) {
14042                         ret = 1;
14043                         err |= !!ret;
14044                         goto close_out;
14045                 }
14046                 ret = zero_log_tree(root);
14047                 err |= !!ret;
14048                 if (ret) {
14049                         error("failed to zero log tree: %d", ret);
14050                         goto close_out;
14051                 }
14052         }
14053
14054         if (qgroup_report) {
14055                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14056                        uuidbuf);
14057                 ret = qgroup_verify_all(info);
14058                 err |= !!ret;
14059                 if (ret == 0)
14060                         report_qgroups(1);
14061                 goto close_out;
14062         }
14063         if (subvolid) {
14064                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14065                        subvolid, argv[optind], uuidbuf);
14066                 ret = print_extent_state(info, subvolid);
14067                 err |= !!ret;
14068                 goto close_out;
14069         }
14070
14071         if (init_extent_tree || init_csum_tree) {
14072                 struct btrfs_trans_handle *trans;
14073
14074                 trans = btrfs_start_transaction(info->extent_root, 0);
14075                 if (IS_ERR(trans)) {
14076                         error("error starting transaction");
14077                         ret = PTR_ERR(trans);
14078                         err |= !!ret;
14079                         goto close_out;
14080                 }
14081
14082                 if (init_extent_tree) {
14083                         printf("Creating a new extent tree\n");
14084                         ret = reinit_extent_tree(trans, info);
14085                         err |= !!ret;
14086                         if (ret)
14087                                 goto close_out;
14088                 }
14089
14090                 if (init_csum_tree) {
14091                         printf("Reinitialize checksum tree\n");
14092                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14093                         if (ret) {
14094                                 error("checksum tree initialization failed: %d",
14095                                                 ret);
14096                                 ret = -EIO;
14097                                 err |= !!ret;
14098                                 goto close_out;
14099                         }
14100
14101                         ret = fill_csum_tree(trans, info->csum_root,
14102                                              init_extent_tree);
14103                         err |= !!ret;
14104                         if (ret) {
14105                                 error("checksum tree refilling failed: %d", ret);
14106                                 return -EIO;
14107                         }
14108                 }
14109                 /*
14110                  * Ok now we commit and run the normal fsck, which will add
14111                  * extent entries for all of the items it finds.
14112                  */
14113                 ret = btrfs_commit_transaction(trans, info->extent_root);
14114                 err |= !!ret;
14115                 if (ret)
14116                         goto close_out;
14117         }
14118         if (!extent_buffer_uptodate(info->extent_root->node)) {
14119                 error("critical: extent_root, unable to check the filesystem");
14120                 ret = -EIO;
14121                 err |= !!ret;
14122                 goto close_out;
14123         }
14124         if (!extent_buffer_uptodate(info->csum_root->node)) {
14125                 error("critical: csum_root, unable to check the filesystem");
14126                 ret = -EIO;
14127                 err |= !!ret;
14128                 goto close_out;
14129         }
14130
14131         ret = do_check_chunks_and_extents(info);
14132         err |= !!ret;
14133         if (ret)
14134                 error(
14135                 "errors found in extent allocation tree or chunk allocation");
14136
14137         ret = repair_root_items(info);
14138         err |= !!ret;
14139         if (ret < 0) {
14140                 error("failed to repair root items: %s", strerror(-ret));
14141                 goto close_out;
14142         }
14143         if (repair) {
14144                 fprintf(stderr, "Fixed %d roots.\n", ret);
14145                 ret = 0;
14146         } else if (ret > 0) {
14147                 fprintf(stderr,
14148                        "Found %d roots with an outdated root item.\n",
14149                        ret);
14150                 fprintf(stderr,
14151                         "Please run a filesystem check with the option --repair to fix them.\n");
14152                 ret = 1;
14153                 err |= !!ret;
14154                 goto close_out;
14155         }
14156
14157         if (!ctx.progress_enabled) {
14158                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14159                         fprintf(stderr, "checking free space tree\n");
14160                 else
14161                         fprintf(stderr, "checking free space cache\n");
14162         }
14163         ret = check_space_cache(root);
14164         err |= !!ret;
14165         if (ret) {
14166                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14167                         error("errors found in free space tree");
14168                 else
14169                         error("errors found in free space cache");
14170                 goto out;
14171         }
14172
14173         /*
14174          * We used to have to have these hole extents in between our real
14175          * extents so if we don't have this flag set we need to make sure there
14176          * are no gaps in the file extents for inodes, otherwise we can just
14177          * ignore it when this happens.
14178          */
14179         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14180         ret = do_check_fs_roots(info, &root_cache);
14181         err |= !!ret;
14182         if (ret) {
14183                 error("errors found in fs roots");
14184                 goto out;
14185         }
14186
14187         fprintf(stderr, "checking csums\n");
14188         ret = check_csums(root);
14189         err |= !!ret;
14190         if (ret) {
14191                 error("errors found in csum tree");
14192                 goto out;
14193         }
14194
14195         fprintf(stderr, "checking root refs\n");
14196         /* For low memory mode, check_fs_roots_v2 handles root refs */
14197         if (check_mode != CHECK_MODE_LOWMEM) {
14198                 ret = check_root_refs(root, &root_cache);
14199                 err |= !!ret;
14200                 if (ret) {
14201                         error("errors found in root refs");
14202                         goto out;
14203                 }
14204         }
14205
14206         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14207                 struct extent_buffer *eb;
14208
14209                 eb = list_first_entry(&root->fs_info->recow_ebs,
14210                                       struct extent_buffer, recow);
14211                 list_del_init(&eb->recow);
14212                 ret = recow_extent_buffer(root, eb);
14213                 err |= !!ret;
14214                 if (ret) {
14215                         error("fails to fix transid errors");
14216                         break;
14217                 }
14218         }
14219
14220         while (!list_empty(&delete_items)) {
14221                 struct bad_item *bad;
14222
14223                 bad = list_first_entry(&delete_items, struct bad_item, list);
14224                 list_del_init(&bad->list);
14225                 if (repair) {
14226                         ret = delete_bad_item(root, bad);
14227                         err |= !!ret;
14228                 }
14229                 free(bad);
14230         }
14231
14232         if (info->quota_enabled) {
14233                 fprintf(stderr, "checking quota groups\n");
14234                 ret = qgroup_verify_all(info);
14235                 err |= !!ret;
14236                 if (ret) {
14237                         error("failed to check quota groups");
14238                         goto out;
14239                 }
14240                 report_qgroups(0);
14241                 ret = repair_qgroups(info, &qgroups_repaired);
14242                 err |= !!ret;
14243                 if (err) {
14244                         error("failed to repair quota groups");
14245                         goto out;
14246                 }
14247                 ret = 0;
14248         }
14249
14250         if (!list_empty(&root->fs_info->recow_ebs)) {
14251                 error("transid errors in file system");
14252                 ret = 1;
14253                 err |= !!ret;
14254         }
14255 out:
14256         printf("found %llu bytes used, ",
14257                (unsigned long long)bytes_used);
14258         if (err)
14259                 printf("error(s) found\n");
14260         else
14261                 printf("no error found\n");
14262         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14263         printf("total tree bytes: %llu\n",
14264                (unsigned long long)total_btree_bytes);
14265         printf("total fs tree bytes: %llu\n",
14266                (unsigned long long)total_fs_tree_bytes);
14267         printf("total extent tree bytes: %llu\n",
14268                (unsigned long long)total_extent_tree_bytes);
14269         printf("btree space waste bytes: %llu\n",
14270                (unsigned long long)btree_space_waste);
14271         printf("file data blocks allocated: %llu\n referenced %llu\n",
14272                 (unsigned long long)data_bytes_allocated,
14273                 (unsigned long long)data_bytes_referenced);
14274
14275         free_qgroup_counts();
14276         free_root_recs_tree(&root_cache);
14277 close_out:
14278         close_ctree(root);
14279 err_out:
14280         if (ctx.progress_enabled)
14281                 task_deinit(ctx.info);
14282
14283         return err;
14284 }