btrfs-progs: check: change the way lowmem mode traverses metadata
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase currently shown by the progress indicator.  print_status_check()
 * uses the value as an index into its table of phase names and returns
 * without printing anything when it is TASK_NOTHING.
 */
47 enum task_position {
48         TASK_EXTENTS,
49         TASK_FREE_SPACE,
50         TASK_FS_ROOTS,
51         TASK_NOTHING, /* have to be the last element */
52 };
53
/*
 * State handed to the progress callbacks: print_status_check() reads tp to
 * pick the phase name and passes info to the task_period_*() helpers.
 * NOTE(review): progress_enabled is not read in this part of the file.
 */
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
/*
 * File-scope counters, work lists and option flags.
 * NOTE(review): the code that updates and reports them lies outside this
 * part of the file.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Which checker implementation to run.  parse_check_mode() turns the strings
 * "lowmem", "orig" and "original" into these values and yields
 * CHECK_MODE_UNKNOWN for anything else.
 */
78 enum btrfs_check_mode {
79         CHECK_MODE_ORIGINAL,
80         CHECK_MODE_LOWMEM,
81         CHECK_MODE_UNKNOWN,
82         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
83 };
84
/* Mode in effect; initialised to the default, i.e. the original mode. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Header shared by data_backref and tree_backref, both of which embed it as
 * their member "node".  compare_extent_backref() orders entries by is_data,
 * then by full_backref, before handing off to the type-specific comparison.
 */
87 struct extent_backref {
88         struct rb_node node;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Back reference to a data extent.  parent and root overlay each other in
 * the union.  compare_data_backref() compares that value first, stops there
 * when node.full_backref is set, and otherwise goes on to owner, offset and,
 * if both sides have a non-zero found_ref, disk_bytenr and bytes.
 * The u32 found_ref below is a separate field from the one-bit
 * node.found_ref.
 */
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/*
 * Single-bit error flags.
 * NOTE(review): the code that sets and tests them is outside this part of
 * the file; bit 0 is not assigned in this set.
 */
116 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
130 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING       (1<<18) /* DIR_INDEX not found */
134 #define DIR_INDEX_MISMATCH      (1<<19) /* DIR_INDEX found but not match */
135 #define DIR_COUNT_AGAIN         (1<<20) /* DIR isize should be recalculated */
136
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
138 {
139         return container_of(back, struct data_backref, node);
140 }
141
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
143 {
144         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146         struct data_backref *back1 = to_data_backref(ext1);
147         struct data_backref *back2 = to_data_backref(ext2);
148
149         WARN_ON(!ext1->is_data);
150         WARN_ON(!ext2->is_data);
151
152         /* parent and root are a union, so this covers both */
153         if (back1->parent > back2->parent)
154                 return 1;
155         if (back1->parent < back2->parent)
156                 return -1;
157
158         /* This is a full backref and the parents match. */
159         if (back1->node.full_backref)
160                 return 0;
161
162         if (back1->owner > back2->owner)
163                 return 1;
164         if (back1->owner < back2->owner)
165                 return -1;
166
167         if (back1->offset > back2->offset)
168                 return 1;
169         if (back1->offset < back2->offset)
170                 return -1;
171
172         if (back1->found_ref && back2->found_ref) {
173                 if (back1->disk_bytenr > back2->disk_bytenr)
174                         return 1;
175                 if (back1->disk_bytenr < back2->disk_bytenr)
176                         return -1;
177
178                 if (back1->bytes > back2->bytes)
179                         return 1;
180                 if (back1->bytes < back2->bytes)
181                         return -1;
182         }
183
184         return 0;
185 }
186
187 /*
188  * Much like data_backref, just removed the undetermined members
189  * and change it to use list_head.
190  * During extent scan, it is stored in root->orphan_data_extent.
191  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
192  */
/*
 * Entries are chained through "list"; clone_inode_rec() duplicates each one
 * with memcpy() when it copies an inode record's orphan_extents list, and
 * print_orphan_data_extents() prints objectid, offset, disk_bytenr and
 * disk_len.
 */
193 struct orphan_data_extent {
194         struct list_head list;
195         u64 root;
196         u64 objectid;
197         u64 offset;
198         u64 disk_bytenr;
199         u64 disk_len;
200 };
201
/*
 * Back reference to a tree block.  parent and root overlay each other in the
 * union; compare_tree_backref() orders entries by that single value.
 */
202 struct tree_backref {
203         struct extent_backref node;
204         union {
205                 u64 parent;
206                 u64 root;
207         };
208 };
209
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
211 {
212         return container_of(back, struct tree_backref, node);
213 }
214
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
216 {
217         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219         struct tree_backref *back1 = to_tree_backref(ext1);
220         struct tree_backref *back2 = to_tree_backref(ext2);
221
222         WARN_ON(ext1->is_data);
223         WARN_ON(ext2->is_data);
224
225         /* parent and root are a union, so this covers both */
226         if (back1->parent > back2->parent)
227                 return 1;
228         if (back1->parent < back2->parent)
229                 return -1;
230
231         return 0;
232 }
233
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
235 {
236         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
238
239         if (ext1->is_data > ext2->is_data)
240                 return 1;
241
242         if (ext1->is_data < ext2->is_data)
243                 return -1;
244
245         if (ext1->full_backref > ext2->full_backref)
246                 return 1;
247         if (ext1->full_backref < ext2->full_backref)
248                 return -1;
249
250         if (ext1->is_data)
251                 return compare_data_backref(node1, node2);
252         else
253                 return compare_tree_backref(node1, node2);
254 }
255
256 /* Explicit initialization for extent_record::flag_block_full_backref */
/* The value 2 still fits that field, which is declared two bits wide. */
257 enum { FLAG_UNSET = 2 };
258
/*
 * Per-extent bookkeeping record.  to_extent_record() recovers the record
 * from its "list" member.  flag_block_full_backref is two bits wide so that
 * it can hold FLAG_UNSET (2) as well as 0 and 1.
 * NOTE(review): the code that fills in the remaining fields lies outside
 * this part of the file.
 */
259 struct extent_record {
260         struct list_head backrefs;
261         struct list_head dups;
262         struct rb_root backref_tree;
263         struct list_head list;
264         struct cache_extent cache;
265         struct btrfs_disk_key parent_key;
266         u64 start;
267         u64 max_size;
268         u64 nr;
269         u64 refs;
270         u64 extent_item_refs;
271         u64 generation;
272         u64 parent_generation;
273         u64 info_objectid;
274         u32 num_duplicates;
275         u8 info_level;
276         unsigned int flag_block_full_backref:2;
277         unsigned int found_rec:1;
278         unsigned int content_checked:1;
279         unsigned int owner_ref_checked:1;
280         unsigned int is_root:1;
281         unsigned int metadata:1;
282         unsigned int bad_full_backref:1;
283         unsigned int crossing_stripes:1;
284         unsigned int wrong_chunk_type:1;
285 };
286
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
288 {
289         return container_of(entry, struct extent_record, list);
290 }
291
/*
 * One name-based reference to an inode, chained through "list".
 * The structure is variable-length: the name bytes follow the fixed part
 * (name[0]), and clone_inode_rec() copies sizeof(struct) + namelen + 1 bytes
 * per entry.
 * NOTE(review): errors presumably carries REF_ERR_* bits - confirm against
 * the code that sets it.
 */
292 struct inode_backref {
293         struct list_head list;
294         unsigned int found_dir_item:1;
295         unsigned int found_dir_index:1;
296         unsigned int found_inode_ref:1;
297         u8 filetype;
298         u8 ref_type;
299         int errors;
300         u64 dir;
301         u64 index;
302         u16 namelen;
303         char name[0];
304 };
305
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
307 {
308         return list_entry(entry, struct inode_backref, list);
309 }
310
/*
 * Snapshot of selected root item values, chainable through "list".
 * NOTE(review): the code that fills and consumes it is outside this part of
 * the file.
 */
311 struct root_item_record {
312         struct list_head list;
313         u64 objectid;
314         u64 bytenr;
315         u64 last_snapshot;
316         u8 level;
317         u8 drop_level;
318         struct btrfs_key drop_key;
319 };
320
/*
 * Single-bit reference error flags; print_ref_error() turns a mask of them
 * into human-readable text.
 */
321 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
322 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
323 #define REF_ERR_NO_INODE_REF            (1 << 2)
324 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
325 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
326 #define REF_ERR_DUP_INODE_REF           (1 << 5)
327 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
328 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
329 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
330 #define REF_ERR_NO_ROOT_REF             (1 << 9)
331 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
332 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
333 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
334
/*
 * One hole covering [start, start + len) in a file's extents (that is the
 * range compare_hole_range() matches against).  Holes live in an rb-tree
 * ordered by compare_hole().
 */
335 struct file_extent_hole {
336         struct rb_node node;
337         u64 start;
338         u64 len;
339 };
340
/*
 * Everything collected about one inode.
 *
 * backrefs is a list of inode_backref, orphan_extents a list of
 * orphan_data_extent and holes an rb-tree of file_extent_hole;
 * clone_inode_rec() deep-copies all three and resets refs to 1 on the copy.
 * errors carries I_ERR_* bits, decoded by print_inode_error().
 * NOTE(review): the code that fills in the remaining fields lies outside
 * this part of the file.
 */
341 struct inode_record {
342         struct list_head backrefs;
343         unsigned int checked:1;
344         unsigned int merging:1;
345         unsigned int found_inode_item:1;
346         unsigned int found_dir_item:1;
347         unsigned int found_file_extent:1;
348         unsigned int found_csum_item:1;
349         unsigned int some_csum_missing:1;
350         unsigned int nodatasum:1;
351         int errors;
352
353         u64 ino;
354         u32 nlink;
355         u32 imode;
356         u64 isize;
357         u64 nbytes;
358
359         u32 found_link;
360         u64 found_size;
361         u64 extent_start;
362         u64 extent_end;
363         struct rb_root holes;
364         struct list_head orphan_extents;
365
366         u32 refs;
367 };
368
/*
 * Single-bit inode error flags stored in inode_record::errors;
 * print_inode_error() prints one text fragment per bit that is set.
 */
369 #define I_ERR_NO_INODE_ITEM             (1 << 0)
370 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
371 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
372 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
373 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
374 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
375 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
376 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
377 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
378 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
379 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
380 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
381 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
382 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
383 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
384
/*
 * One reference to a root, chained through "list"; to_root_backref()
 * recovers it from that member.  Variable-length: the name bytes follow the
 * fixed part (name[0]).
 * NOTE(review): the code that fills it in is outside this part of the file.
 */
385 struct root_backref {
386         struct list_head list;
387         unsigned int found_dir_item:1;
388         unsigned int found_dir_index:1;
389         unsigned int found_back_ref:1;
390         unsigned int found_forward_ref:1;
391         unsigned int reachable:1;
392         int errors;
393         u64 ref_root;
394         u64 dir;
395         u64 index;
396         u16 namelen;
397         char name[0];
398 };
399
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
401 {
402         return list_entry(entry, struct root_backref, list);
403 }
404
/*
 * Per-root record holding a list head named backrefs and a cache_extent
 * for indexing.
 * NOTE(review): presumably backrefs chains root_backref entries - confirm
 * against the code that populates it, which is outside this part of the file.
 */
405 struct root_record {
406         struct list_head backrefs;
407         struct cache_extent cache;
408         unsigned int found_root_item:1;
409         u64 objectid;
410         u32 found_ref;
411 };
412
/*
 * A cache_extent paired with an opaque pointer.
 * NOTE(review): what data points to is decided by code outside this part of
 * the file.
 */
413 struct ptr_node {
414         struct cache_extent cache;
415         void *data;
416 };
417
/*
 * Cache entry carrying its own root and inode cache trees, a current inode
 * record pointer and a reference count; walk_control keeps an array of
 * pointers to these.
 * NOTE(review): the code that maintains it is outside this part of the file.
 */
418 struct shared_node {
419         struct cache_extent cache;
420         struct cache_tree root_cache;
421         struct cache_tree inode_cache;
422         struct inode_record *current;
423         u32 refs;
424 };
425
/*
 * A (start, size) pair.
 * NOTE(review): presumably a byte range of a tree block - confirm against
 * the users, which are outside this part of the file.
 */
426 struct block_info {
427         u64 start;
428         u32 size;
429 };
430
/*
 * Traversal state: a cache tree named shared plus one shared_node pointer
 * per possible tree level (BTRFS_MAX_LEVEL slots).
 * NOTE(review): active_node presumably indexes nodes[] - confirm against
 * the tree-walking code, which is outside this part of the file.
 */
431 struct walk_control {
432         struct cache_tree shared;
433         struct shared_node *nodes[BTRFS_MAX_LEVEL];
434         int active_node;
435         int root_level;
436 };
437
/*
 * A key together with a root id, chainable through "list".
 * NOTE(review): presumably queued on the delete_items list - confirm against
 * the users, which are outside this part of the file.
 */
438 struct bad_item {
439         struct btrfs_key key;
440         u64 root_id;
441         struct list_head list;
442 };
443
/*
 * A (bytenr, bytes) candidate with two integer tallies, chainable through
 * "list".
 * NOTE(review): the meaning of count and broken is set by code outside this
 * part of the file.
 */
444 struct extent_entry {
445         u64 bytenr;
446         u64 bytes;
447         int count;
448         int broken;
449         struct list_head list;
450 };
451
/*
 * Summary of one tree root, indexable through cache_extent.
 * NOTE(review): presumably these are the entries kept in roots_info_cache -
 * confirm against the code that builds it.
 */
452 struct root_item_info {
453         /* level of the root */
454         u8 level;
455         /* number of nodes at this level, must be 1 for a root */
456         int node_count;
457         u64 bytenr;
458         u64 gen;
459         struct cache_extent cache_extent;
460 };
461
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.
 *
 * Every flag owns a distinct bit so that two different problems can never be
 * mistaken for each other.  CROSSING_STRIPE_BOUNDARY used to share bit 4
 * with REFERENCER_MISMATCH; it now has bit 9, and all other values are
 * unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
478
479 static void *print_status_check(void *p)
480 {
481         struct task_ctx *priv = p;
482         const char work_indicator[] = { '.', 'o', 'O', 'o' };
483         uint32_t count = 0;
484         static char *task_position_string[] = {
485                 "checking extents",
486                 "checking free space cache",
487                 "checking fs roots",
488         };
489
490         task_period_start(priv->info, 1000 /* 1s */);
491
492         if (priv->tp == TASK_NOTHING)
493                 return NULL;
494
495         while (1) {
496                 printf("%s [%c]\r", task_position_string[priv->tp],
497                                 work_indicator[count % 4]);
498                 count++;
499                 fflush(stdout);
500                 task_period_wait(priv->info);
501         }
502         return NULL;
503 }
504
/*
 * Companion of print_status_check(): emits a newline and flushes stdout.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
512
513 static enum btrfs_check_mode parse_check_mode(const char *str)
514 {
515         if (strcmp(str, "lowmem") == 0)
516                 return CHECK_MODE_LOWMEM;
517         if (strcmp(str, "orig") == 0)
518                 return CHECK_MODE_ORIGINAL;
519         if (strcmp(str, "original") == 0)
520                 return CHECK_MODE_ORIGINAL;
521
522         return CHECK_MODE_UNKNOWN;
523 }
524
525 /* Compatible function to allow reuse of old codes */
526 static u64 first_extent_gap(struct rb_root *holes)
527 {
528         struct file_extent_hole *hole;
529
530         if (RB_EMPTY_ROOT(holes))
531                 return (u64)-1;
532
533         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
534         return hole->start;
535 }
536
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
538 {
539         struct file_extent_hole *hole1;
540         struct file_extent_hole *hole2;
541
542         hole1 = rb_entry(node1, struct file_extent_hole, node);
543         hole2 = rb_entry(node2, struct file_extent_hole, node);
544
545         if (hole1->start > hole2->start)
546                 return -1;
547         if (hole1->start < hole2->start)
548                 return 1;
549         /* Now hole1->start == hole2->start */
550         if (hole1->len >= hole2->len)
551                 /*
552                  * Hole 1 will be merge center
553                  * Same hole will be merged later
554                  */
555                 return -1;
556         /* Hole 2 will be merge center */
557         return 1;
558 }
559
560 /*
561  * Add a hole to the record
562  *
563  * This will do hole merge for copy_file_extent_holes(),
564  * which will ensure there won't be continuous holes.
565  */
566 static int add_file_extent_hole(struct rb_root *holes,
567                                 u64 start, u64 len)
568 {
569         struct file_extent_hole *hole;
570         struct file_extent_hole *prev = NULL;
571         struct file_extent_hole *next = NULL;
572
573         hole = malloc(sizeof(*hole));
574         if (!hole)
575                 return -ENOMEM;
576         hole->start = start;
577         hole->len = len;
578         /* Since compare will not return 0, no -EEXIST will happen */
579         rb_insert(holes, &hole->node, compare_hole);
580
581         /* simple merge with previous hole */
582         if (rb_prev(&hole->node))
583                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
584                                 node);
585         if (prev && prev->start + prev->len >= hole->start) {
586                 hole->len = hole->start + hole->len - prev->start;
587                 hole->start = prev->start;
588                 rb_erase(&prev->node, holes);
589                 free(prev);
590                 prev = NULL;
591         }
592
593         /* iterate merge with next holes */
594         while (1) {
595                 if (!rb_next(&hole->node))
596                         break;
597                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
598                                         node);
599                 if (hole->start + hole->len >= next->start) {
600                         if (hole->start + hole->len <= next->start + next->len)
601                                 hole->len = next->start + next->len -
602                                             hole->start;
603                         rb_erase(&next->node, holes);
604                         free(next);
605                         next = NULL;
606                 } else
607                         break;
608         }
609         return 0;
610 }
611
612 static int compare_hole_range(struct rb_node *node, void *data)
613 {
614         struct file_extent_hole *hole;
615         u64 start;
616
617         hole = (struct file_extent_hole *)data;
618         start = hole->start;
619
620         hole = rb_entry(node, struct file_extent_hole, node);
621         if (start < hole->start)
622                 return -1;
623         if (start >= hole->start && start < hole->start + hole->len)
624                 return 0;
625         return 1;
626 }
627
628 /*
629  * Delete a hole in the record
630  *
631  * This will do the hole split and is much restrict than add.
632  */
633 static int del_file_extent_hole(struct rb_root *holes,
634                                 u64 start, u64 len)
635 {
636         struct file_extent_hole *hole;
637         struct file_extent_hole tmp;
638         u64 prev_start = 0;
639         u64 prev_len = 0;
640         u64 next_start = 0;
641         u64 next_len = 0;
642         struct rb_node *node;
643         int have_prev = 0;
644         int have_next = 0;
645         int ret = 0;
646
647         tmp.start = start;
648         tmp.len = len;
649         node = rb_search(holes, &tmp, compare_hole_range, NULL);
650         if (!node)
651                 return -EEXIST;
652         hole = rb_entry(node, struct file_extent_hole, node);
653         if (start + len > hole->start + hole->len)
654                 return -EEXIST;
655
656         /*
657          * Now there will be no overlap, delete the hole and re-add the
658          * split(s) if they exists.
659          */
660         if (start > hole->start) {
661                 prev_start = hole->start;
662                 prev_len = start - hole->start;
663                 have_prev = 1;
664         }
665         if (hole->start + hole->len > start + len) {
666                 next_start = start + len;
667                 next_len = hole->start + hole->len - start - len;
668                 have_next = 1;
669         }
670         rb_erase(node, holes);
671         free(hole);
672         if (have_prev) {
673                 ret = add_file_extent_hole(holes, prev_start, prev_len);
674                 if (ret < 0)
675                         return ret;
676         }
677         if (have_next) {
678                 ret = add_file_extent_hole(holes, next_start, next_len);
679                 if (ret < 0)
680                         return ret;
681         }
682         return 0;
683 }
684
685 static int copy_file_extent_holes(struct rb_root *dst,
686                                   struct rb_root *src)
687 {
688         struct file_extent_hole *hole;
689         struct rb_node *node;
690         int ret = 0;
691
692         node = rb_first(src);
693         while (node) {
694                 hole = rb_entry(node, struct file_extent_hole, node);
695                 ret = add_file_extent_hole(dst, hole->start, hole->len);
696                 if (ret)
697                         break;
698                 node = rb_next(node);
699         }
700         return ret;
701 }
702
703 static void free_file_extent_holes(struct rb_root *holes)
704 {
705         struct rb_node *node;
706         struct file_extent_hole *hole;
707
708         node = rb_first(holes);
709         while (node) {
710                 hole = rb_entry(node, struct file_extent_hole, node);
711                 rb_erase(node, holes);
712                 free(hole);
713                 node = rb_first(holes);
714         }
715 }
716
717 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
718
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720                                  struct btrfs_root *root)
721 {
722         if (root->last_trans != trans->transid) {
723                 root->track_dirty = 1;
724                 root->last_trans = trans->transid;
725                 root->commit_root = root->node;
726                 extent_buffer_get(root->node);
727         }
728 }
729
730 static u8 imode_to_type(u32 imode)
731 {
732 #define S_SHIFT 12
733         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
735                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
736                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
737                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
738                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
739                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
740                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
741         };
742
743         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
744 #undef S_SHIFT
745 }
746
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
748 {
749         struct device_record *rec1;
750         struct device_record *rec2;
751
752         rec1 = rb_entry(node1, struct device_record, node);
753         rec2 = rb_entry(node2, struct device_record, node);
754         if (rec1->devid > rec2->devid)
755                 return -1;
756         else if (rec1->devid < rec2->devid)
757                 return 1;
758         else
759                 return 0;
760 }
761
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
763 {
764         struct inode_record *rec;
765         struct inode_backref *backref;
766         struct inode_backref *orig;
767         struct inode_backref *tmp;
768         struct orphan_data_extent *src_orphan;
769         struct orphan_data_extent *dst_orphan;
770         struct rb_node *rb;
771         size_t size;
772         int ret;
773
774         rec = malloc(sizeof(*rec));
775         if (!rec)
776                 return ERR_PTR(-ENOMEM);
777         memcpy(rec, orig_rec, sizeof(*rec));
778         rec->refs = 1;
779         INIT_LIST_HEAD(&rec->backrefs);
780         INIT_LIST_HEAD(&rec->orphan_extents);
781         rec->holes = RB_ROOT;
782
783         list_for_each_entry(orig, &orig_rec->backrefs, list) {
784                 size = sizeof(*orig) + orig->namelen + 1;
785                 backref = malloc(size);
786                 if (!backref) {
787                         ret = -ENOMEM;
788                         goto cleanup;
789                 }
790                 memcpy(backref, orig, size);
791                 list_add_tail(&backref->list, &rec->backrefs);
792         }
793         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794                 dst_orphan = malloc(sizeof(*dst_orphan));
795                 if (!dst_orphan) {
796                         ret = -ENOMEM;
797                         goto cleanup;
798                 }
799                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
801         }
802         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
803         if (ret < 0)
804                 goto cleanup_rb;
805
806         return rec;
807
808 cleanup_rb:
809         rb = rb_first(&rec->holes);
810         while (rb) {
811                 struct file_extent_hole *hole;
812
813                 hole = rb_entry(rb, struct file_extent_hole, node);
814                 rb = rb_next(rb);
815                 free(hole);
816         }
817
818 cleanup:
819         if (!list_empty(&rec->backrefs))
820                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821                         list_del(&orig->list);
822                         free(orig);
823                 }
824
825         if (!list_empty(&rec->orphan_extents))
826                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827                         list_del(&orig->list);
828                         free(orig);
829                 }
830
831         free(rec);
832
833         return ERR_PTR(ret);
834 }
835
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
837                                       u64 objectid)
838 {
839         struct orphan_data_extent *orphan;
840
841         if (list_empty(orphan_extents))
842                 return;
843         printf("The following data extent is lost in tree %llu:\n",
844                objectid);
845         list_for_each_entry(orphan, orphan_extents, list) {
846                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
848                        orphan->disk_len);
849         }
850 }
851
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
853 {
854         u64 root_objectid = root->root_key.objectid;
855         int errors = rec->errors;
856
857         if (!errors)
858                 return;
859         /* reloc root errors, we print its corresponding fs root objectid*/
860         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861                 root_objectid = root->root_key.offset;
862                 fprintf(stderr, "reloc");
863         }
864         fprintf(stderr, "root %llu inode %llu errors %x",
865                 (unsigned long long) root_objectid,
866                 (unsigned long long) rec->ino, rec->errors);
867
868         if (errors & I_ERR_NO_INODE_ITEM)
869                 fprintf(stderr, ", no inode item");
870         if (errors & I_ERR_NO_ORPHAN_ITEM)
871                 fprintf(stderr, ", no orphan item");
872         if (errors & I_ERR_DUP_INODE_ITEM)
873                 fprintf(stderr, ", dup inode item");
874         if (errors & I_ERR_DUP_DIR_INDEX)
875                 fprintf(stderr, ", dup dir index");
876         if (errors & I_ERR_ODD_DIR_ITEM)
877                 fprintf(stderr, ", odd dir item");
878         if (errors & I_ERR_ODD_FILE_EXTENT)
879                 fprintf(stderr, ", odd file extent");
880         if (errors & I_ERR_BAD_FILE_EXTENT)
881                 fprintf(stderr, ", bad file extent");
882         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883                 fprintf(stderr, ", file extent overlap");
884         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885                 fprintf(stderr, ", file extent discount");
886         if (errors & I_ERR_DIR_ISIZE_WRONG)
887                 fprintf(stderr, ", dir isize wrong");
888         if (errors & I_ERR_FILE_NBYTES_WRONG)
889                 fprintf(stderr, ", nbytes wrong");
890         if (errors & I_ERR_ODD_CSUM_ITEM)
891                 fprintf(stderr, ", odd csum item");
892         if (errors & I_ERR_SOME_CSUM_MISSING)
893                 fprintf(stderr, ", some csum missing");
894         if (errors & I_ERR_LINK_COUNT_WRONG)
895                 fprintf(stderr, ", link count wrong");
896         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897                 fprintf(stderr, ", orphan file extent");
898         fprintf(stderr, "\n");
899         /* Print the orphan extents if needed */
900         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
901                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
902
903         /* Print the holes if needed */
904         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905                 struct file_extent_hole *hole;
906                 struct rb_node *node;
907                 int found = 0;
908
909                 node = rb_first(&rec->holes);
910                 fprintf(stderr, "Found file extent holes:\n");
911                 while (node) {
912                         found = 1;
913                         hole = rb_entry(node, struct file_extent_hole, node);
914                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
915                                 hole->start, hole->len);
916                         node = rb_next(node);
917                 }
918                 if (!found)
919                         fprintf(stderr, "\tstart: 0, len: %llu\n",
920                                 round_up(rec->isize,
921                                          root->fs_info->sectorsize));
922         }
923 }
924
925 static void print_ref_error(int errors)
926 {
927         if (errors & REF_ERR_NO_DIR_ITEM)
928                 fprintf(stderr, ", no dir item");
929         if (errors & REF_ERR_NO_DIR_INDEX)
930                 fprintf(stderr, ", no dir index");
931         if (errors & REF_ERR_NO_INODE_REF)
932                 fprintf(stderr, ", no inode ref");
933         if (errors & REF_ERR_DUP_DIR_ITEM)
934                 fprintf(stderr, ", dup dir item");
935         if (errors & REF_ERR_DUP_DIR_INDEX)
936                 fprintf(stderr, ", dup dir index");
937         if (errors & REF_ERR_DUP_INODE_REF)
938                 fprintf(stderr, ", dup inode ref");
939         if (errors & REF_ERR_INDEX_UNMATCH)
940                 fprintf(stderr, ", index mismatch");
941         if (errors & REF_ERR_FILETYPE_UNMATCH)
942                 fprintf(stderr, ", filetype mismatch");
943         if (errors & REF_ERR_NAME_TOO_LONG)
944                 fprintf(stderr, ", name too long");
945         if (errors & REF_ERR_NO_ROOT_REF)
946                 fprintf(stderr, ", no root ref");
947         if (errors & REF_ERR_NO_ROOT_BACKREF)
948                 fprintf(stderr, ", no root backref");
949         if (errors & REF_ERR_DUP_ROOT_REF)
950                 fprintf(stderr, ", dup root ref");
951         if (errors & REF_ERR_DUP_ROOT_BACKREF)
952                 fprintf(stderr, ", dup root backref");
953         fprintf(stderr, "\n");
954 }
955
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
957                                           u64 ino, int mod)
958 {
959         struct ptr_node *node;
960         struct cache_extent *cache;
961         struct inode_record *rec = NULL;
962         int ret;
963
964         cache = lookup_cache_extent(inode_cache, ino, 1);
965         if (cache) {
966                 node = container_of(cache, struct ptr_node, cache);
967                 rec = node->data;
968                 if (mod && rec->refs > 1) {
969                         node->data = clone_inode_rec(rec);
970                         if (IS_ERR(node->data))
971                                 return node->data;
972                         rec->refs--;
973                         rec = node->data;
974                 }
975         } else if (mod) {
976                 rec = calloc(1, sizeof(*rec));
977                 if (!rec)
978                         return ERR_PTR(-ENOMEM);
979                 rec->ino = ino;
980                 rec->extent_start = (u64)-1;
981                 rec->refs = 1;
982                 INIT_LIST_HEAD(&rec->backrefs);
983                 INIT_LIST_HEAD(&rec->orphan_extents);
984                 rec->holes = RB_ROOT;
985
986                 node = malloc(sizeof(*node));
987                 if (!node) {
988                         free(rec);
989                         return ERR_PTR(-ENOMEM);
990                 }
991                 node->cache.start = ino;
992                 node->cache.size = 1;
993                 node->data = rec;
994
995                 if (ino == BTRFS_FREE_INO_OBJECTID)
996                         rec->found_link = 1;
997
998                 ret = insert_cache_extent(inode_cache, &node->cache);
999                 if (ret)
1000                         return ERR_PTR(-EEXIST);
1001         }
1002         return rec;
1003 }
1004
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1006 {
1007         struct orphan_data_extent *orphan;
1008
1009         while (!list_empty(orphan_extents)) {
1010                 orphan = list_entry(orphan_extents->next,
1011                                     struct orphan_data_extent, list);
1012                 list_del(&orphan->list);
1013                 free(orphan);
1014         }
1015 }
1016
1017 static void free_inode_rec(struct inode_record *rec)
1018 {
1019         struct inode_backref *backref;
1020
1021         if (--rec->refs > 0)
1022                 return;
1023
1024         while (!list_empty(&rec->backrefs)) {
1025                 backref = to_inode_backref(rec->backrefs.next);
1026                 list_del(&backref->list);
1027                 free(backref);
1028         }
1029         free_orphan_data_extents(&rec->orphan_extents);
1030         free_file_extent_holes(&rec->holes);
1031         free(rec);
1032 }
1033
1034 static int can_free_inode_rec(struct inode_record *rec)
1035 {
1036         if (!rec->errors && rec->checked && rec->found_inode_item &&
1037             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1038                 return 1;
1039         return 0;
1040 }
1041
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043                                  struct inode_record *rec)
1044 {
1045         struct cache_extent *cache;
1046         struct inode_backref *tmp, *backref;
1047         struct ptr_node *node;
1048         u8 filetype;
1049
1050         if (!rec->found_inode_item)
1051                 return;
1052
1053         filetype = imode_to_type(rec->imode);
1054         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055                 if (backref->found_dir_item && backref->found_dir_index) {
1056                         if (backref->filetype != filetype)
1057                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058                         if (!backref->errors && backref->found_inode_ref &&
1059                             rec->nlink == rec->found_link) {
1060                                 list_del(&backref->list);
1061                                 free(backref);
1062                         }
1063                 }
1064         }
1065
1066         if (!rec->checked || rec->merging)
1067                 return;
1068
1069         if (S_ISDIR(rec->imode)) {
1070                 if (rec->found_size != rec->isize)
1071                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072                 if (rec->found_file_extent)
1073                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075                 if (rec->found_dir_item)
1076                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1077                 if (rec->found_size != rec->nbytes)
1078                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1079                 if (rec->nlink > 0 && !no_holes &&
1080                     (rec->extent_end < rec->isize ||
1081                      first_extent_gap(&rec->holes) < rec->isize))
1082                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1083         }
1084
1085         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086                 if (rec->found_csum_item && rec->nodatasum)
1087                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088                 if (rec->some_csum_missing && !rec->nodatasum)
1089                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1090         }
1091
1092         BUG_ON(rec->refs != 1);
1093         if (can_free_inode_rec(rec)) {
1094                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095                 node = container_of(cache, struct ptr_node, cache);
1096                 BUG_ON(node->data != rec);
1097                 remove_cache_extent(inode_cache, &node->cache);
1098                 free(node);
1099                 free_inode_rec(rec);
1100         }
1101 }
1102
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1104 {
1105         struct btrfs_path path;
1106         struct btrfs_key key;
1107         int ret;
1108
1109         key.objectid = BTRFS_ORPHAN_OBJECTID;
1110         key.type = BTRFS_ORPHAN_ITEM_KEY;
1111         key.offset = ino;
1112
1113         btrfs_init_path(&path);
1114         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115         btrfs_release_path(&path);
1116         if (ret > 0)
1117                 ret = -ENOENT;
1118         return ret;
1119 }
1120
1121 static int process_inode_item(struct extent_buffer *eb,
1122                               int slot, struct btrfs_key *key,
1123                               struct shared_node *active_node)
1124 {
1125         struct inode_record *rec;
1126         struct btrfs_inode_item *item;
1127
1128         rec = active_node->current;
1129         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130         if (rec->found_inode_item) {
1131                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1132                 return 1;
1133         }
1134         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135         rec->nlink = btrfs_inode_nlink(eb, item);
1136         rec->isize = btrfs_inode_size(eb, item);
1137         rec->nbytes = btrfs_inode_nbytes(eb, item);
1138         rec->imode = btrfs_inode_mode(eb, item);
1139         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1140                 rec->nodatasum = 1;
1141         rec->found_inode_item = 1;
1142         if (rec->nlink == 0)
1143                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144         maybe_free_inode_rec(&active_node->inode_cache, rec);
1145         return 0;
1146 }
1147
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1149                                                 const char *name,
1150                                                 int namelen, u64 dir)
1151 {
1152         struct inode_backref *backref;
1153
1154         list_for_each_entry(backref, &rec->backrefs, list) {
1155                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1156                         break;
1157                 if (backref->dir != dir || backref->namelen != namelen)
1158                         continue;
1159                 if (memcmp(name, backref->name, namelen))
1160                         continue;
1161                 return backref;
1162         }
1163
1164         backref = malloc(sizeof(*backref) + namelen + 1);
1165         if (!backref)
1166                 return NULL;
1167         memset(backref, 0, sizeof(*backref));
1168         backref->dir = dir;
1169         backref->namelen = namelen;
1170         memcpy(backref->name, name, namelen);
1171         backref->name[namelen] = '\0';
1172         list_add_tail(&backref->list, &rec->backrefs);
1173         return backref;
1174 }
1175
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177                              u64 ino, u64 dir, u64 index,
1178                              const char *name, int namelen,
1179                              u8 filetype, u8 itemtype, int errors)
1180 {
1181         struct inode_record *rec;
1182         struct inode_backref *backref;
1183
1184         rec = get_inode_rec(inode_cache, ino, 1);
1185         BUG_ON(IS_ERR(rec));
1186         backref = get_inode_backref(rec, name, namelen, dir);
1187         BUG_ON(!backref);
1188         if (errors)
1189                 backref->errors |= errors;
1190         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191                 if (backref->found_dir_index)
1192                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193                 if (backref->found_inode_ref && backref->index != index)
1194                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1195                 if (backref->found_dir_item && backref->filetype != filetype)
1196                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1197
1198                 backref->index = index;
1199                 backref->filetype = filetype;
1200                 backref->found_dir_index = 1;
1201         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1202                 rec->found_link++;
1203                 if (backref->found_dir_item)
1204                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205                 if (backref->found_dir_index && backref->filetype != filetype)
1206                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1207
1208                 backref->filetype = filetype;
1209                 backref->found_dir_item = 1;
1210         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212                 if (backref->found_inode_ref)
1213                         backref->errors |= REF_ERR_DUP_INODE_REF;
1214                 if (backref->found_dir_index && backref->index != index)
1215                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1216                 else
1217                         backref->index = index;
1218
1219                 backref->ref_type = itemtype;
1220                 backref->found_inode_ref = 1;
1221         } else {
1222                 BUG_ON(1);
1223         }
1224
1225         maybe_free_inode_rec(inode_cache, rec);
1226         return 0;
1227 }
1228
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230                             struct cache_tree *dst_cache)
1231 {
1232         struct inode_backref *backref;
1233         u32 dir_count = 0;
1234         int ret = 0;
1235
1236         dst->merging = 1;
1237         list_for_each_entry(backref, &src->backrefs, list) {
1238                 if (backref->found_dir_index) {
1239                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1240                                         backref->index, backref->name,
1241                                         backref->namelen, backref->filetype,
1242                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1243                 }
1244                 if (backref->found_dir_item) {
1245                         dir_count++;
1246                         add_inode_backref(dst_cache, dst->ino,
1247                                         backref->dir, 0, backref->name,
1248                                         backref->namelen, backref->filetype,
1249                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1250                 }
1251                 if (backref->found_inode_ref) {
1252                         add_inode_backref(dst_cache, dst->ino,
1253                                         backref->dir, backref->index,
1254                                         backref->name, backref->namelen, 0,
1255                                         backref->ref_type, backref->errors);
1256                 }
1257         }
1258
1259         if (src->found_dir_item)
1260                 dst->found_dir_item = 1;
1261         if (src->found_file_extent)
1262                 dst->found_file_extent = 1;
1263         if (src->found_csum_item)
1264                 dst->found_csum_item = 1;
1265         if (src->some_csum_missing)
1266                 dst->some_csum_missing = 1;
1267         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1269                 if (ret < 0)
1270                         return ret;
1271         }
1272
1273         BUG_ON(src->found_link < dir_count);
1274         dst->found_link += src->found_link - dir_count;
1275         dst->found_size += src->found_size;
1276         if (src->extent_start != (u64)-1) {
1277                 if (dst->extent_start == (u64)-1) {
1278                         dst->extent_start = src->extent_start;
1279                         dst->extent_end = src->extent_end;
1280                 } else {
1281                         if (dst->extent_end > src->extent_start)
1282                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1283                         else if (dst->extent_end < src->extent_start) {
1284                                 ret = add_file_extent_hole(&dst->holes,
1285                                         dst->extent_end,
1286                                         src->extent_start - dst->extent_end);
1287                         }
1288                         if (dst->extent_end < src->extent_end)
1289                                 dst->extent_end = src->extent_end;
1290                 }
1291         }
1292
1293         dst->errors |= src->errors;
1294         if (src->found_inode_item) {
1295                 if (!dst->found_inode_item) {
1296                         dst->nlink = src->nlink;
1297                         dst->isize = src->isize;
1298                         dst->nbytes = src->nbytes;
1299                         dst->imode = src->imode;
1300                         dst->nodatasum = src->nodatasum;
1301                         dst->found_inode_item = 1;
1302                 } else {
1303                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1304                 }
1305         }
1306         dst->merging = 0;
1307
1308         return 0;
1309 }
1310
1311 static int splice_shared_node(struct shared_node *src_node,
1312                               struct shared_node *dst_node)
1313 {
1314         struct cache_extent *cache;
1315         struct ptr_node *node, *ins;
1316         struct cache_tree *src, *dst;
1317         struct inode_record *rec, *conflict;
1318         u64 current_ino = 0;
1319         int splice = 0;
1320         int ret;
1321
1322         if (--src_node->refs == 0)
1323                 splice = 1;
1324         if (src_node->current)
1325                 current_ino = src_node->current->ino;
1326
1327         src = &src_node->root_cache;
1328         dst = &dst_node->root_cache;
1329 again:
1330         cache = search_cache_extent(src, 0);
1331         while (cache) {
1332                 node = container_of(cache, struct ptr_node, cache);
1333                 rec = node->data;
1334                 cache = next_cache_extent(cache);
1335
1336                 if (splice) {
1337                         remove_cache_extent(src, &node->cache);
1338                         ins = node;
1339                 } else {
1340                         ins = malloc(sizeof(*ins));
1341                         BUG_ON(!ins);
1342                         ins->cache.start = node->cache.start;
1343                         ins->cache.size = node->cache.size;
1344                         ins->data = rec;
1345                         rec->refs++;
1346                 }
1347                 ret = insert_cache_extent(dst, &ins->cache);
1348                 if (ret == -EEXIST) {
1349                         conflict = get_inode_rec(dst, rec->ino, 1);
1350                         BUG_ON(IS_ERR(conflict));
1351                         merge_inode_recs(rec, conflict, dst);
1352                         if (rec->checked) {
1353                                 conflict->checked = 1;
1354                                 if (dst_node->current == conflict)
1355                                         dst_node->current = NULL;
1356                         }
1357                         maybe_free_inode_rec(dst, conflict);
1358                         free_inode_rec(rec);
1359                         free(ins);
1360                 } else {
1361                         BUG_ON(ret);
1362                 }
1363         }
1364
1365         if (src == &src_node->root_cache) {
1366                 src = &src_node->inode_cache;
1367                 dst = &dst_node->inode_cache;
1368                 goto again;
1369         }
1370
1371         if (current_ino > 0 && (!dst_node->current ||
1372             current_ino > dst_node->current->ino)) {
1373                 if (dst_node->current) {
1374                         dst_node->current->checked = 1;
1375                         maybe_free_inode_rec(dst, dst_node->current);
1376                 }
1377                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378                 BUG_ON(IS_ERR(dst_node->current));
1379         }
1380         return 0;
1381 }
1382
1383 static void free_inode_ptr(struct cache_extent *cache)
1384 {
1385         struct ptr_node *node;
1386         struct inode_record *rec;
1387
1388         node = container_of(cache, struct ptr_node, cache);
1389         rec = node->data;
1390         free_inode_rec(rec);
1391         free(node);
1392 }
1393
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1395
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1397                                             u64 bytenr)
1398 {
1399         struct cache_extent *cache;
1400         struct shared_node *node;
1401
1402         cache = lookup_cache_extent(shared, bytenr, 1);
1403         if (cache) {
1404                 node = container_of(cache, struct shared_node, cache);
1405                 return node;
1406         }
1407         return NULL;
1408 }
1409
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1411 {
1412         int ret;
1413         struct shared_node *node;
1414
1415         node = calloc(1, sizeof(*node));
1416         if (!node)
1417                 return -ENOMEM;
1418         node->cache.start = bytenr;
1419         node->cache.size = 1;
1420         cache_tree_init(&node->root_cache);
1421         cache_tree_init(&node->inode_cache);
1422         node->refs = refs;
1423
1424         ret = insert_cache_extent(shared, &node->cache);
1425
1426         return ret;
1427 }
1428
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430                              struct walk_control *wc, int level)
1431 {
1432         struct shared_node *node;
1433         struct shared_node *dest;
1434         int ret;
1435
1436         if (level == wc->active_node)
1437                 return 0;
1438
1439         BUG_ON(wc->active_node <= level);
1440         node = find_shared_node(&wc->shared, bytenr);
1441         if (!node) {
1442                 ret = add_shared_node(&wc->shared, bytenr, refs);
1443                 BUG_ON(ret);
1444                 node = find_shared_node(&wc->shared, bytenr);
1445                 wc->nodes[level] = node;
1446                 wc->active_node = level;
1447                 return 0;
1448         }
1449
1450         if (wc->root_level == wc->active_node &&
1451             btrfs_root_refs(&root->root_item) == 0) {
1452                 if (--node->refs == 0) {
1453                         free_inode_recs_tree(&node->root_cache);
1454                         free_inode_recs_tree(&node->inode_cache);
1455                         remove_cache_extent(&wc->shared, &node->cache);
1456                         free(node);
1457                 }
1458                 return 1;
1459         }
1460
1461         dest = wc->nodes[wc->active_node];
1462         splice_shared_node(node, dest);
1463         if (node->refs == 0) {
1464                 remove_cache_extent(&wc->shared, &node->cache);
1465                 free(node);
1466         }
1467         return 1;
1468 }
1469
1470 static int leave_shared_node(struct btrfs_root *root,
1471                              struct walk_control *wc, int level)
1472 {
1473         struct shared_node *node;
1474         struct shared_node *dest;
1475         int i;
1476
1477         if (level == wc->root_level)
1478                 return 0;
1479
1480         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1481                 if (wc->nodes[i])
1482                         break;
1483         }
1484         BUG_ON(i >= BTRFS_MAX_LEVEL);
1485
1486         node = wc->nodes[wc->active_node];
1487         wc->nodes[wc->active_node] = NULL;
1488         wc->active_node = i;
1489
1490         dest = wc->nodes[wc->active_node];
1491         if (wc->active_node < wc->root_level ||
1492             btrfs_root_refs(&root->root_item) > 0) {
1493                 BUG_ON(node->refs <= 1);
1494                 splice_shared_node(node, dest);
1495         } else {
1496                 BUG_ON(node->refs < 2);
1497                 node->refs--;
1498         }
1499         return 0;
1500 }
1501
1502 /*
1503  * Returns:
1504  * < 0 - on error
1505  * 1   - if the root with id child_root_id is a child of root parent_root_id
1506  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1507  *       has other root(s) as parent(s)
1508  * 2   - if the root child_root_id doesn't have any parent roots
1509  */
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1511                          u64 child_root_id)
1512 {
1513         struct btrfs_path path;
1514         struct btrfs_key key;
1515         struct extent_buffer *leaf;
1516         int has_parent = 0;
1517         int ret;
1518
1519         btrfs_init_path(&path);
1520
1521         key.objectid = parent_root_id;
1522         key.type = BTRFS_ROOT_REF_KEY;
1523         key.offset = child_root_id;
1524         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525                                 0, 0);
1526         if (ret < 0)
1527                 return ret;
1528         btrfs_release_path(&path);
1529         if (!ret)
1530                 return 1;
1531
1532         key.objectid = child_root_id;
1533         key.type = BTRFS_ROOT_BACKREF_KEY;
1534         key.offset = 0;
1535         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1536                                 0, 0);
1537         if (ret < 0)
1538                 goto out;
1539
1540         while (1) {
1541                 leaf = path.nodes[0];
1542                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1544                         if (ret)
1545                                 break;
1546                         leaf = path.nodes[0];
1547                 }
1548
1549                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1550                 if (key.objectid != child_root_id ||
1551                     key.type != BTRFS_ROOT_BACKREF_KEY)
1552                         break;
1553
1554                 has_parent = 1;
1555
1556                 if (key.offset == parent_root_id) {
1557                         btrfs_release_path(&path);
1558                         return 1;
1559                 }
1560
1561                 path.slots[0]++;
1562         }
1563 out:
1564         btrfs_release_path(&path);
1565         if (ret < 0)
1566                 return ret;
1567         return has_parent ? 0 : 2;
1568 }
1569
1570 static int process_dir_item(struct extent_buffer *eb,
1571                             int slot, struct btrfs_key *key,
1572                             struct shared_node *active_node)
1573 {
1574         u32 total;
1575         u32 cur = 0;
1576         u32 len;
1577         u32 name_len;
1578         u32 data_len;
1579         int error;
1580         int nritems = 0;
1581         u8 filetype;
1582         struct btrfs_dir_item *di;
1583         struct inode_record *rec;
1584         struct cache_tree *root_cache;
1585         struct cache_tree *inode_cache;
1586         struct btrfs_key location;
1587         char namebuf[BTRFS_NAME_LEN];
1588
1589         root_cache = &active_node->root_cache;
1590         inode_cache = &active_node->inode_cache;
1591         rec = active_node->current;
1592         rec->found_dir_item = 1;
1593
1594         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595         total = btrfs_item_size_nr(eb, slot);
1596         while (cur < total) {
1597                 nritems++;
1598                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599                 name_len = btrfs_dir_name_len(eb, di);
1600                 data_len = btrfs_dir_data_len(eb, di);
1601                 filetype = btrfs_dir_type(eb, di);
1602
1603                 rec->found_size += name_len;
1604                 if (cur + sizeof(*di) + name_len > total ||
1605                     name_len > BTRFS_NAME_LEN) {
1606                         error = REF_ERR_NAME_TOO_LONG;
1607
1608                         if (cur + sizeof(*di) > total)
1609                                 break;
1610                         len = min_t(u32, total - cur - sizeof(*di),
1611                                     BTRFS_NAME_LEN);
1612                 } else {
1613                         len = name_len;
1614                         error = 0;
1615                 }
1616
1617                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1618
1619                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620                     key->offset != btrfs_name_hash(namebuf, len)) {
1621                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1622                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623                         key->objectid, key->offset, namebuf, len, filetype,
1624                         key->offset, btrfs_name_hash(namebuf, len));
1625                 }
1626
1627                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628                         add_inode_backref(inode_cache, location.objectid,
1629                                           key->objectid, key->offset, namebuf,
1630                                           len, filetype, key->type, error);
1631                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632                         add_inode_backref(root_cache, location.objectid,
1633                                           key->objectid, key->offset,
1634                                           namebuf, len, filetype,
1635                                           key->type, error);
1636                 } else {
1637                         fprintf(stderr, "invalid location in dir item %u\n",
1638                                 location.type);
1639                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640                                           key->objectid, key->offset, namebuf,
1641                                           len, filetype, key->type, error);
1642                 }
1643
1644                 len = sizeof(*di) + name_len + data_len;
1645                 di = (struct btrfs_dir_item *)((char *)di + len);
1646                 cur += len;
1647         }
1648         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1650
1651         return 0;
1652 }
1653
1654 static int process_inode_ref(struct extent_buffer *eb,
1655                              int slot, struct btrfs_key *key,
1656                              struct shared_node *active_node)
1657 {
1658         u32 total;
1659         u32 cur = 0;
1660         u32 len;
1661         u32 name_len;
1662         u64 index;
1663         int error;
1664         struct cache_tree *inode_cache;
1665         struct btrfs_inode_ref *ref;
1666         char namebuf[BTRFS_NAME_LEN];
1667
1668         inode_cache = &active_node->inode_cache;
1669
1670         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671         total = btrfs_item_size_nr(eb, slot);
1672         while (cur < total) {
1673                 name_len = btrfs_inode_ref_name_len(eb, ref);
1674                 index = btrfs_inode_ref_index(eb, ref);
1675
1676                 /* inode_ref + namelen should not cross item boundary */
1677                 if (cur + sizeof(*ref) + name_len > total ||
1678                     name_len > BTRFS_NAME_LEN) {
1679                         if (total < cur + sizeof(*ref))
1680                                 break;
1681
1682                         /* Still try to read out the remaining part */
1683                         len = min_t(u32, total - cur - sizeof(*ref),
1684                                     BTRFS_NAME_LEN);
1685                         error = REF_ERR_NAME_TOO_LONG;
1686                 } else {
1687                         len = name_len;
1688                         error = 0;
1689                 }
1690
1691                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692                 add_inode_backref(inode_cache, key->objectid, key->offset,
1693                                   index, namebuf, len, 0, key->type, error);
1694
1695                 len = sizeof(*ref) + name_len;
1696                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1697                 cur += len;
1698         }
1699         return 0;
1700 }
1701
1702 static int process_inode_extref(struct extent_buffer *eb,
1703                                 int slot, struct btrfs_key *key,
1704                                 struct shared_node *active_node)
1705 {
1706         u32 total;
1707         u32 cur = 0;
1708         u32 len;
1709         u32 name_len;
1710         u64 index;
1711         u64 parent;
1712         int error;
1713         struct cache_tree *inode_cache;
1714         struct btrfs_inode_extref *extref;
1715         char namebuf[BTRFS_NAME_LEN];
1716
1717         inode_cache = &active_node->inode_cache;
1718
1719         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720         total = btrfs_item_size_nr(eb, slot);
1721         while (cur < total) {
1722                 name_len = btrfs_inode_extref_name_len(eb, extref);
1723                 index = btrfs_inode_extref_index(eb, extref);
1724                 parent = btrfs_inode_extref_parent(eb, extref);
1725                 if (name_len <= BTRFS_NAME_LEN) {
1726                         len = name_len;
1727                         error = 0;
1728                 } else {
1729                         len = BTRFS_NAME_LEN;
1730                         error = REF_ERR_NAME_TOO_LONG;
1731                 }
1732                 read_extent_buffer(eb, namebuf,
1733                                    (unsigned long)(extref + 1), len);
1734                 add_inode_backref(inode_cache, key->objectid, parent,
1735                                   index, namebuf, len, 0, key->type, error);
1736
1737                 len = sizeof(*extref) + name_len;
1738                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1739                 cur += len;
1740         }
1741         return 0;
1742
1743 }
1744
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746                             u64 len, u64 *found)
1747 {
1748         struct btrfs_key key;
1749         struct btrfs_path path;
1750         struct extent_buffer *leaf;
1751         int ret;
1752         size_t size;
1753         *found = 0;
1754         u64 csum_end;
1755         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1756
1757         btrfs_init_path(&path);
1758
1759         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1760         key.offset = start;
1761         key.type = BTRFS_EXTENT_CSUM_KEY;
1762
1763         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1764                                 &key, &path, 0, 0);
1765         if (ret < 0)
1766                 goto out;
1767         if (ret > 0 && path.slots[0] > 0) {
1768                 leaf = path.nodes[0];
1769                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771                     key.type == BTRFS_EXTENT_CSUM_KEY)
1772                         path.slots[0]--;
1773         }
1774
1775         while (len > 0) {
1776                 leaf = path.nodes[0];
1777                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1779                         if (ret > 0)
1780                                 break;
1781                         else if (ret < 0)
1782                                 goto out;
1783                         leaf = path.nodes[0];
1784                 }
1785
1786                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1787                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788                     key.type != BTRFS_EXTENT_CSUM_KEY)
1789                         break;
1790
1791                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1792                 if (key.offset >= start + len)
1793                         break;
1794
1795                 if (key.offset > start)
1796                         start = key.offset;
1797
1798                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1799                 csum_end = key.offset + (size / csum_size) *
1800                            root->fs_info->sectorsize;
1801                 if (csum_end > start) {
1802                         size = min(csum_end - start, len);
1803                         len -= size;
1804                         start += size;
1805                         *found += size;
1806                 }
1807
1808                 path.slots[0]++;
1809         }
1810 out:
1811         btrfs_release_path(&path);
1812         if (ret < 0)
1813                 return ret;
1814         return 0;
1815 }
1816
1817 static int process_file_extent(struct btrfs_root *root,
1818                                 struct extent_buffer *eb,
1819                                 int slot, struct btrfs_key *key,
1820                                 struct shared_node *active_node)
1821 {
1822         struct inode_record *rec;
1823         struct btrfs_file_extent_item *fi;
1824         u64 num_bytes = 0;
1825         u64 disk_bytenr = 0;
1826         u64 extent_offset = 0;
1827         u64 mask = root->fs_info->sectorsize - 1;
1828         int extent_type;
1829         int ret;
1830
1831         rec = active_node->current;
1832         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833         rec->found_file_extent = 1;
1834
1835         if (rec->extent_start == (u64)-1) {
1836                 rec->extent_start = key->offset;
1837                 rec->extent_end = key->offset;
1838         }
1839
1840         if (rec->extent_end > key->offset)
1841                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842         else if (rec->extent_end < key->offset) {
1843                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844                                            key->offset - rec->extent_end);
1845                 if (ret < 0)
1846                         return ret;
1847         }
1848
1849         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850         extent_type = btrfs_file_extent_type(eb, fi);
1851
1852         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1854                 if (num_bytes == 0)
1855                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856                 rec->found_size += num_bytes;
1857                 num_bytes = (num_bytes + mask) & ~mask;
1858         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862                 extent_offset = btrfs_file_extent_offset(eb, fi);
1863                 if (num_bytes == 0 || (num_bytes & mask))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (num_bytes + extent_offset >
1866                     btrfs_file_extent_ram_bytes(eb, fi))
1867                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1868                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869                     (btrfs_file_extent_compression(eb, fi) ||
1870                      btrfs_file_extent_encryption(eb, fi) ||
1871                      btrfs_file_extent_other_encoding(eb, fi)))
1872                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1873                 if (disk_bytenr > 0)
1874                         rec->found_size += num_bytes;
1875         } else {
1876                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1877         }
1878         rec->extent_end = key->offset + num_bytes;
1879
1880         /*
1881          * The data reloc tree will copy full extents into its inode and then
1882          * copy the corresponding csums.  Because the extent it copied could be
1883          * a preallocated extent that hasn't been written to yet there may be no
1884          * csums to copy, ergo we won't have csums for our file extent.  This is
1885          * ok so just don't bother checking csums if the inode belongs to the
1886          * data reloc tree.
1887          */
1888         if (disk_bytenr > 0 &&
1889             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1890                 u64 found;
1891                 if (btrfs_file_extent_compression(eb, fi))
1892                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1893                 else
1894                         disk_bytenr += extent_offset;
1895
1896                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1897                 if (ret < 0)
1898                         return ret;
1899                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1900                         if (found > 0)
1901                                 rec->found_csum_item = 1;
1902                         if (found < num_bytes)
1903                                 rec->some_csum_missing = 1;
1904                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1905                         if (found > 0)
1906                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1907                 }
1908         }
1909         return 0;
1910 }
1911
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913                             struct walk_control *wc)
1914 {
1915         struct btrfs_key key;
1916         u32 nritems;
1917         int i;
1918         int ret = 0;
1919         struct cache_tree *inode_cache;
1920         struct shared_node *active_node;
1921
1922         if (wc->root_level == wc->active_node &&
1923             btrfs_root_refs(&root->root_item) == 0)
1924                 return 0;
1925
1926         active_node = wc->nodes[wc->active_node];
1927         inode_cache = &active_node->inode_cache;
1928         nritems = btrfs_header_nritems(eb);
1929         for (i = 0; i < nritems; i++) {
1930                 btrfs_item_key_to_cpu(eb, &key, i);
1931
1932                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1933                         continue;
1934                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1935                         continue;
1936
1937                 if (active_node->current == NULL ||
1938                     active_node->current->ino < key.objectid) {
1939                         if (active_node->current) {
1940                                 active_node->current->checked = 1;
1941                                 maybe_free_inode_rec(inode_cache,
1942                                                      active_node->current);
1943                         }
1944                         active_node->current = get_inode_rec(inode_cache,
1945                                                              key.objectid, 1);
1946                         BUG_ON(IS_ERR(active_node->current));
1947                 }
1948                 switch (key.type) {
1949                 case BTRFS_DIR_ITEM_KEY:
1950                 case BTRFS_DIR_INDEX_KEY:
1951                         ret = process_dir_item(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_REF_KEY:
1954                         ret = process_inode_ref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_EXTREF_KEY:
1957                         ret = process_inode_extref(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_INODE_ITEM_KEY:
1960                         ret = process_inode_item(eb, i, &key, active_node);
1961                         break;
1962                 case BTRFS_EXTENT_DATA_KEY:
1963                         ret = process_file_extent(root, eb, i, &key,
1964                                                   active_node);
1965                         break;
1966                 default:
1967                         break;
1968                 };
1969         }
1970         return ret;
1971 }
1972
1973 struct node_refs {
1974         u64 bytenr[BTRFS_MAX_LEVEL];
1975         u64 refs[BTRFS_MAX_LEVEL];
1976         int need_check[BTRFS_MAX_LEVEL];
1977         /* field for checking all trees */
1978         int checked[BTRFS_MAX_LEVEL];
1979         /* the corresponding extent should be marked as full backref or not */
1980         int full_backref[BTRFS_MAX_LEVEL];
1981 };
1982
1983 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1984                              struct extent_buffer *eb, struct node_refs *nrefs,
1985                              u64 level, int check_all);
1986 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1987                             unsigned int ext_ref);
1988
1989 /*
1990  * Returns >0  Found error, not fatal, should continue
1991  * Returns <0  Fatal error, must exit the whole check
1992  * Returns 0   No errors found
1993  */
1994 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1995                                struct node_refs *nrefs, int *level, int ext_ref)
1996 {
1997         struct extent_buffer *cur = path->nodes[0];
1998         struct btrfs_key key;
1999         u64 cur_bytenr;
2000         u32 nritems;
2001         u64 first_ino = 0;
2002         int root_level = btrfs_header_level(root->node);
2003         int i;
2004         int ret = 0; /* Final return value */
2005         int err = 0; /* Positive error bitmap */
2006
2007         cur_bytenr = cur->start;
2008
2009         /* skip to first inode item or the first inode number change */
2010         nritems = btrfs_header_nritems(cur);
2011         for (i = 0; i < nritems; i++) {
2012                 btrfs_item_key_to_cpu(cur, &key, i);
2013                 if (i == 0)
2014                         first_ino = key.objectid;
2015                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2016                     (first_ino && first_ino != key.objectid))
2017                         break;
2018         }
2019         if (i == nritems) {
2020                 path->slots[0] = nritems;
2021                 return 0;
2022         }
2023         path->slots[0] = i;
2024
2025 again:
2026         err |= check_inode_item(root, path, ext_ref);
2027
2028         /* modify cur since check_inode_item may change path */
2029         cur = path->nodes[0];
2030
2031         if (err & LAST_ITEM)
2032                 goto out;
2033
2034         /* still have inode items in thie leaf */
2035         if (cur->start == cur_bytenr)
2036                 goto again;
2037
2038         /*
2039          * we have switched to another leaf, above nodes may
2040          * have changed, here walk down the path, if a node
2041          * or leaf is shared, check whether we can skip this
2042          * node or leaf.
2043          */
2044         for (i = root_level; i >= 0; i--) {
2045                 if (path->nodes[i]->start == nrefs->bytenr[i])
2046                         continue;
2047
2048                 ret = update_nodes_refs(root, path->nodes[i]->start,
2049                                 path->nodes[i], nrefs, i, 0);
2050                 if (ret)
2051                         goto out;
2052
2053                 if (!nrefs->need_check[i]) {
2054                         *level += 1;
2055                         break;
2056                 }
2057         }
2058
2059         for (i = 0; i < *level; i++) {
2060                 free_extent_buffer(path->nodes[i]);
2061                 path->nodes[i] = NULL;
2062         }
2063 out:
2064         err &= ~LAST_ITEM;
2065         if (err && !ret)
2066                 ret = err;
2067         return ret;
2068 }
2069
2070 static void reada_walk_down(struct btrfs_root *root,
2071                             struct extent_buffer *node, int slot)
2072 {
2073         struct btrfs_fs_info *fs_info = root->fs_info;
2074         u64 bytenr;
2075         u64 ptr_gen;
2076         u32 nritems;
2077         int i;
2078         int level;
2079
2080         level = btrfs_header_level(node);
2081         if (level != 1)
2082                 return;
2083
2084         nritems = btrfs_header_nritems(node);
2085         for (i = slot; i < nritems; i++) {
2086                 bytenr = btrfs_node_blockptr(node, i);
2087                 ptr_gen = btrfs_node_ptr_generation(node, i);
2088                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2089         }
2090 }
2091
2092 /*
2093  * Check the child node/leaf by the following condition:
2094  * 1. the first item key of the node/leaf should be the same with the one
2095  *    in parent.
2096  * 2. block in parent node should match the child node/leaf.
2097  * 3. generation of parent node and child's header should be consistent.
2098  *
2099  * Or the child node/leaf pointed by the key in parent is not valid.
2100  *
2101  * We hope to check leaf owner too, but since subvol may share leaves,
2102  * which makes leaf owner check not so strong, key check should be
2103  * sufficient enough for that case.
2104  */
2105 static int check_child_node(struct extent_buffer *parent, int slot,
2106                             struct extent_buffer *child)
2107 {
2108         struct btrfs_key parent_key;
2109         struct btrfs_key child_key;
2110         int ret = 0;
2111
2112         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2113         if (btrfs_header_level(child) == 0)
2114                 btrfs_item_key_to_cpu(child, &child_key, 0);
2115         else
2116                 btrfs_node_key_to_cpu(child, &child_key, 0);
2117
2118         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2119                 ret = -EINVAL;
2120                 fprintf(stderr,
2121                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2122                         parent_key.objectid, parent_key.type, parent_key.offset,
2123                         child_key.objectid, child_key.type, child_key.offset);
2124         }
2125         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2126                 ret = -EINVAL;
2127                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2128                         btrfs_node_blockptr(parent, slot),
2129                         btrfs_header_bytenr(child));
2130         }
2131         if (btrfs_node_ptr_generation(parent, slot) !=
2132             btrfs_header_generation(child)) {
2133                 ret = -EINVAL;
2134                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2135                         btrfs_header_generation(child),
2136                         btrfs_node_ptr_generation(parent, slot));
2137         }
2138         return ret;
2139 }
2140
2141 /*
2142  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2143  * in every fs or file tree check. Here we find its all root ids, and only check
2144  * it in the fs or file tree which has the smallest root id.
2145  */
2146 static int need_check(struct btrfs_root *root, struct ulist *roots)
2147 {
2148         struct rb_node *node;
2149         struct ulist_node *u;
2150
2151         if (roots->nnodes == 1)
2152                 return 1;
2153
2154         node = rb_first(&roots->root);
2155         u = rb_entry(node, struct ulist_node, rb_node);
2156         /*
2157          * current root id is not smallest, we skip it and let it be checked
2158          * in the fs or file tree who hash the smallest root id.
2159          */
2160         if (root->objectid != u->val)
2161                 return 0;
2162
2163         return 1;
2164 }
2165
2166 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2167                                u64 *flags_ret)
2168 {
2169         struct btrfs_root *extent_root = root->fs_info->extent_root;
2170         struct btrfs_root_item *ri = &root->root_item;
2171         struct btrfs_extent_inline_ref *iref;
2172         struct btrfs_extent_item *ei;
2173         struct btrfs_key key;
2174         struct btrfs_path *path = NULL;
2175         unsigned long ptr;
2176         unsigned long end;
2177         u64 flags;
2178         u64 owner = 0;
2179         u64 offset;
2180         int slot;
2181         int type;
2182         int ret = 0;
2183
2184         /*
2185          * Except file/reloc tree, we can not have FULL BACKREF MODE
2186          */
2187         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
2188                 goto normal;
2189
2190         /* root node */
2191         if (eb->start == btrfs_root_bytenr(ri))
2192                 goto normal;
2193
2194         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2195                 goto full_backref;
2196
2197         owner = btrfs_header_owner(eb);
2198         if (owner == root->objectid)
2199                 goto normal;
2200
2201         path = btrfs_alloc_path();
2202         if (!path)
2203                 return -ENOMEM;
2204
2205         key.objectid = btrfs_header_bytenr(eb);
2206         key.type = (u8)-1;
2207         key.offset = (u64)-1;
2208
2209         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2210         if (ret <= 0) {
2211                 ret = -EIO;
2212                 goto out;
2213         }
2214
2215         if (ret > 0) {
2216                 ret = btrfs_previous_extent_item(extent_root, path,
2217                                                  key.objectid);
2218                 if (ret)
2219                         goto full_backref;
2220
2221         }
2222         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2223
2224         eb = path->nodes[0];
2225         slot = path->slots[0];
2226         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2227
2228         flags = btrfs_extent_flags(eb, ei);
2229         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2230                 goto full_backref;
2231
2232         ptr = (unsigned long)(ei + 1);
2233         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
2234
2235         if (key.type == BTRFS_EXTENT_ITEM_KEY)
2236                 ptr += sizeof(struct btrfs_tree_block_info);
2237
2238 next:
2239         /* Reached extent item ends normally */
2240         if (ptr == end)
2241                 goto full_backref;
2242
2243         /* Beyond extent item end, wrong item size */
2244         if (ptr > end) {
2245                 error("extent item at bytenr %llu slot %d has wrong size",
2246                         eb->start, slot);
2247                 goto full_backref;
2248         }
2249
2250         iref = (struct btrfs_extent_inline_ref *)ptr;
2251         offset = btrfs_extent_inline_ref_offset(eb, iref);
2252         type = btrfs_extent_inline_ref_type(eb, iref);
2253
2254         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2255                 goto normal;
2256         ptr += btrfs_extent_inline_ref_size(type);
2257         goto next;
2258
2259 normal:
2260         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2261         goto out;
2262
2263 full_backref:
2264         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2265 out:
2266         btrfs_free_path(path);
2267         return ret;
2268 }
2269
2270 /*
2271  * for a tree node or leaf, we record its reference count, so later if we still
2272  * process this node or leaf, don't need to compute its reference count again.
2273  *
2274  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
2275  */
2276 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2277                              struct extent_buffer *eb, struct node_refs *nrefs,
2278                              u64 level, int check_all)
2279 {
2280         struct ulist *roots;
2281         u64 refs = 0;
2282         u64 flags = 0;
2283         int root_level = btrfs_header_level(root->node);
2284         int check;
2285         int ret;
2286
2287         if (nrefs->bytenr[level] == bytenr)
2288                 return 0;
2289
2290         if (bytenr != (u64)-1) {
2291                 /* the return value of this function seems a mistake */
2292                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2293                                        level, 1, &refs, &flags);
2294                 /* temporary fix */
2295                 if (ret < 0 && !check_all)
2296                         return ret;
2297
2298                 nrefs->bytenr[level] = bytenr;
2299                 nrefs->refs[level] = refs;
2300                 nrefs->full_backref[level] = 0;
2301                 nrefs->checked[level] = 0;
2302
2303                 if (refs > 1) {
2304                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2305                                                    0, &roots);
2306                         if (ret)
2307                                 return -EIO;
2308
2309                         check = need_check(root, roots);
2310                         ulist_free(roots);
2311                         nrefs->need_check[level] = check;
2312                 } else {
2313                         if (!check_all) {
2314                                 nrefs->need_check[level] = 1;
2315                         } else {
2316                                 if (level == root_level) {
2317                                         nrefs->need_check[level] = 1;
2318                                 } else {
2319                                         /*
2320                                          * The node refs may have not been
2321                                          * updated if upper needs checking (the
2322                                          * lowest root_objectid) the node can
2323                                          * be checked.
2324                                          */
2325                                         nrefs->need_check[level] =
2326                                                 nrefs->need_check[level + 1];
2327                                 }
2328                         }
2329                 }
2330         }
2331
2332         if (check_all && eb) {
2333                 calc_extent_flag_v2(root, eb, &flags);
2334                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2335                         nrefs->full_backref[level] = 1;
2336         }
2337
2338         return 0;
2339 }
2340
2341 /*
2342  * @level           if @level == -1 means extent data item
2343  *                  else normal treeblocl.
2344  */
2345 static int should_check_extent_strictly(struct btrfs_root *root,
2346                                         struct node_refs *nrefs, int level)
2347 {
2348         int root_level = btrfs_header_level(root->node);
2349
2350         if (level > root_level || level < -1)
2351                 return 1;
2352         if (level == root_level)
2353                 return 1;
2354         /*
2355          * if the upper node is marked full backref, it should contain shared
2356          * backref of the parent (except owner == root->objectid).
2357          */
2358         while (++level <= root_level)
2359                 if (nrefs->refs[level] > 1)
2360                         return 0;
2361
2362         return 1;
2363 }
2364
2365 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2366                           struct walk_control *wc, int *level,
2367                           struct node_refs *nrefs)
2368 {
2369         enum btrfs_tree_block_status status;
2370         u64 bytenr;
2371         u64 ptr_gen;
2372         struct btrfs_fs_info *fs_info = root->fs_info;
2373         struct extent_buffer *next;
2374         struct extent_buffer *cur;
2375         int ret, err = 0;
2376         u64 refs;
2377
2378         WARN_ON(*level < 0);
2379         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2380
2381         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2382                 refs = nrefs->refs[*level];
2383                 ret = 0;
2384         } else {
2385                 ret = btrfs_lookup_extent_info(NULL, root,
2386                                        path->nodes[*level]->start,
2387                                        *level, 1, &refs, NULL);
2388                 if (ret < 0) {
2389                         err = ret;
2390                         goto out;
2391                 }
2392                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2393                 nrefs->refs[*level] = refs;
2394         }
2395
2396         if (refs > 1) {
2397                 ret = enter_shared_node(root, path->nodes[*level]->start,
2398                                         refs, wc, *level);
2399                 if (ret > 0) {
2400                         err = ret;
2401                         goto out;
2402                 }
2403         }
2404
2405         while (*level >= 0) {
2406                 WARN_ON(*level < 0);
2407                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2408                 cur = path->nodes[*level];
2409
2410                 if (btrfs_header_level(cur) != *level)
2411                         WARN_ON(1);
2412
2413                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2414                         break;
2415                 if (*level == 0) {
2416                         ret = process_one_leaf(root, cur, wc);
2417                         if (ret < 0)
2418                                 err = ret;
2419                         break;
2420                 }
2421                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2422                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2423
2424                 if (bytenr == nrefs->bytenr[*level - 1]) {
2425                         refs = nrefs->refs[*level - 1];
2426                 } else {
2427                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2428                                         *level - 1, 1, &refs, NULL);
2429                         if (ret < 0) {
2430                                 refs = 0;
2431                         } else {
2432                                 nrefs->bytenr[*level - 1] = bytenr;
2433                                 nrefs->refs[*level - 1] = refs;
2434                         }
2435                 }
2436
2437                 if (refs > 1) {
2438                         ret = enter_shared_node(root, bytenr, refs,
2439                                                 wc, *level - 1);
2440                         if (ret > 0) {
2441                                 path->slots[*level]++;
2442                                 continue;
2443                         }
2444                 }
2445
2446                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2447                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2448                         free_extent_buffer(next);
2449                         reada_walk_down(root, cur, path->slots[*level]);
2450                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2451                         if (!extent_buffer_uptodate(next)) {
2452                                 struct btrfs_key node_key;
2453
2454                                 btrfs_node_key_to_cpu(path->nodes[*level],
2455                                                       &node_key,
2456                                                       path->slots[*level]);
2457                                 btrfs_add_corrupt_extent_record(root->fs_info,
2458                                                 &node_key,
2459                                                 path->nodes[*level]->start,
2460                                                 root->fs_info->nodesize,
2461                                                 *level);
2462                                 err = -EIO;
2463                                 goto out;
2464                         }
2465                 }
2466
2467                 ret = check_child_node(cur, path->slots[*level], next);
2468                 if (ret) {
2469                         free_extent_buffer(next);
2470                         err = ret;
2471                         goto out;
2472                 }
2473
2474                 if (btrfs_is_leaf(next))
2475                         status = btrfs_check_leaf(root, NULL, next);
2476                 else
2477                         status = btrfs_check_node(root, NULL, next);
2478                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2479                         free_extent_buffer(next);
2480                         err = -EIO;
2481                         goto out;
2482                 }
2483
2484                 *level = *level - 1;
2485                 free_extent_buffer(path->nodes[*level]);
2486                 path->nodes[*level] = next;
2487                 path->slots[*level] = 0;
2488         }
2489 out:
2490         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2491         return err;
2492 }
2493
2494 static int fs_root_objectid(u64 objectid);
2495
2496 /*
2497  * Update global fs information.
2498  */
2499 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2500                          int level)
2501 {
2502         u32 free_nrs;
2503         struct extent_buffer *eb = path->nodes[level];
2504
2505         total_btree_bytes += eb->len;
2506         if (fs_root_objectid(root->objectid))
2507                 total_fs_tree_bytes += eb->len;
2508         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2509                 total_extent_tree_bytes += eb->len;
2510
2511         if (level == 0) {
2512                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2513         } else {
2514                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2515                             btrfs_header_nritems(eb));
2516                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2517         }
2518 }
2519
2520 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2521                             unsigned int ext_ref);
2522 static int check_tree_block_ref(struct btrfs_root *root,
2523                                 struct extent_buffer *eb, u64 bytenr,
2524                                 int level, u64 owner, struct node_refs *nrefs);
2525 static int check_leaf_items(struct btrfs_trans_handle *trans,
2526                             struct btrfs_root *root, struct btrfs_path *path,
2527                             struct node_refs *nrefs, int account_bytes);
2528
2529 /*
2530  * @trans      just for lowmem repair mode
2531  * @check all  if not 0 then check all tree block backrefs and items
2532  *             0 then just check relationship of items in fs tree(s)
2533  *
2534  * Returns >0  Found error, should continue
2535  * Returns <0  Fatal error, must exit the whole check
2536  * Returns 0   No errors found
2537  */
2538 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2539                              struct btrfs_root *root, struct btrfs_path *path,
2540                              int *level, struct node_refs *nrefs, int ext_ref,
2541                              int check_all)
2542
2543 {
2544         enum btrfs_tree_block_status status;
2545         u64 bytenr;
2546         u64 ptr_gen;
2547         struct btrfs_fs_info *fs_info = root->fs_info;
2548         struct extent_buffer *next;
2549         struct extent_buffer *cur;
2550         int ret;
2551         int err = 0;
2552         int check;
2553         int account_file_data = 0;
2554
2555         WARN_ON(*level < 0);
2556         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2557
2558         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2559                                 path->nodes[*level], nrefs, *level, check_all);
2560         if (ret < 0)
2561                 return ret;
2562
2563         while (*level >= 0) {
2564                 WARN_ON(*level < 0);
2565                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2566                 cur = path->nodes[*level];
2567                 bytenr = btrfs_header_bytenr(cur);
2568                 check = nrefs->need_check[*level];
2569
2570                 if (btrfs_header_level(cur) != *level)
2571                         WARN_ON(1);
2572                /*
2573                 * Update bytes accounting and check tree block ref
2574                 * NOTE: Doing accounting and check before checking nritems
2575                 * is necessary because of empty node/leaf.
2576                 */
2577                 if ((check_all && !nrefs->checked[*level]) ||
2578                     (!check_all && nrefs->need_check[*level])) {
2579                         ret = check_tree_block_ref(root, cur,
2580                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2581                            btrfs_header_owner(cur), nrefs);
2582                         err |= ret;
2583
2584                         if (check_all && nrefs->need_check[*level] &&
2585                                 nrefs->refs[*level]) {
2586                                 account_bytes(root, path, *level);
2587                                 account_file_data = 1;
2588                         }
2589                         nrefs->checked[*level] = 1;
2590                 }
2591
2592                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2593                         break;
2594
2595                 /* Don't forgot to check leaf/node validation */
2596                 if (*level == 0) {
2597                         /* skip duplicate check */
2598                         if (check || !check_all) {
2599                                 ret = btrfs_check_leaf(root, NULL, cur);
2600                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2601                                         err |= -EIO;
2602                                         break;
2603                                 }
2604                         }
2605
2606                         ret = 0;
2607                         if (!check_all)
2608                                 ret = process_one_leaf_v2(root, path, nrefs,
2609                                                           level, ext_ref);
2610                         else
2611                                 ret = check_leaf_items(trans, root, path,
2612                                                nrefs, account_file_data);
2613                         err |= ret;
2614                         break;
2615                 } else {
2616                         if (check || !check_all) {
2617                                 ret = btrfs_check_node(root, NULL, cur);
2618                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2619                                         err |= -EIO;
2620                                         break;
2621                                 }
2622                         }
2623                 }
2624
2625                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2626                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2627
2628                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2629                                         check_all);
2630                 if (ret < 0)
2631                         break;
2632                 /*
2633                  * check all trees in check_chunks_and_extent_v2
2634                  * check shared node once in check_fs_roots
2635                  */
2636                 if (!check_all && !nrefs->need_check[*level - 1]) {
2637                         path->slots[*level]++;
2638                         continue;
2639                 }
2640
2641                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2642                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2643                         free_extent_buffer(next);
2644                         reada_walk_down(root, cur, path->slots[*level]);
2645                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2646                         if (!extent_buffer_uptodate(next)) {
2647                                 struct btrfs_key node_key;
2648
2649                                 btrfs_node_key_to_cpu(path->nodes[*level],
2650                                                       &node_key,
2651                                                       path->slots[*level]);
2652                                 btrfs_add_corrupt_extent_record(fs_info,
2653                                         &node_key, path->nodes[*level]->start,
2654                                         fs_info->nodesize, *level);
2655                                 err |= -EIO;
2656                                 break;
2657                         }
2658                 }
2659
2660                 ret = check_child_node(cur, path->slots[*level], next);
2661                 err |= ret;
2662                 if (ret < 0) 
2663                         break;
2664
2665                 if (btrfs_is_leaf(next))
2666                         status = btrfs_check_leaf(root, NULL, next);
2667                 else
2668                         status = btrfs_check_node(root, NULL, next);
2669                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2670                         free_extent_buffer(next);
2671                         err |= -EIO;
2672                         break;
2673                 }
2674
2675                 *level = *level - 1;
2676                 free_extent_buffer(path->nodes[*level]);
2677                 path->nodes[*level] = next;
2678                 path->slots[*level] = 0;
2679                 account_file_data = 0;
2680
2681                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2682         }
2683         return err;
2684 }
2685
2686 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2687                         struct walk_control *wc, int *level)
2688 {
2689         int i;
2690         struct extent_buffer *leaf;
2691
2692         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2693                 leaf = path->nodes[i];
2694                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2695                         path->slots[i]++;
2696                         *level = i;
2697                         return 0;
2698                 } else {
2699                         free_extent_buffer(path->nodes[*level]);
2700                         path->nodes[*level] = NULL;
2701                         BUG_ON(*level > wc->active_node);
2702                         if (*level == wc->active_node)
2703                                 leave_shared_node(root, wc, *level);
2704                         *level = i + 1;
2705                 }
2706         }
2707         return 1;
2708 }
2709
2710 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2711                            int *level)
2712 {
2713         int i;
2714         struct extent_buffer *leaf;
2715
2716         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2717                 leaf = path->nodes[i];
2718                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2719                         path->slots[i]++;
2720                         *level = i;
2721                         return 0;
2722                 } else {
2723                         free_extent_buffer(path->nodes[*level]);
2724                         path->nodes[*level] = NULL;
2725                         *level = i + 1;
2726                 }
2727         }
2728         return 1;
2729 }
2730
2731 static int check_root_dir(struct inode_record *rec)
2732 {
2733         struct inode_backref *backref;
2734         int ret = -1;
2735
2736         if (!rec->found_inode_item || rec->errors)
2737                 goto out;
2738         if (rec->nlink != 1 || rec->found_link != 0)
2739                 goto out;
2740         if (list_empty(&rec->backrefs))
2741                 goto out;
2742         backref = to_inode_backref(rec->backrefs.next);
2743         if (!backref->found_inode_ref)
2744                 goto out;
2745         if (backref->index != 0 || backref->namelen != 2 ||
2746             memcmp(backref->name, "..", 2))
2747                 goto out;
2748         if (backref->found_dir_index || backref->found_dir_item)
2749                 goto out;
2750         ret = 0;
2751 out:
2752         return ret;
2753 }
2754
2755 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2756                               struct btrfs_root *root, struct btrfs_path *path,
2757                               struct inode_record *rec)
2758 {
2759         struct btrfs_inode_item *ei;
2760         struct btrfs_key key;
2761         int ret;
2762
2763         key.objectid = rec->ino;
2764         key.type = BTRFS_INODE_ITEM_KEY;
2765         key.offset = (u64)-1;
2766
2767         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2768         if (ret < 0)
2769                 goto out;
2770         if (ret) {
2771                 if (!path->slots[0]) {
2772                         ret = -ENOENT;
2773                         goto out;
2774                 }
2775                 path->slots[0]--;
2776                 ret = 0;
2777         }
2778         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2779         if (key.objectid != rec->ino) {
2780                 ret = -ENOENT;
2781                 goto out;
2782         }
2783
2784         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2785                             struct btrfs_inode_item);
2786         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2787         btrfs_mark_buffer_dirty(path->nodes[0]);
2788         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2789         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2790                root->root_key.objectid);
2791 out:
2792         btrfs_release_path(path);
2793         return ret;
2794 }
2795
2796 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2797                                     struct btrfs_root *root,
2798                                     struct btrfs_path *path,
2799                                     struct inode_record *rec)
2800 {
2801         int ret;
2802
2803         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2804         btrfs_release_path(path);
2805         if (!ret)
2806                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2807         return ret;
2808 }
2809
2810 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2811                                struct btrfs_root *root,
2812                                struct btrfs_path *path,
2813                                struct inode_record *rec)
2814 {
2815         struct btrfs_inode_item *ei;
2816         struct btrfs_key key;
2817         int ret = 0;
2818
2819         key.objectid = rec->ino;
2820         key.type = BTRFS_INODE_ITEM_KEY;
2821         key.offset = 0;
2822
2823         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2824         if (ret) {
2825                 if (ret > 0)
2826                         ret = -ENOENT;
2827                 goto out;
2828         }
2829
2830         /* Since ret == 0, no need to check anything */
2831         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832                             struct btrfs_inode_item);
2833         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2834         btrfs_mark_buffer_dirty(path->nodes[0]);
2835         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2836         printf("reset nbytes for ino %llu root %llu\n",
2837                rec->ino, root->root_key.objectid);
2838 out:
2839         btrfs_release_path(path);
2840         return ret;
2841 }
2842
2843 static int add_missing_dir_index(struct btrfs_root *root,
2844                                  struct cache_tree *inode_cache,
2845                                  struct inode_record *rec,
2846                                  struct inode_backref *backref)
2847 {
2848         struct btrfs_path path;
2849         struct btrfs_trans_handle *trans;
2850         struct btrfs_dir_item *dir_item;
2851         struct extent_buffer *leaf;
2852         struct btrfs_key key;
2853         struct btrfs_disk_key disk_key;
2854         struct inode_record *dir_rec;
2855         unsigned long name_ptr;
2856         u32 data_size = sizeof(*dir_item) + backref->namelen;
2857         int ret;
2858
2859         trans = btrfs_start_transaction(root, 1);
2860         if (IS_ERR(trans))
2861                 return PTR_ERR(trans);
2862
2863         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2864                 (unsigned long long)rec->ino);
2865
2866         btrfs_init_path(&path);
2867         key.objectid = backref->dir;
2868         key.type = BTRFS_DIR_INDEX_KEY;
2869         key.offset = backref->index;
2870         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2871         BUG_ON(ret);
2872
2873         leaf = path.nodes[0];
2874         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2875
2876         disk_key.objectid = cpu_to_le64(rec->ino);
2877         disk_key.type = BTRFS_INODE_ITEM_KEY;
2878         disk_key.offset = 0;
2879
2880         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2881         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2882         btrfs_set_dir_data_len(leaf, dir_item, 0);
2883         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2884         name_ptr = (unsigned long)(dir_item + 1);
2885         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2886         btrfs_mark_buffer_dirty(leaf);
2887         btrfs_release_path(&path);
2888         btrfs_commit_transaction(trans, root);
2889
2890         backref->found_dir_index = 1;
2891         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2892         BUG_ON(IS_ERR(dir_rec));
2893         if (!dir_rec)
2894                 return 0;
2895         dir_rec->found_size += backref->namelen;
2896         if (dir_rec->found_size == dir_rec->isize &&
2897             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2898                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2899         if (dir_rec->found_size != dir_rec->isize)
2900                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2901
2902         return 0;
2903 }
2904
2905 static int delete_dir_index(struct btrfs_root *root,
2906                             struct inode_backref *backref)
2907 {
2908         struct btrfs_trans_handle *trans;
2909         struct btrfs_dir_item *di;
2910         struct btrfs_path path;
2911         int ret = 0;
2912
2913         trans = btrfs_start_transaction(root, 1);
2914         if (IS_ERR(trans))
2915                 return PTR_ERR(trans);
2916
2917         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2918                 (unsigned long long)backref->dir,
2919                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2920                 (unsigned long long)root->objectid);
2921
2922         btrfs_init_path(&path);
2923         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2924                                     backref->name, backref->namelen,
2925                                     backref->index, -1);
2926         if (IS_ERR(di)) {
2927                 ret = PTR_ERR(di);
2928                 btrfs_release_path(&path);
2929                 btrfs_commit_transaction(trans, root);
2930                 if (ret == -ENOENT)
2931                         return 0;
2932                 return ret;
2933         }
2934
2935         if (!di)
2936                 ret = btrfs_del_item(trans, root, &path);
2937         else
2938                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2939         BUG_ON(ret);
2940         btrfs_release_path(&path);
2941         btrfs_commit_transaction(trans, root);
2942         return ret;
2943 }
2944
2945 static int __create_inode_item(struct btrfs_trans_handle *trans,
2946                                struct btrfs_root *root, u64 ino, u64 size,
2947                                u64 nbytes, u64 nlink, u32 mode)
2948 {
2949         struct btrfs_inode_item ii;
2950         time_t now = time(NULL);
2951         int ret;
2952
2953         btrfs_set_stack_inode_size(&ii, size);
2954         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2955         btrfs_set_stack_inode_nlink(&ii, nlink);
2956         btrfs_set_stack_inode_mode(&ii, mode);
2957         btrfs_set_stack_inode_generation(&ii, trans->transid);
2958         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2959         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2960         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2961         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2962         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2963         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2964         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2965
2966         ret = btrfs_insert_inode(trans, root, ino, &ii);
2967         ASSERT(!ret);
2968
2969         warning("root %llu inode %llu recreating inode item, this may "
2970                 "be incomplete, please check permissions and content after "
2971                 "the fsck completes.\n", (unsigned long long)root->objectid,
2972                 (unsigned long long)ino);
2973
2974         return 0;
2975 }
2976
2977 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2978                                     struct btrfs_root *root, u64 ino,
2979                                     u8 filetype)
2980 {
2981         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2982
2983         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2984 }
2985
2986 static int create_inode_item(struct btrfs_root *root,
2987                              struct inode_record *rec, int root_dir)
2988 {
2989         struct btrfs_trans_handle *trans;
2990         u64 nlink = 0;
2991         u32 mode = 0;
2992         u64 size = 0;
2993         int ret;
2994
2995         trans = btrfs_start_transaction(root, 1);
2996         if (IS_ERR(trans)) {
2997                 ret = PTR_ERR(trans);
2998                 return ret;
2999         }
3000
3001         nlink = root_dir ? 1 : rec->found_link;
3002         if (rec->found_dir_item) {
3003                 if (rec->found_file_extent)
3004                         fprintf(stderr, "root %llu inode %llu has both a dir "
3005                                 "item and extents, unsure if it is a dir or a "
3006                                 "regular file so setting it as a directory\n",
3007                                 (unsigned long long)root->objectid,
3008                                 (unsigned long long)rec->ino);
3009                 mode = S_IFDIR | 0755;
3010                 size = rec->found_size;
3011         } else if (!rec->found_dir_item) {
3012                 size = rec->extent_end;
3013                 mode =  S_IFREG | 0755;
3014         }
3015
3016         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3017                                   nlink, mode);
3018         btrfs_commit_transaction(trans, root);
3019         return 0;
3020 }
3021
3022 static int repair_inode_backrefs(struct btrfs_root *root,
3023                                  struct inode_record *rec,
3024                                  struct cache_tree *inode_cache,
3025                                  int delete)
3026 {
3027         struct inode_backref *tmp, *backref;
3028         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3029         int ret = 0;
3030         int repaired = 0;
3031
3032         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3033                 if (!delete && rec->ino == root_dirid) {
3034                         if (!rec->found_inode_item) {
3035                                 ret = create_inode_item(root, rec, 1);
3036                                 if (ret)
3037                                         break;
3038                                 repaired++;
3039                         }
3040                 }
3041
3042                 /* Index 0 for root dir's are special, don't mess with it */
3043                 if (rec->ino == root_dirid && backref->index == 0)
3044                         continue;
3045
3046                 if (delete &&
3047                     ((backref->found_dir_index && !backref->found_inode_ref) ||
3048                      (backref->found_dir_index && backref->found_inode_ref &&
3049                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3050                         ret = delete_dir_index(root, backref);
3051                         if (ret)
3052                                 break;
3053                         repaired++;
3054                         list_del(&backref->list);
3055                         free(backref);
3056                         continue;
3057                 }
3058
3059                 if (!delete && !backref->found_dir_index &&
3060                     backref->found_dir_item && backref->found_inode_ref) {
3061                         ret = add_missing_dir_index(root, inode_cache, rec,
3062                                                     backref);
3063                         if (ret)
3064                                 break;
3065                         repaired++;
3066                         if (backref->found_dir_item &&
3067                             backref->found_dir_index) {
3068                                 if (!backref->errors &&
3069                                     backref->found_inode_ref) {
3070                                         list_del(&backref->list);
3071                                         free(backref);
3072                                         continue;
3073                                 }
3074                         }
3075                 }
3076
3077                 if (!delete && (!backref->found_dir_index &&
3078                                 !backref->found_dir_item &&
3079                                 backref->found_inode_ref)) {
3080                         struct btrfs_trans_handle *trans;
3081                         struct btrfs_key location;
3082
3083                         ret = check_dir_conflict(root, backref->name,
3084                                                  backref->namelen,
3085                                                  backref->dir,
3086                                                  backref->index);
3087                         if (ret) {
3088                                 /*
3089                                  * let nlink fixing routine to handle it,
3090                                  * which can do it better.
3091                                  */
3092                                 ret = 0;
3093                                 break;
3094                         }
3095                         location.objectid = rec->ino;
3096                         location.type = BTRFS_INODE_ITEM_KEY;
3097                         location.offset = 0;
3098
3099                         trans = btrfs_start_transaction(root, 1);
3100                         if (IS_ERR(trans)) {
3101                                 ret = PTR_ERR(trans);
3102                                 break;
3103                         }
3104                         fprintf(stderr, "adding missing dir index/item pair "
3105                                 "for inode %llu\n",
3106                                 (unsigned long long)rec->ino);
3107                         ret = btrfs_insert_dir_item(trans, root, backref->name,
3108                                                     backref->namelen,
3109                                                     backref->dir, &location,
3110                                                     imode_to_type(rec->imode),
3111                                                     backref->index);
3112                         BUG_ON(ret);
3113                         btrfs_commit_transaction(trans, root);
3114                         repaired++;
3115                 }
3116
3117                 if (!delete && (backref->found_inode_ref &&
3118                                 backref->found_dir_index &&
3119                                 backref->found_dir_item &&
3120                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3121                                 !rec->found_inode_item)) {
3122                         ret = create_inode_item(root, rec, 0);
3123                         if (ret)
3124                                 break;
3125                         repaired++;
3126                 }
3127
3128         }
3129         return ret ? ret : repaired;
3130 }
3131
3132 /*
3133  * To determine the file type for nlink/inode_item repair
3134  *
3135  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3136  * Return -ENOENT if file type is not found.
3137  */
3138 static int find_file_type(struct inode_record *rec, u8 *type)
3139 {
3140         struct inode_backref *backref;
3141
3142         /* For inode item recovered case */
3143         if (rec->found_inode_item) {
3144                 *type = imode_to_type(rec->imode);
3145                 return 0;
3146         }
3147
3148         list_for_each_entry(backref, &rec->backrefs, list) {
3149                 if (backref->found_dir_index || backref->found_dir_item) {
3150                         *type = backref->filetype;
3151                         return 0;
3152                 }
3153         }
3154         return -ENOENT;
3155 }
3156
3157 /*
3158  * To determine the file name for nlink repair
3159  *
3160  * Return 0 if file name is found, set name and namelen.
3161  * Return -ENOENT if file name is not found.
3162  */
3163 static int find_file_name(struct inode_record *rec,
3164                           char *name, int *namelen)
3165 {
3166         struct inode_backref *backref;
3167
3168         list_for_each_entry(backref, &rec->backrefs, list) {
3169                 if (backref->found_dir_index || backref->found_dir_item ||
3170                     backref->found_inode_ref) {
3171                         memcpy(name, backref->name, backref->namelen);
3172                         *namelen = backref->namelen;
3173                         return 0;
3174                 }
3175         }
3176         return -ENOENT;
3177 }
3178
3179 /* Reset the nlink of the inode to the correct one */
3180 static int reset_nlink(struct btrfs_trans_handle *trans,
3181                        struct btrfs_root *root,
3182                        struct btrfs_path *path,
3183                        struct inode_record *rec)
3184 {
3185         struct inode_backref *backref;
3186         struct inode_backref *tmp;
3187         struct btrfs_key key;
3188         struct btrfs_inode_item *inode_item;
3189         int ret = 0;
3190
3191         /* We don't believe this either, reset it and iterate backref */
3192         rec->found_link = 0;
3193
3194         /* Remove all backref including the valid ones */
3195         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3196                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3197                                    backref->index, backref->name,
3198                                    backref->namelen, 0);
3199                 if (ret < 0)
3200                         goto out;
3201
3202                 /* remove invalid backref, so it won't be added back */
3203                 if (!(backref->found_dir_index &&
3204                       backref->found_dir_item &&
3205                       backref->found_inode_ref)) {
3206                         list_del(&backref->list);
3207                         free(backref);
3208                 } else {
3209                         rec->found_link++;
3210                 }
3211         }
3212
3213         /* Set nlink to 0 */
3214         key.objectid = rec->ino;
3215         key.type = BTRFS_INODE_ITEM_KEY;
3216         key.offset = 0;
3217         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3218         if (ret < 0)
3219                 goto out;
3220         if (ret > 0) {
3221                 ret = -ENOENT;
3222                 goto out;
3223         }
3224         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3225                                     struct btrfs_inode_item);
3226         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3227         btrfs_mark_buffer_dirty(path->nodes[0]);
3228         btrfs_release_path(path);
3229
3230         /*
3231          * Add back valid inode_ref/dir_item/dir_index,
3232          * add_link() will handle the nlink inc, so new nlink must be correct
3233          */
3234         list_for_each_entry(backref, &rec->backrefs, list) {
3235                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3236                                      backref->name, backref->namelen,
3237                                      backref->filetype, &backref->index, 1, 0);
3238                 if (ret < 0)
3239                         goto out;
3240         }
3241 out:
3242         btrfs_release_path(path);
3243         return ret;
3244 }
3245
3246 static int get_highest_inode(struct btrfs_trans_handle *trans,
3247                                 struct btrfs_root *root,
3248                                 struct btrfs_path *path,
3249                                 u64 *highest_ino)
3250 {
3251         struct btrfs_key key, found_key;
3252         int ret;
3253
3254         btrfs_init_path(path);
3255         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3256         key.offset = -1;
3257         key.type = BTRFS_INODE_ITEM_KEY;
3258         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3259         if (ret == 1) {
3260                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3261                                 path->slots[0] - 1);
3262                 *highest_ino = found_key.objectid;
3263                 ret = 0;
3264         }
3265         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3266                 ret = -EOVERFLOW;
3267         btrfs_release_path(path);
3268         return ret;
3269 }
3270
3271 /*
3272  * Link inode to dir 'lost+found'. Increase @ref_count.
3273  *
3274  * Returns 0 means success.
3275  * Returns <0 means failure.
3276  */
3277 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3278                                    struct btrfs_root *root,
3279                                    struct btrfs_path *path,
3280                                    u64 ino, char *namebuf, u32 name_len,
3281                                    u8 filetype, u64 *ref_count)
3282 {
3283         char *dir_name = "lost+found";
3284         u64 lost_found_ino;
3285         int ret;
3286         u32 mode = 0700;
3287
3288         btrfs_release_path(path);
3289         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3290         if (ret < 0)
3291                 goto out;
3292         lost_found_ino++;
3293
3294         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3295                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3296                           mode);
3297         if (ret < 0) {
3298                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3299                 goto out;
3300         }
3301         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3302                              namebuf, name_len, filetype, NULL, 1, 0);
3303         /*
3304          * Add ".INO" suffix several times to handle case where
3305          * "FILENAME.INO" is already taken by another file.
3306          */
3307         while (ret == -EEXIST) {
3308                 /*
3309                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3310                  */
3311                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3312                         ret = -EFBIG;
3313                         goto out;
3314                 }
3315                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3316                          ".%llu", ino);
3317                 name_len += count_digits(ino) + 1;
3318                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3319                                      name_len, filetype, NULL, 1, 0);
3320         }
3321         if (ret < 0) {
3322                 error("failed to link the inode %llu to %s dir: %s",
3323                       ino, dir_name, strerror(-ret));
3324                 goto out;
3325         }
3326
3327         ++*ref_count;
3328         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3329                name_len, namebuf, dir_name);
3330 out:
3331         btrfs_release_path(path);
3332         if (ret)
3333                 error("failed to move file '%.*s' to '%s' dir", name_len,
3334                                 namebuf, dir_name);
3335         return ret;
3336 }
3337
3338 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3339                                struct btrfs_root *root,
3340                                struct btrfs_path *path,
3341                                struct inode_record *rec)
3342 {
3343         char namebuf[BTRFS_NAME_LEN] = {0};
3344         u8 type = 0;
3345         int namelen = 0;
3346         int name_recovered = 0;
3347         int type_recovered = 0;
3348         int ret = 0;
3349
3350         /*
3351          * Get file name and type first before these invalid inode ref
3352          * are deleted by remove_all_invalid_backref()
3353          */
3354         name_recovered = !find_file_name(rec, namebuf, &namelen);
3355         type_recovered = !find_file_type(rec, &type);
3356
3357         if (!name_recovered) {
3358                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3359                        rec->ino, rec->ino);
3360                 namelen = count_digits(rec->ino);
3361                 sprintf(namebuf, "%llu", rec->ino);
3362                 name_recovered = 1;
3363         }
3364         if (!type_recovered) {
3365                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3366                        rec->ino);
3367                 type = BTRFS_FT_REG_FILE;
3368                 type_recovered = 1;
3369         }
3370
3371         ret = reset_nlink(trans, root, path, rec);
3372         if (ret < 0) {
3373                 fprintf(stderr,
3374                         "Failed to reset nlink for inode %llu: %s\n",
3375                         rec->ino, strerror(-ret));
3376                 goto out;
3377         }
3378
3379         if (rec->found_link == 0) {
3380                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3381                                               namebuf, namelen, type,
3382                                               (u64 *)&rec->found_link);
3383                 if (ret)
3384                         goto out;
3385         }
3386         printf("Fixed the nlink of inode %llu\n", rec->ino);
3387 out:
3388         /*
3389          * Clear the flag anyway, or we will loop forever for the same inode
3390          * as it will not be removed from the bad inode list and the dead loop
3391          * happens.
3392          */
3393         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3394         btrfs_release_path(path);
3395         return ret;
3396 }
3397
3398 /*
3399  * Check if there is any normal(reg or prealloc) file extent for given
3400  * ino.
3401  * This is used to determine the file type when neither its dir_index/item or
3402  * inode_item exists.
3403  *
3404  * This will *NOT* report error, if any error happens, just consider it does
3405  * not have any normal file extent.
3406  */
3407 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3408 {
3409         struct btrfs_path path;
3410         struct btrfs_key key;
3411         struct btrfs_key found_key;
3412         struct btrfs_file_extent_item *fi;
3413         u8 type;
3414         int ret = 0;
3415
3416         btrfs_init_path(&path);
3417         key.objectid = ino;
3418         key.type = BTRFS_EXTENT_DATA_KEY;
3419         key.offset = 0;
3420
3421         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3422         if (ret < 0) {
3423                 ret = 0;
3424                 goto out;
3425         }
3426         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3427                 ret = btrfs_next_leaf(root, &path);
3428                 if (ret) {
3429                         ret = 0;
3430                         goto out;
3431                 }
3432         }
3433         while (1) {
3434                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3435                                       path.slots[0]);
3436                 if (found_key.objectid != ino ||
3437                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3438                         break;
3439                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3440                                     struct btrfs_file_extent_item);
3441                 type = btrfs_file_extent_type(path.nodes[0], fi);
3442                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3443                         ret = 1;
3444                         goto out;
3445                 }
3446         }
3447 out:
3448         btrfs_release_path(&path);
3449         return ret;
3450 }
3451
3452 static u32 btrfs_type_to_imode(u8 type)
3453 {
3454         static u32 imode_by_btrfs_type[] = {
3455                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3456                 [BTRFS_FT_DIR]          = S_IFDIR,
3457                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3458                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3459                 [BTRFS_FT_FIFO]         = S_IFIFO,
3460                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3461                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3462         };
3463
3464         return imode_by_btrfs_type[(type)];
3465 }
3466
3467 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3468                                 struct btrfs_root *root,
3469                                 struct btrfs_path *path,
3470                                 struct inode_record *rec)
3471 {
3472         u8 filetype;
3473         u32 mode = 0700;
3474         int type_recovered = 0;
3475         int ret = 0;
3476
3477         printf("Trying to rebuild inode:%llu\n", rec->ino);
3478
3479         type_recovered = !find_file_type(rec, &filetype);
3480
3481         /*
3482          * Try to determine inode type if type not found.
3483          *
3484          * For found regular file extent, it must be FILE.
3485          * For found dir_item/index, it must be DIR.
3486          *
3487          * For undetermined one, use FILE as fallback.
3488          *
3489          * TODO:
3490          * 1. If found backref(inode_index/item is already handled) to it,
3491          *    it must be DIR.
3492          *    Need new inode-inode ref structure to allow search for that.
3493          */
3494         if (!type_recovered) {
3495                 if (rec->found_file_extent &&
3496                     find_normal_file_extent(root, rec->ino)) {
3497                         type_recovered = 1;
3498                         filetype = BTRFS_FT_REG_FILE;
3499                 } else if (rec->found_dir_item) {
3500                         type_recovered = 1;
3501                         filetype = BTRFS_FT_DIR;
3502                 } else if (!list_empty(&rec->orphan_extents)) {
3503                         type_recovered = 1;
3504                         filetype = BTRFS_FT_REG_FILE;
3505                 } else{
3506                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3507                                rec->ino);
3508                         type_recovered = 1;
3509                         filetype = BTRFS_FT_REG_FILE;
3510                 }
3511         }
3512
3513         ret = btrfs_new_inode(trans, root, rec->ino,
3514                               mode | btrfs_type_to_imode(filetype));
3515         if (ret < 0)
3516                 goto out;
3517
3518         /*
3519          * Here inode rebuild is done, we only rebuild the inode item,
3520          * don't repair the nlink(like move to lost+found).
3521          * That is the job of nlink repair.
3522          *
3523          * We just fill the record and return
3524          */
3525         rec->found_dir_item = 1;
3526         rec->imode = mode | btrfs_type_to_imode(filetype);
3527         rec->nlink = 0;
3528         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3529         /* Ensure the inode_nlinks repair function will be called */
3530         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3531 out:
3532         return ret;
3533 }
3534
3535 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3536                                       struct btrfs_root *root,
3537                                       struct btrfs_path *path,
3538                                       struct inode_record *rec)
3539 {
3540         struct orphan_data_extent *orphan;
3541         struct orphan_data_extent *tmp;
3542         int ret = 0;
3543
3544         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3545                 /*
3546                  * Check for conflicting file extents
3547                  *
3548                  * Here we don't know whether the extents is compressed or not,
3549                  * so we can only assume it not compressed nor data offset,
3550                  * and use its disk_len as extent length.
3551                  */
3552                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3553                                        orphan->offset, orphan->disk_len, 0);
3554                 btrfs_release_path(path);
3555                 if (ret < 0)
3556                         goto out;
3557                 if (!ret) {
3558                         fprintf(stderr,
3559                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3560                                 orphan->disk_bytenr, orphan->disk_len);
3561                         ret = btrfs_free_extent(trans,
3562                                         root->fs_info->extent_root,
3563                                         orphan->disk_bytenr, orphan->disk_len,
3564                                         0, root->objectid, orphan->objectid,
3565                                         orphan->offset);
3566                         if (ret < 0)
3567                                 goto out;
3568                 }
3569                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3570                                 orphan->offset, orphan->disk_bytenr,
3571                                 orphan->disk_len, orphan->disk_len);
3572                 if (ret < 0)
3573                         goto out;
3574
3575                 /* Update file size info */
3576                 rec->found_size += orphan->disk_len;
3577                 if (rec->found_size == rec->nbytes)
3578                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3579
3580                 /* Update the file extent hole info too */
3581                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3582                                            orphan->disk_len);
3583                 if (ret < 0)
3584                         goto out;
3585                 if (RB_EMPTY_ROOT(&rec->holes))
3586                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3587
3588                 list_del(&orphan->list);
3589                 free(orphan);
3590         }
3591         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3592 out:
3593         return ret;
3594 }
3595
3596 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3597                                         struct btrfs_root *root,
3598                                         struct btrfs_path *path,
3599                                         struct inode_record *rec)
3600 {
3601         struct rb_node *node;
3602         struct file_extent_hole *hole;
3603         int found = 0;
3604         int ret = 0;
3605
3606         node = rb_first(&rec->holes);
3607
3608         while (node) {
3609                 found = 1;
3610                 hole = rb_entry(node, struct file_extent_hole, node);
3611                 ret = btrfs_punch_hole(trans, root, rec->ino,
3612                                        hole->start, hole->len);
3613                 if (ret < 0)
3614                         goto out;
3615                 ret = del_file_extent_hole(&rec->holes, hole->start,
3616                                            hole->len);
3617                 if (ret < 0)
3618                         goto out;
3619                 if (RB_EMPTY_ROOT(&rec->holes))
3620                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3621                 node = rb_first(&rec->holes);
3622         }
3623         /* special case for a file losing all its file extent */
3624         if (!found) {
3625                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3626                                        round_up(rec->isize,
3627                                                 root->fs_info->sectorsize));
3628                 if (ret < 0)
3629                         goto out;
3630         }
3631         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3632                rec->ino, root->objectid);
3633 out:
3634         return ret;
3635 }
3636
3637 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3638 {
3639         struct btrfs_trans_handle *trans;
3640         struct btrfs_path path;
3641         int ret = 0;
3642
3643         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3644                              I_ERR_NO_ORPHAN_ITEM |
3645                              I_ERR_LINK_COUNT_WRONG |
3646                              I_ERR_NO_INODE_ITEM |
3647                              I_ERR_FILE_EXTENT_ORPHAN |
3648                              I_ERR_FILE_EXTENT_DISCOUNT|
3649                              I_ERR_FILE_NBYTES_WRONG)))
3650                 return rec->errors;
3651
3652         /*
3653          * For nlink repair, it may create a dir and add link, so
3654          * 2 for parent(256)'s dir_index and dir_item
3655          * 2 for lost+found dir's inode_item and inode_ref
3656          * 1 for the new inode_ref of the file
3657          * 2 for lost+found dir's dir_index and dir_item for the file
3658          */
3659         trans = btrfs_start_transaction(root, 7);
3660         if (IS_ERR(trans))
3661                 return PTR_ERR(trans);
3662
3663         btrfs_init_path(&path);
3664         if (rec->errors & I_ERR_NO_INODE_ITEM)
3665                 ret = repair_inode_no_item(trans, root, &path, rec);
3666         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3667                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3668         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3669                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3670         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3671                 ret = repair_inode_isize(trans, root, &path, rec);
3672         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3673                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3674         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3675                 ret = repair_inode_nlinks(trans, root, &path, rec);
3676         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3677                 ret = repair_inode_nbytes(trans, root, &path, rec);
3678         btrfs_commit_transaction(trans, root);
3679         btrfs_release_path(&path);
3680         return ret;
3681 }
3682
3683 static int check_inode_recs(struct btrfs_root *root,
3684                             struct cache_tree *inode_cache)
3685 {
3686         struct cache_extent *cache;
3687         struct ptr_node *node;
3688         struct inode_record *rec;
3689         struct inode_backref *backref;
3690         int stage = 0;
3691         int ret = 0;
3692         int err = 0;
3693         u64 error = 0;
3694         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3695
3696         if (btrfs_root_refs(&root->root_item) == 0) {
3697                 if (!cache_tree_empty(inode_cache))
3698                         fprintf(stderr, "warning line %d\n", __LINE__);
3699                 return 0;
3700         }
3701
3702         /*
3703          * We need to repair backrefs first because we could change some of the
3704          * errors in the inode recs.
3705          *
3706          * We also need to go through and delete invalid backrefs first and then
3707          * add the correct ones second.  We do this because we may get EEXIST
3708          * when adding back the correct index because we hadn't yet deleted the
3709          * invalid index.
3710          *
3711          * For example, if we were missing a dir index then the directories
3712          * isize would be wrong, so if we fixed the isize to what we thought it
3713          * would be and then fixed the backref we'd still have a invalid fs, so
3714          * we need to add back the dir index and then check to see if the isize
3715          * is still wrong.
3716          */
3717         while (stage < 3) {
3718                 stage++;
3719                 if (stage == 3 && !err)
3720                         break;
3721
3722                 cache = search_cache_extent(inode_cache, 0);
3723                 while (repair && cache) {
3724                         node = container_of(cache, struct ptr_node, cache);
3725                         rec = node->data;
3726                         cache = next_cache_extent(cache);
3727
3728                         /* Need to free everything up and rescan */
3729                         if (stage == 3) {
3730                                 remove_cache_extent(inode_cache, &node->cache);
3731                                 free(node);
3732                                 free_inode_rec(rec);
3733                                 continue;
3734                         }
3735
3736                         if (list_empty(&rec->backrefs))
3737                                 continue;
3738
3739                         ret = repair_inode_backrefs(root, rec, inode_cache,
3740                                                     stage == 1);
3741                         if (ret < 0) {
3742                                 err = ret;
3743                                 stage = 2;
3744                                 break;
3745                         } if (ret > 0) {
3746                                 err = -EAGAIN;
3747                         }
3748                 }
3749         }
3750         if (err)
3751                 return err;
3752
3753         rec = get_inode_rec(inode_cache, root_dirid, 0);
3754         BUG_ON(IS_ERR(rec));
3755         if (rec) {
3756                 ret = check_root_dir(rec);
3757                 if (ret) {
3758                         fprintf(stderr, "root %llu root dir %llu error\n",
3759                                 (unsigned long long)root->root_key.objectid,
3760                                 (unsigned long long)root_dirid);
3761                         print_inode_error(root, rec);
3762                         error++;
3763                 }
3764         } else {
3765                 if (repair) {
3766                         struct btrfs_trans_handle *trans;
3767
3768                         trans = btrfs_start_transaction(root, 1);
3769                         if (IS_ERR(trans)) {
3770                                 err = PTR_ERR(trans);
3771                                 return err;
3772                         }
3773
3774                         fprintf(stderr,
3775                                 "root %llu missing its root dir, recreating\n",
3776                                 (unsigned long long)root->objectid);
3777
3778                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3779                         BUG_ON(ret);
3780
3781                         btrfs_commit_transaction(trans, root);
3782                         return -EAGAIN;
3783                 }
3784
3785                 fprintf(stderr, "root %llu root dir %llu not found\n",
3786                         (unsigned long long)root->root_key.objectid,
3787                         (unsigned long long)root_dirid);
3788         }
3789
3790         while (1) {
3791                 cache = search_cache_extent(inode_cache, 0);
3792                 if (!cache)
3793                         break;
3794                 node = container_of(cache, struct ptr_node, cache);
3795                 rec = node->data;
3796                 remove_cache_extent(inode_cache, &node->cache);
3797                 free(node);
3798                 if (rec->ino == root_dirid ||
3799                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3800                         free_inode_rec(rec);
3801                         continue;
3802                 }
3803
3804                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3805                         ret = check_orphan_item(root, rec->ino);
3806                         if (ret == 0)
3807                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3808                         if (can_free_inode_rec(rec)) {
3809                                 free_inode_rec(rec);
3810                                 continue;
3811                         }
3812                 }
3813
3814                 if (!rec->found_inode_item)
3815                         rec->errors |= I_ERR_NO_INODE_ITEM;
3816                 if (rec->found_link != rec->nlink)
3817                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3818                 if (repair) {
3819                         ret = try_repair_inode(root, rec);
3820                         if (ret == 0 && can_free_inode_rec(rec)) {
3821                                 free_inode_rec(rec);
3822                                 continue;
3823                         }
3824                         ret = 0;
3825                 }
3826
3827                 if (!(repair && ret == 0))
3828                         error++;
3829                 print_inode_error(root, rec);
3830                 list_for_each_entry(backref, &rec->backrefs, list) {
3831                         if (!backref->found_dir_item)
3832                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3833                         if (!backref->found_dir_index)
3834                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3835                         if (!backref->found_inode_ref)
3836                                 backref->errors |= REF_ERR_NO_INODE_REF;
3837                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3838                                 " namelen %u name %s filetype %d errors %x",
3839                                 (unsigned long long)backref->dir,
3840                                 (unsigned long long)backref->index,
3841                                 backref->namelen, backref->name,
3842                                 backref->filetype, backref->errors);
3843                         print_ref_error(backref->errors);
3844                 }
3845                 free_inode_rec(rec);
3846         }
3847         return (error > 0) ? -1 : 0;
3848 }
3849
3850 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3851                                         u64 objectid)
3852 {
3853         struct cache_extent *cache;
3854         struct root_record *rec = NULL;
3855         int ret;
3856
3857         cache = lookup_cache_extent(root_cache, objectid, 1);
3858         if (cache) {
3859                 rec = container_of(cache, struct root_record, cache);
3860         } else {
3861                 rec = calloc(1, sizeof(*rec));
3862                 if (!rec)
3863                         return ERR_PTR(-ENOMEM);
3864                 rec->objectid = objectid;
3865                 INIT_LIST_HEAD(&rec->backrefs);
3866                 rec->cache.start = objectid;
3867                 rec->cache.size = 1;
3868
3869                 ret = insert_cache_extent(root_cache, &rec->cache);
3870                 if (ret)
3871                         return ERR_PTR(-EEXIST);
3872         }
3873         return rec;
3874 }
3875
3876 static struct root_backref *get_root_backref(struct root_record *rec,
3877                                              u64 ref_root, u64 dir, u64 index,
3878                                              const char *name, int namelen)
3879 {
3880         struct root_backref *backref;
3881
3882         list_for_each_entry(backref, &rec->backrefs, list) {
3883                 if (backref->ref_root != ref_root || backref->dir != dir ||
3884                     backref->namelen != namelen)
3885                         continue;
3886                 if (memcmp(name, backref->name, namelen))
3887                         continue;
3888                 return backref;
3889         }
3890
3891         backref = calloc(1, sizeof(*backref) + namelen + 1);
3892         if (!backref)
3893                 return NULL;
3894         backref->ref_root = ref_root;
3895         backref->dir = dir;
3896         backref->index = index;
3897         backref->namelen = namelen;
3898         memcpy(backref->name, name, namelen);
3899         backref->name[namelen] = '\0';
3900         list_add_tail(&backref->list, &rec->backrefs);
3901         return backref;
3902 }
3903
3904 static void free_root_record(struct cache_extent *cache)
3905 {
3906         struct root_record *rec;
3907         struct root_backref *backref;
3908
3909         rec = container_of(cache, struct root_record, cache);
3910         while (!list_empty(&rec->backrefs)) {
3911                 backref = to_root_backref(rec->backrefs.next);
3912                 list_del(&backref->list);
3913                 free(backref);
3914         }
3915
3916         free(rec);
3917 }
3918
3919 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3920
3921 static int add_root_backref(struct cache_tree *root_cache,
3922                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3923                             const char *name, int namelen,
3924                             int item_type, int errors)
3925 {
3926         struct root_record *rec;
3927         struct root_backref *backref;
3928
3929         rec = get_root_rec(root_cache, root_id);
3930         BUG_ON(IS_ERR(rec));
3931         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3932         BUG_ON(!backref);
3933
3934         backref->errors |= errors;
3935
3936         if (item_type != BTRFS_DIR_ITEM_KEY) {
3937                 if (backref->found_dir_index || backref->found_back_ref ||
3938                     backref->found_forward_ref) {
3939                         if (backref->index != index)
3940                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3941                 } else {
3942                         backref->index = index;
3943                 }
3944         }
3945
3946         if (item_type == BTRFS_DIR_ITEM_KEY) {
3947                 if (backref->found_forward_ref)
3948                         rec->found_ref++;
3949                 backref->found_dir_item = 1;
3950         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3951                 backref->found_dir_index = 1;
3952         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3953                 if (backref->found_forward_ref)
3954                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3955                 else if (backref->found_dir_item)
3956                         rec->found_ref++;
3957                 backref->found_forward_ref = 1;
3958         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3959                 if (backref->found_back_ref)
3960                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3961                 backref->found_back_ref = 1;
3962         } else {
3963                 BUG_ON(1);
3964         }
3965
3966         if (backref->found_forward_ref && backref->found_dir_item)
3967                 backref->reachable = 1;
3968         return 0;
3969 }
3970
3971 static int merge_root_recs(struct btrfs_root *root,
3972                            struct cache_tree *src_cache,
3973                            struct cache_tree *dst_cache)
3974 {
3975         struct cache_extent *cache;
3976         struct ptr_node *node;
3977         struct inode_record *rec;
3978         struct inode_backref *backref;
3979         int ret = 0;
3980
3981         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3982                 free_inode_recs_tree(src_cache);
3983                 return 0;
3984         }
3985
3986         while (1) {
3987                 cache = search_cache_extent(src_cache, 0);
3988                 if (!cache)
3989                         break;
3990                 node = container_of(cache, struct ptr_node, cache);
3991                 rec = node->data;
3992                 remove_cache_extent(src_cache, &node->cache);
3993                 free(node);
3994
3995                 ret = is_child_root(root, root->objectid, rec->ino);
3996                 if (ret < 0)
3997                         break;
3998                 else if (ret == 0)
3999                         goto skip;
4000
4001                 list_for_each_entry(backref, &rec->backrefs, list) {
4002                         BUG_ON(backref->found_inode_ref);
4003                         if (backref->found_dir_item)
4004                                 add_root_backref(dst_cache, rec->ino,
4005                                         root->root_key.objectid, backref->dir,
4006                                         backref->index, backref->name,
4007                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
4008                                         backref->errors);
4009                         if (backref->found_dir_index)
4010                                 add_root_backref(dst_cache, rec->ino,
4011                                         root->root_key.objectid, backref->dir,
4012                                         backref->index, backref->name,
4013                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
4014                                         backref->errors);
4015                 }
4016 skip:
4017                 free_inode_rec(rec);
4018         }
4019         if (ret < 0)
4020                 return ret;
4021         return 0;
4022 }
4023
4024 static int check_root_refs(struct btrfs_root *root,
4025                            struct cache_tree *root_cache)
4026 {
4027         struct root_record *rec;
4028         struct root_record *ref_root;
4029         struct root_backref *backref;
4030         struct cache_extent *cache;
4031         int loop = 1;
4032         int ret;
4033         int error;
4034         int errors = 0;
4035
4036         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4037         BUG_ON(IS_ERR(rec));
4038         rec->found_ref = 1;
4039
4040         /* fixme: this can not detect circular references */
4041         while (loop) {
4042                 loop = 0;
4043                 cache = search_cache_extent(root_cache, 0);
4044                 while (1) {
4045                         if (!cache)
4046                                 break;
4047                         rec = container_of(cache, struct root_record, cache);
4048                         cache = next_cache_extent(cache);
4049
4050                         if (rec->found_ref == 0)
4051                                 continue;
4052
4053                         list_for_each_entry(backref, &rec->backrefs, list) {
4054                                 if (!backref->reachable)
4055                                         continue;
4056
4057                                 ref_root = get_root_rec(root_cache,
4058                                                         backref->ref_root);
4059                                 BUG_ON(IS_ERR(ref_root));
4060                                 if (ref_root->found_ref > 0)
4061                                         continue;
4062
4063                                 backref->reachable = 0;
4064                                 rec->found_ref--;
4065                                 if (rec->found_ref == 0)
4066                                         loop = 1;
4067                         }
4068                 }
4069         }
4070
4071         cache = search_cache_extent(root_cache, 0);
4072         while (1) {
4073                 if (!cache)
4074                         break;
4075                 rec = container_of(cache, struct root_record, cache);
4076                 cache = next_cache_extent(cache);
4077
4078                 if (rec->found_ref == 0 &&
4079                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4080                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4081                         ret = check_orphan_item(root->fs_info->tree_root,
4082                                                 rec->objectid);
4083                         if (ret == 0)
4084                                 continue;
4085
4086                         /*
4087                          * If we don't have a root item then we likely just have
4088                          * a dir item in a snapshot for this root but no actual
4089                          * ref key or anything so it's meaningless.
4090                          */
4091                         if (!rec->found_root_item)
4092                                 continue;
4093                         errors++;
4094                         fprintf(stderr, "fs tree %llu not referenced\n",
4095                                 (unsigned long long)rec->objectid);
4096                 }
4097
4098                 error = 0;
4099                 if (rec->found_ref > 0 && !rec->found_root_item)
4100                         error = 1;
4101                 list_for_each_entry(backref, &rec->backrefs, list) {
4102                         if (!backref->found_dir_item)
4103                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
4104                         if (!backref->found_dir_index)
4105                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
4106                         if (!backref->found_back_ref)
4107                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4108                         if (!backref->found_forward_ref)
4109                                 backref->errors |= REF_ERR_NO_ROOT_REF;
4110                         if (backref->reachable && backref->errors)
4111                                 error = 1;
4112                 }
4113                 if (!error)
4114                         continue;
4115
4116                 errors++;
4117                 fprintf(stderr, "fs tree %llu refs %u %s\n",
4118                         (unsigned long long)rec->objectid, rec->found_ref,
4119                          rec->found_root_item ? "" : "not found");
4120
4121                 list_for_each_entry(backref, &rec->backrefs, list) {
4122                         if (!backref->reachable)
4123                                 continue;
4124                         if (!backref->errors && rec->found_root_item)
4125                                 continue;
4126                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4127                                 " index %llu namelen %u name %s errors %x\n",
4128                                 (unsigned long long)backref->ref_root,
4129                                 (unsigned long long)backref->dir,
4130                                 (unsigned long long)backref->index,
4131                                 backref->namelen, backref->name,
4132                                 backref->errors);
4133                         print_ref_error(backref->errors);
4134                 }
4135         }
4136         return errors > 0 ? 1 : 0;
4137 }
4138
4139 static int process_root_ref(struct extent_buffer *eb, int slot,
4140                             struct btrfs_key *key,
4141                             struct cache_tree *root_cache)
4142 {
4143         u64 dirid;
4144         u64 index;
4145         u32 len;
4146         u32 name_len;
4147         struct btrfs_root_ref *ref;
4148         char namebuf[BTRFS_NAME_LEN];
4149         int error;
4150
4151         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4152
4153         dirid = btrfs_root_ref_dirid(eb, ref);
4154         index = btrfs_root_ref_sequence(eb, ref);
4155         name_len = btrfs_root_ref_name_len(eb, ref);
4156
4157         if (name_len <= BTRFS_NAME_LEN) {
4158                 len = name_len;
4159                 error = 0;
4160         } else {
4161                 len = BTRFS_NAME_LEN;
4162                 error = REF_ERR_NAME_TOO_LONG;
4163         }
4164         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4165
4166         if (key->type == BTRFS_ROOT_REF_KEY) {
4167                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4168                                  index, namebuf, len, key->type, error);
4169         } else {
4170                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4171                                  index, namebuf, len, key->type, error);
4172         }
4173         return 0;
4174 }
4175
4176 static void free_corrupt_block(struct cache_extent *cache)
4177 {
4178         struct btrfs_corrupt_block *corrupt;
4179
4180         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4181         free(corrupt);
4182 }
4183
4184 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4185
4186 /*
4187  * Repair the btree of the given root.
4188  *
4189  * The fix is to remove the node key in corrupt_blocks cache_tree.
4190  * and rebalance the tree.
4191  * After the fix, the btree should be writeable.
4192  */
4193 static int repair_btree(struct btrfs_root *root,
4194                         struct cache_tree *corrupt_blocks)
4195 {
4196         struct btrfs_trans_handle *trans;
4197         struct btrfs_path path;
4198         struct btrfs_corrupt_block *corrupt;
4199         struct cache_extent *cache;
4200         struct btrfs_key key;
4201         u64 offset;
4202         int level;
4203         int ret = 0;
4204
4205         if (cache_tree_empty(corrupt_blocks))
4206                 return 0;
4207
4208         trans = btrfs_start_transaction(root, 1);
4209         if (IS_ERR(trans)) {
4210                 ret = PTR_ERR(trans);
4211                 fprintf(stderr, "Error starting transaction: %s\n",
4212                         strerror(-ret));
4213                 return ret;
4214         }
4215         btrfs_init_path(&path);
4216         cache = first_cache_extent(corrupt_blocks);
4217         while (cache) {
4218                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4219                                        cache);
4220                 level = corrupt->level;
4221                 path.lowest_level = level;
4222                 key.objectid = corrupt->key.objectid;
4223                 key.type = corrupt->key.type;
4224                 key.offset = corrupt->key.offset;
4225
4226                 /*
4227                  * Here we don't want to do any tree balance, since it may
4228                  * cause a balance with corrupted brother leaf/node,
4229                  * so ins_len set to 0 here.
4230                  * Balance will be done after all corrupt node/leaf is deleted.
4231                  */
4232                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4233                 if (ret < 0)
4234                         goto out;
4235                 offset = btrfs_node_blockptr(path.nodes[level],
4236                                              path.slots[level]);
4237
4238                 /* Remove the ptr */
4239                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4240                 if (ret < 0)
4241                         goto out;
4242                 /*
4243                  * Remove the corresponding extent
4244                  * return value is not concerned.
4245                  */
4246                 btrfs_release_path(&path);
4247                 ret = btrfs_free_extent(trans, root, offset,
4248                                 root->fs_info->nodesize, 0,
4249                                 root->root_key.objectid, level - 1, 0);
4250                 cache = next_cache_extent(cache);
4251         }
4252
4253         /* Balance the btree using btrfs_search_slot() */
4254         cache = first_cache_extent(corrupt_blocks);
4255         while (cache) {
4256                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4257                                        cache);
4258                 memcpy(&key, &corrupt->key, sizeof(key));
4259                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4260                 if (ret < 0)
4261                         goto out;
4262                 /* return will always >0 since it won't find the item */
4263                 ret = 0;
4264                 btrfs_release_path(&path);
4265                 cache = next_cache_extent(cache);
4266         }
4267 out:
4268         btrfs_commit_transaction(trans, root);
4269         btrfs_release_path(&path);
4270         return ret;
4271 }
4272
4273 static int check_fs_root(struct btrfs_root *root,
4274                          struct cache_tree *root_cache,
4275                          struct walk_control *wc)
4276 {
4277         int ret = 0;
4278         int err = 0;
4279         int wret;
4280         int level;
4281         struct btrfs_path path;
4282         struct shared_node root_node;
4283         struct root_record *rec;
4284         struct btrfs_root_item *root_item = &root->root_item;
4285         struct cache_tree corrupt_blocks;
4286         struct orphan_data_extent *orphan;
4287         struct orphan_data_extent *tmp;
4288         enum btrfs_tree_block_status status;
4289         struct node_refs nrefs;
4290
4291         /*
4292          * Reuse the corrupt_block cache tree to record corrupted tree block
4293          *
4294          * Unlike the usage in extent tree check, here we do it in a per
4295          * fs/subvol tree base.
4296          */
4297         cache_tree_init(&corrupt_blocks);
4298         root->fs_info->corrupt_blocks = &corrupt_blocks;
4299
4300         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4301                 rec = get_root_rec(root_cache, root->root_key.objectid);
4302                 BUG_ON(IS_ERR(rec));
4303                 if (btrfs_root_refs(root_item) > 0)
4304                         rec->found_root_item = 1;
4305         }
4306
4307         btrfs_init_path(&path);
4308         memset(&root_node, 0, sizeof(root_node));
4309         cache_tree_init(&root_node.root_cache);
4310         cache_tree_init(&root_node.inode_cache);
4311         memset(&nrefs, 0, sizeof(nrefs));
4312
4313         /* Move the orphan extent record to corresponding inode_record */
4314         list_for_each_entry_safe(orphan, tmp,
4315                                  &root->orphan_data_extents, list) {
4316                 struct inode_record *inode;
4317
4318                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4319                                       1);
4320                 BUG_ON(IS_ERR(inode));
4321                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4322                 list_move(&orphan->list, &inode->orphan_extents);
4323         }
4324
4325         level = btrfs_header_level(root->node);
4326         memset(wc->nodes, 0, sizeof(wc->nodes));
4327         wc->nodes[level] = &root_node;
4328         wc->active_node = level;
4329         wc->root_level = level;
4330
4331         /* We may not have checked the root block, lets do that now */
4332         if (btrfs_is_leaf(root->node))
4333                 status = btrfs_check_leaf(root, NULL, root->node);
4334         else
4335                 status = btrfs_check_node(root, NULL, root->node);
4336         if (status != BTRFS_TREE_BLOCK_CLEAN)
4337                 return -EIO;
4338
4339         if (btrfs_root_refs(root_item) > 0 ||
4340             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4341                 path.nodes[level] = root->node;
4342                 extent_buffer_get(root->node);
4343                 path.slots[level] = 0;
4344         } else {
4345                 struct btrfs_key key;
4346                 struct btrfs_disk_key found_key;
4347
4348                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4349                 level = root_item->drop_level;
4350                 path.lowest_level = level;
4351                 if (level > btrfs_header_level(root->node) ||
4352                     level >= BTRFS_MAX_LEVEL) {
4353                         error("ignoring invalid drop level: %u", level);
4354                         goto skip_walking;
4355                 }
4356                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4357                 if (wret < 0)
4358                         goto skip_walking;
4359                 btrfs_node_key(path.nodes[level], &found_key,
4360                                 path.slots[level]);
4361                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4362                                         sizeof(found_key)));
4363         }
4364
4365         while (1) {
4366                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4367                 if (wret < 0)
4368                         ret = wret;
4369                 if (wret != 0)
4370                         break;
4371
4372                 wret = walk_up_tree(root, &path, wc, &level);
4373                 if (wret < 0)
4374                         ret = wret;
4375                 if (wret != 0)
4376                         break;
4377         }
4378 skip_walking:
4379         btrfs_release_path(&path);
4380
4381         if (!cache_tree_empty(&corrupt_blocks)) {
4382                 struct cache_extent *cache;
4383                 struct btrfs_corrupt_block *corrupt;
4384
4385                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4386                        root->root_key.objectid);
4387                 cache = first_cache_extent(&corrupt_blocks);
4388                 while (cache) {
4389                         corrupt = container_of(cache,
4390                                                struct btrfs_corrupt_block,
4391                                                cache);
4392                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4393                                cache->start, corrupt->level,
4394                                corrupt->key.objectid, corrupt->key.type,
4395                                corrupt->key.offset);
4396                         cache = next_cache_extent(cache);
4397                 }
4398                 if (repair) {
4399                         printf("Try to repair the btree for root %llu\n",
4400                                root->root_key.objectid);
4401                         ret = repair_btree(root, &corrupt_blocks);
4402                         if (ret < 0)
4403                                 fprintf(stderr, "Failed to repair btree: %s\n",
4404                                         strerror(-ret));
4405                         if (!ret)
4406                                 printf("Btree for root %llu is fixed\n",
4407                                        root->root_key.objectid);
4408                 }
4409         }
4410
4411         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4412         if (err < 0)
4413                 ret = err;
4414
4415         if (root_node.current) {
4416                 root_node.current->checked = 1;
4417                 maybe_free_inode_rec(&root_node.inode_cache,
4418                                 root_node.current);
4419         }
4420
4421         err = check_inode_recs(root, &root_node.inode_cache);
4422         if (!ret)
4423                 ret = err;
4424
4425         free_corrupt_blocks_tree(&corrupt_blocks);
4426         root->fs_info->corrupt_blocks = NULL;
4427         free_orphan_data_extents(&root->orphan_data_extents);
4428         return ret;
4429 }
4430
4431 static int fs_root_objectid(u64 objectid)
4432 {
4433         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4434             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4435                 return 1;
4436         return is_fstree(objectid);
4437 }
4438
4439 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4440                           struct cache_tree *root_cache)
4441 {
4442         struct btrfs_path path;
4443         struct btrfs_key key;
4444         struct walk_control wc;
4445         struct extent_buffer *leaf, *tree_node;
4446         struct btrfs_root *tmp_root;
4447         struct btrfs_root *tree_root = fs_info->tree_root;
4448         int ret;
4449         int err = 0;
4450
4451         if (ctx.progress_enabled) {
4452                 ctx.tp = TASK_FS_ROOTS;
4453                 task_start(ctx.info);
4454         }
4455
4456         /*
4457          * Just in case we made any changes to the extent tree that weren't
4458          * reflected into the free space cache yet.
4459          */
4460         if (repair)
4461                 reset_cached_block_groups(fs_info);
4462         memset(&wc, 0, sizeof(wc));
4463         cache_tree_init(&wc.shared);
4464         btrfs_init_path(&path);
4465
4466 again:
4467         key.offset = 0;
4468         key.objectid = 0;
4469         key.type = BTRFS_ROOT_ITEM_KEY;
4470         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4471         if (ret < 0) {
4472                 err = 1;
4473                 goto out;
4474         }
4475         tree_node = tree_root->node;
4476         while (1) {
4477                 if (tree_node != tree_root->node) {
4478                         free_root_recs_tree(root_cache);
4479                         btrfs_release_path(&path);
4480                         goto again;
4481                 }
4482                 leaf = path.nodes[0];
4483                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4484                         ret = btrfs_next_leaf(tree_root, &path);
4485                         if (ret) {
4486                                 if (ret < 0)
4487                                         err = 1;
4488                                 break;
4489                         }
4490                         leaf = path.nodes[0];
4491                 }
4492                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4493                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4494                     fs_root_objectid(key.objectid)) {
4495                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4496                                 tmp_root = btrfs_read_fs_root_no_cache(
4497                                                 fs_info, &key);
4498                         } else {
4499                                 key.offset = (u64)-1;
4500                                 tmp_root = btrfs_read_fs_root(
4501                                                 fs_info, &key);
4502                         }
4503                         if (IS_ERR(tmp_root)) {
4504                                 err = 1;
4505                                 goto next;
4506                         }
4507                         ret = check_fs_root(tmp_root, root_cache, &wc);
4508                         if (ret == -EAGAIN) {
4509                                 free_root_recs_tree(root_cache);
4510                                 btrfs_release_path(&path);
4511                                 goto again;
4512                         }
4513                         if (ret)
4514                                 err = 1;
4515                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4516                                 btrfs_free_fs_root(tmp_root);
4517                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4518                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4519                         process_root_ref(leaf, path.slots[0], &key,
4520                                          root_cache);
4521                 }
4522 next:
4523                 path.slots[0]++;
4524         }
4525 out:
4526         btrfs_release_path(&path);
4527         if (err)
4528                 free_extent_cache_tree(&wc.shared);
4529         if (!cache_tree_empty(&wc.shared))
4530                 fprintf(stderr, "warning line %d\n", __LINE__);
4531
4532         task_stop(ctx.info);
4533
4534         return err;
4535 }
4536
4537 /*
4538  * Find the @index according by @ino and name.
4539  * Notice:time efficiency is O(N)
4540  *
4541  * @root:       the root of the fs/file tree
4542  * @index_ret:  the index as return value
4543  * @namebuf:    the name to match
4544  * @name_len:   the length of name to match
4545  * @file_type:  the file_type of INODE_ITEM to match
4546  *
4547  * Returns 0 if found and *@index_ret will be modified with right value
4548  * Returns< 0 not found and *@index_ret will be (u64)-1
4549  */
4550 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4551                           u64 *index_ret, char *namebuf, u32 name_len,
4552                           u8 file_type)
4553 {
4554         struct btrfs_path path;
4555         struct extent_buffer *node;
4556         struct btrfs_dir_item *di;
4557         struct btrfs_key key;
4558         struct btrfs_key location;
4559         char name[BTRFS_NAME_LEN] = {0};
4560
4561         u32 total;
4562         u32 cur = 0;
4563         u32 len;
4564         u32 data_len;
4565         u8 filetype;
4566         int slot;
4567         int ret;
4568
4569         ASSERT(index_ret);
4570
4571         /* search from the last index */
4572         key.objectid = dirid;
4573         key.offset = (u64)-1;
4574         key.type = BTRFS_DIR_INDEX_KEY;
4575
4576         btrfs_init_path(&path);
4577         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4578         if (ret < 0)
4579                 return ret;
4580
4581 loop:
4582         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4583         if (ret) {
4584                 ret = -ENOENT;
4585                 *index_ret = (64)-1;
4586                 goto out;
4587         }
4588         /* Check whether inode_id/filetype/name match */
4589         node = path.nodes[0];
4590         slot = path.slots[0];
4591         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4592         total = btrfs_item_size_nr(node, slot);
4593         while (cur < total) {
4594                 ret = -ENOENT;
4595                 len = btrfs_dir_name_len(node, di);
4596                 data_len = btrfs_dir_data_len(node, di);
4597
4598                 btrfs_dir_item_key_to_cpu(node, di, &location);
4599                 if (location.objectid != location_id ||
4600                     location.type != BTRFS_INODE_ITEM_KEY ||
4601                     location.offset != 0)
4602                         goto next;
4603
4604                 filetype = btrfs_dir_type(node, di);
4605                 if (file_type != filetype)
4606                         goto next;
4607
4608                 if (len > BTRFS_NAME_LEN)
4609                         len = BTRFS_NAME_LEN;
4610
4611                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4612                 if (len != name_len || strncmp(namebuf, name, len))
4613                         goto next;
4614
4615                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4616                 *index_ret = key.offset;
4617                 ret = 0;
4618                 goto out;
4619 next:
4620                 len += sizeof(*di) + data_len;
4621                 di = (struct btrfs_dir_item *)((char *)di + len);
4622                 cur += len;
4623         }
4624         goto loop;
4625
4626 out:
4627         btrfs_release_path(&path);
4628         return ret;
4629 }
4630
4631 /*
4632  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4633  * INODE_REF/INODE_EXTREF match.
4634  *
4635  * @root:       the root of the fs/file tree
4636  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4637  *              value while find index
4638  * @location_key: location key of the struct btrfs_dir_item to match
4639  * @name:       the name to match
4640  * @namelen:    the length of name
4641  * @file_type:  the type of file to math
4642  *
4643  * Return 0 if no error occurred.
4644  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4645  * DIR_ITEM/DIR_INDEX
4646  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4647  * and DIR_ITEM/DIR_INDEX mismatch
4648  */
4649 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4650                          struct btrfs_key *location_key, char *name,
4651                          u32 namelen, u8 file_type)
4652 {
4653         struct btrfs_path path;
4654         struct extent_buffer *node;
4655         struct btrfs_dir_item *di;
4656         struct btrfs_key location;
4657         char namebuf[BTRFS_NAME_LEN] = {0};
4658         u32 total;
4659         u32 cur = 0;
4660         u32 len;
4661         u32 data_len;
4662         u8 filetype;
4663         int slot;
4664         int ret;
4665
4666         /* get the index by traversing all index */
4667         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4668                 ret = find_dir_index(root, key->objectid,
4669                                      location_key->objectid, &key->offset,
4670                                      name, namelen, file_type);
4671                 if (ret)
4672                         ret = DIR_INDEX_MISSING;
4673                 return ret;
4674         }
4675
4676         btrfs_init_path(&path);
4677         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4678         if (ret) {
4679                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4680                         DIR_INDEX_MISSING;
4681                 goto out;
4682         }
4683
4684         /* Check whether inode_id/filetype/name match */
4685         node = path.nodes[0];
4686         slot = path.slots[0];
4687         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4688         total = btrfs_item_size_nr(node, slot);
4689         while (cur < total) {
4690                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4691                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4692
4693                 len = btrfs_dir_name_len(node, di);
4694                 data_len = btrfs_dir_data_len(node, di);
4695
4696                 btrfs_dir_item_key_to_cpu(node, di, &location);
4697                 if (location.objectid != location_key->objectid ||
4698                     location.type != location_key->type ||
4699                     location.offset != location_key->offset)
4700                         goto next;
4701
4702                 filetype = btrfs_dir_type(node, di);
4703                 if (file_type != filetype)
4704                         goto next;
4705
4706                 if (len > BTRFS_NAME_LEN) {
4707                         len = BTRFS_NAME_LEN;
4708                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4709                         root->objectid,
4710                         key->type == BTRFS_DIR_ITEM_KEY ?
4711                         "DIR_ITEM" : "DIR_INDEX",
4712                         key->objectid, key->offset, len);
4713                 }
4714                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4715                                    len);
4716                 if (len != namelen || strncmp(namebuf, name, len))
4717                         goto next;
4718
4719                 ret = 0;
4720                 goto out;
4721 next:
4722                 len += sizeof(*di) + data_len;
4723                 di = (struct btrfs_dir_item *)((char *)di + len);
4724                 cur += len;
4725         }
4726
4727 out:
4728         btrfs_release_path(&path);
4729         return ret;
4730 }
4731
4732 /*
4733  * Prints inode ref error message
4734  */
4735 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4736                                 u64 index, const char *namebuf, int name_len,
4737                                 u8 filetype, int err)
4738 {
4739         if (!err)
4740                 return;
4741
4742         /* root dir error */
4743         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4744                 error(
4745         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4746                       root->objectid, key->objectid, key->offset, namebuf);
4747                 return;
4748         }
4749
4750         /* normal error */
4751         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4752                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4753                       root->objectid, key->offset,
4754                       btrfs_name_hash(namebuf, name_len),
4755                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4756                       namebuf, filetype);
4757         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4758                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4759                       root->objectid, key->offset, index,
4760                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4761                       namebuf, filetype);
4762 }
4763
4764 /*
4765  * Insert the missing inode item.
4766  *
4767  * Returns 0 means success.
4768  * Returns <0 means error.
4769  */
4770 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4771                                      u8 filetype)
4772 {
4773         struct btrfs_key key;
4774         struct btrfs_trans_handle *trans;
4775         struct btrfs_path path;
4776         int ret;
4777
4778         key.objectid = ino;
4779         key.type = BTRFS_INODE_ITEM_KEY;
4780         key.offset = 0;
4781
4782         btrfs_init_path(&path);
4783         trans = btrfs_start_transaction(root, 1);
4784         if (IS_ERR(trans)) {
4785                 ret = -EIO;
4786                 goto out;
4787         }
4788
4789         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4790         if (ret < 0 || !ret)
4791                 goto fail;
4792
4793         /* insert inode item */
4794         create_inode_item_lowmem(trans, root, ino, filetype);
4795         ret = 0;
4796 fail:
4797         btrfs_commit_transaction(trans, root);
4798 out:
4799         if (ret)
4800                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4801                       root->objectid, ino);
4802         btrfs_release_path(&path);
4803         return ret;
4804 }
4805
4806 /*
4807  * The ternary means dir item, dir index and relative inode ref.
4808  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4809  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4810  * strategy:
4811  * If two of three is missing or mismatched, delete the existing one.
4812  * If one of three is missing or mismatched, add the missing one.
4813  *
4814  * returns 0 means success.
4815  * returns not 0 means on error;
4816  */
4817 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4818                           u64 index, char *name, int name_len, u8 filetype,
4819                           int err)
4820 {
4821         struct btrfs_trans_handle *trans;
4822         int stage = 0;
4823         int ret = 0;
4824
4825         /*
4826          * stage shall be one of following valild values:
4827          *      0: Fine, nothing to do.
4828          *      1: One of three is wrong, so add missing one.
4829          *      2: Two of three is wrong, so delete existed one.
4830          */
4831         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4832                 stage++;
4833         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4834                 stage++;
4835         if (err & (INODE_REF_MISSING))
4836                 stage++;
4837
4838         /* stage must be smllarer than 3 */
4839         ASSERT(stage < 3);
4840
4841         trans = btrfs_start_transaction(root, 1);
4842         if (stage == 2) {
4843                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4844                                    name_len, 0);
4845                 goto out;
4846         }
4847         if (stage == 1) {
4848                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4849                                filetype, &index, 1, 1);
4850                 goto out;
4851         }
4852 out:
4853         btrfs_commit_transaction(trans, root);
4854
4855         if (ret)
4856                 error("fail to repair inode %llu name %s filetype %u",
4857                       ino, name, filetype);
4858         else
4859                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4860                        stage == 2 ? "Delete" : "Add",
4861                        ino, name, filetype);
4862
4863         return ret;
4864 }
4865
4866 /*
4867  * Traverse the given INODE_REF and call find_dir_item() to find related
4868  * DIR_ITEM/DIR_INDEX.
4869  *
4870  * @root:       the root of the fs/file tree
4871  * @ref_key:    the key of the INODE_REF
4872  * @path        the path provides node and slot
4873  * @refs:       the count of INODE_REF
4874  * @mode:       the st_mode of INODE_ITEM
4875  * @name_ret:   returns with the first ref's name
4876  * @name_len_ret:    len of the name_ret
4877  *
4878  * Return 0 if no error occurred.
4879  */
4880 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4881                            struct btrfs_path *path, char *name_ret,
4882                            u32 *namelen_ret, u64 *refs_ret, int mode)
4883 {
4884         struct btrfs_key key;
4885         struct btrfs_key location;
4886         struct btrfs_inode_ref *ref;
4887         struct extent_buffer *node;
4888         char namebuf[BTRFS_NAME_LEN] = {0};
4889         u32 total;
4890         u32 cur = 0;
4891         u32 len;
4892         u32 name_len;
4893         u64 index;
4894         int ret;
4895         int err = 0;
4896         int tmp_err;
4897         int slot;
4898         int need_research = 0;
4899         u64 refs;
4900
4901 begin:
4902         err = 0;
4903         cur = 0;
4904         refs = *refs_ret;
4905
4906         /* since after repair, path and the dir item may be changed */
4907         if (need_research) {
4908                 need_research = 0;
4909                 btrfs_release_path(path);
4910                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4911                 /* the item was deleted, let path point to the last checked item */
4912                 if (ret > 0) {
4913                         if (path->slots[0] == 0)
4914                                 btrfs_prev_leaf(root, path);
4915                         else
4916                                 path->slots[0]--;
4917                 }
4918                 if (ret)
4919                         goto out;
4920         }
4921
4922         location.objectid = ref_key->objectid;
4923         location.type = BTRFS_INODE_ITEM_KEY;
4924         location.offset = 0;
4925         node = path->nodes[0];
4926         slot = path->slots[0];
4927
4928         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4929         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4930         total = btrfs_item_size_nr(node, slot);
4931
4932 next:
4933         /* Update inode ref count */
4934         refs++;
4935         tmp_err = 0;
4936         index = btrfs_inode_ref_index(node, ref);
4937         name_len = btrfs_inode_ref_name_len(node, ref);
4938
4939         if (name_len <= BTRFS_NAME_LEN) {
4940                 len = name_len;
4941         } else {
4942                 len = BTRFS_NAME_LEN;
4943                 warning("root %llu INODE_REF[%llu %llu] name too long",
4944                         root->objectid, ref_key->objectid, ref_key->offset);
4945         }
4946
4947         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4948
4949         /* copy the first name found to name_ret */
4950         if (refs == 1 && name_ret) {
4951                 memcpy(name_ret, namebuf, len);
4952                 *namelen_ret = len;
4953         }
4954
4955         /* Check root dir ref */
4956         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4957                 if (index != 0 || len != strlen("..") ||
4958                     strncmp("..", namebuf, len) ||
4959                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4960                         /* set err bits then repair will delete the ref */
4961                         err |= DIR_INDEX_MISSING;
4962                         err |= DIR_ITEM_MISSING;
4963                 }
4964                 goto end;
4965         }
4966
4967         /* Find related DIR_INDEX */
4968         key.objectid = ref_key->offset;
4969         key.type = BTRFS_DIR_INDEX_KEY;
4970         key.offset = index;
4971         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4972                             imode_to_type(mode));
4973
4974         /* Find related dir_item */
4975         key.objectid = ref_key->offset;
4976         key.type = BTRFS_DIR_ITEM_KEY;
4977         key.offset = btrfs_name_hash(namebuf, len);
4978         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4979                             imode_to_type(mode));
4980 end:
4981         if (tmp_err && repair) {
4982                 ret = repair_ternary_lowmem(root, ref_key->offset,
4983                                             ref_key->objectid, index, namebuf,
4984                                             name_len, imode_to_type(mode),
4985                                             tmp_err);
4986                 if (!ret) {
4987                         need_research = 1;
4988                         goto begin;
4989                 }
4990         }
4991         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4992                             imode_to_type(mode), tmp_err);
4993         err |= tmp_err;
4994         len = sizeof(*ref) + name_len;
4995         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4996         cur += len;
4997         if (cur < total)
4998                 goto next;
4999
5000 out:
5001         *refs_ret = refs;
5002         return err;
5003 }
5004
5005 /*
5006  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5007  * DIR_ITEM/DIR_INDEX.
5008  *
5009  * @root:       the root of the fs/file tree
5010  * @ref_key:    the key of the INODE_EXTREF
5011  * @refs:       the count of INODE_EXTREF
5012  * @mode:       the st_mode of INODE_ITEM
5013  *
5014  * Return 0 if no error occurred.
5015  */
5016 static int check_inode_extref(struct btrfs_root *root,
5017                               struct btrfs_key *ref_key,
5018                               struct extent_buffer *node, int slot, u64 *refs,
5019                               int mode)
5020 {
5021         struct btrfs_key key;
5022         struct btrfs_key location;
5023         struct btrfs_inode_extref *extref;
5024         char namebuf[BTRFS_NAME_LEN] = {0};
5025         u32 total;
5026         u32 cur = 0;
5027         u32 len;
5028         u32 name_len;
5029         u64 index;
5030         u64 parent;
5031         int ret;
5032         int err = 0;
5033
5034         location.objectid = ref_key->objectid;
5035         location.type = BTRFS_INODE_ITEM_KEY;
5036         location.offset = 0;
5037
5038         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5039         total = btrfs_item_size_nr(node, slot);
5040
5041 next:
5042         /* update inode ref count */
5043         (*refs)++;
5044         name_len = btrfs_inode_extref_name_len(node, extref);
5045         index = btrfs_inode_extref_index(node, extref);
5046         parent = btrfs_inode_extref_parent(node, extref);
5047         if (name_len <= BTRFS_NAME_LEN) {
5048                 len = name_len;
5049         } else {
5050                 len = BTRFS_NAME_LEN;
5051                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5052                         root->objectid, ref_key->objectid, ref_key->offset);
5053         }
5054         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5055
5056         /* Check root dir ref name */
5057         if (index == 0 && strncmp(namebuf, "..", name_len)) {
5058                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5059                       root->objectid, ref_key->objectid, ref_key->offset,
5060                       namebuf);
5061                 err |= ROOT_DIR_ERROR;
5062         }
5063
5064         /* find related dir_index */
5065         key.objectid = parent;
5066         key.type = BTRFS_DIR_INDEX_KEY;
5067         key.offset = index;
5068         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5069         err |= ret;
5070
5071         /* find related dir_item */
5072         key.objectid = parent;
5073         key.type = BTRFS_DIR_ITEM_KEY;
5074         key.offset = btrfs_name_hash(namebuf, len);
5075         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5076         err |= ret;
5077
5078         len = sizeof(*extref) + name_len;
5079         extref = (struct btrfs_inode_extref *)((char *)extref + len);
5080         cur += len;
5081
5082         if (cur < total)
5083                 goto next;
5084
5085         return err;
5086 }
5087
5088 /*
5089  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5090  * DIR_ITEM/DIR_INDEX match.
5091  * Return with @index_ret.
5092  *
5093  * @root:       the root of the fs/file tree
5094  * @key:        the key of the INODE_REF/INODE_EXTREF
5095  * @name:       the name in the INODE_REF/INODE_EXTREF
5096  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
5097  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
5098  *              value (64)-1 means do not check index
5099  * @ext_ref:    the EXTENDED_IREF feature
5100  *
5101  * Return 0 if no error occurred.
5102  * Return >0 for error bitmap
5103  */
5104 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5105                           char *name, int namelen, u64 *index_ret,
5106                           unsigned int ext_ref)
5107 {
5108         struct btrfs_path path;
5109         struct btrfs_inode_ref *ref;
5110         struct btrfs_inode_extref *extref;
5111         struct extent_buffer *node;
5112         char ref_namebuf[BTRFS_NAME_LEN] = {0};
5113         u32 total;
5114         u32 cur = 0;
5115         u32 len;
5116         u32 ref_namelen;
5117         u64 ref_index;
5118         u64 parent;
5119         u64 dir_id;
5120         int slot;
5121         int ret;
5122
5123         ASSERT(index_ret);
5124
5125         btrfs_init_path(&path);
5126         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5127         if (ret) {
5128                 ret = INODE_REF_MISSING;
5129                 goto extref;
5130         }
5131
5132         node = path.nodes[0];
5133         slot = path.slots[0];
5134
5135         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5136         total = btrfs_item_size_nr(node, slot);
5137
5138         /* Iterate all entry of INODE_REF */
5139         while (cur < total) {
5140                 ret = INODE_REF_MISSING;
5141
5142                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5143                 ref_index = btrfs_inode_ref_index(node, ref);
5144                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5145                         goto next_ref;
5146
5147                 if (cur + sizeof(*ref) + ref_namelen > total ||
5148                     ref_namelen > BTRFS_NAME_LEN) {
5149                         warning("root %llu INODE %s[%llu %llu] name too long",
5150                                 root->objectid,
5151                                 key->type == BTRFS_INODE_REF_KEY ?
5152                                         "REF" : "EXTREF",
5153                                 key->objectid, key->offset);
5154
5155                         if (cur + sizeof(*ref) > total)
5156                                 break;
5157                         len = min_t(u32, total - cur - sizeof(*ref),
5158                                     BTRFS_NAME_LEN);
5159                 } else {
5160                         len = ref_namelen;
5161                 }
5162
5163                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5164                                    len);
5165
5166                 if (len != namelen || strncmp(ref_namebuf, name, len))
5167                         goto next_ref;
5168
5169                 *index_ret = ref_index;
5170                 ret = 0;
5171                 goto out;
5172 next_ref:
5173                 len = sizeof(*ref) + ref_namelen;
5174                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5175                 cur += len;
5176         }
5177
5178 extref:
5179         /* Skip if not support EXTENDED_IREF feature */
5180         if (!ext_ref)
5181                 goto out;
5182
5183         btrfs_release_path(&path);
5184         btrfs_init_path(&path);
5185
5186         dir_id = key->offset;
5187         key->type = BTRFS_INODE_EXTREF_KEY;
5188         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5189
5190         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5191         if (ret) {
5192                 ret = INODE_REF_MISSING;
5193                 goto out;
5194         }
5195
5196         node = path.nodes[0];
5197         slot = path.slots[0];
5198
5199         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5200         cur = 0;
5201         total = btrfs_item_size_nr(node, slot);
5202
5203         /* Iterate all entry of INODE_EXTREF */
5204         while (cur < total) {
5205                 ret = INODE_REF_MISSING;
5206
5207                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5208                 ref_index = btrfs_inode_extref_index(node, extref);
5209                 parent = btrfs_inode_extref_parent(node, extref);
5210                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5211                         goto next_extref;
5212
5213                 if (parent != dir_id)
5214                         goto next_extref;
5215
5216                 if (ref_namelen <= BTRFS_NAME_LEN) {
5217                         len = ref_namelen;
5218                 } else {
5219                         len = BTRFS_NAME_LEN;
5220                         warning("root %llu INODE %s[%llu %llu] name too long",
5221                                 root->objectid,
5222                                 key->type == BTRFS_INODE_REF_KEY ?
5223                                         "REF" : "EXTREF",
5224                                 key->objectid, key->offset);
5225                 }
5226                 read_extent_buffer(node, ref_namebuf,
5227                                    (unsigned long)(extref + 1), len);
5228
5229                 if (len != namelen || strncmp(ref_namebuf, name, len))
5230                         goto next_extref;
5231
5232                 *index_ret = ref_index;
5233                 ret = 0;
5234                 goto out;
5235
5236 next_extref:
5237                 len = sizeof(*extref) + ref_namelen;
5238                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5239                 cur += len;
5240
5241         }
5242 out:
5243         btrfs_release_path(&path);
5244         return ret;
5245 }
5246
5247 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5248                                u64 ino, u64 index, const char *namebuf,
5249                                int name_len, u8 filetype, int err)
5250 {
5251         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5252                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5253                       root->objectid, key->objectid, key->offset, namebuf,
5254                       filetype,
5255                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5256         }
5257
5258         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5259                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5260                       root->objectid, key->objectid, index, namebuf, filetype,
5261                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5262         }
5263
5264         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5265                 error(
5266                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5267                       root->objectid, ino, index, namebuf, filetype,
5268                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5269         }
5270
5271         if (err & INODE_REF_MISSING)
5272                 error(
5273                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5274                       root->objectid, ino, key->objectid, namebuf, filetype);
5275
5276 }
5277
5278 /*
5279  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5280  *
5281  * Returns error after repair
5282  */
5283 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5284                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5285                            int err)
5286 {
5287         int ret;
5288
5289         if (err & INODE_ITEM_MISSING) {
5290                 ret = repair_inode_item_missing(root, ino, filetype);
5291                 if (!ret)
5292                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5293         }
5294
5295         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5296                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5297                                             name_len, filetype, err);
5298                 if (!ret) {
5299                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5300                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5301                         err &= ~(INODE_REF_MISSING);
5302                 }
5303         }
5304         return err;
5305 }
5306
5307 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5308                 u64 *size_ret)
5309 {
5310         struct btrfs_key key;
5311         struct btrfs_path path;
5312         u32 len;
5313         struct btrfs_dir_item *di;
5314         int ret;
5315         int cur = 0;
5316         int total = 0;
5317
5318         ASSERT(size_ret);
5319         *size_ret = 0;
5320
5321         key.objectid = ino;
5322         key.type = type;
5323         key.offset = (u64)-1;
5324
5325         btrfs_init_path(&path);
5326         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5327         if (ret < 0) {
5328                 ret = -EIO;
5329                 goto out;
5330         }
5331         /* if found, go to spacial case */
5332         if (ret == 0)
5333                 goto special_case;
5334
5335 loop:
5336         ret = btrfs_previous_item(root, &path, ino, type);
5337
5338         if (ret) {
5339                 ret = 0;
5340                 goto out;
5341         }
5342
5343 special_case:
5344         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5345         cur = 0;
5346         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5347
5348         while (cur < total) {
5349                 len = btrfs_dir_name_len(path.nodes[0], di);
5350                 if (len > BTRFS_NAME_LEN)
5351                         len = BTRFS_NAME_LEN;
5352                 *size_ret += len;
5353
5354                 len += btrfs_dir_data_len(path.nodes[0], di);
5355                 len += sizeof(*di);
5356                 di = (struct btrfs_dir_item *)((char *)di + len);
5357                 cur += len;
5358         }
5359         goto loop;
5360
5361 out:
5362         btrfs_release_path(&path);
5363         return ret;
5364 }
5365
5366 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5367 {
5368         u64 item_size;
5369         u64 index_size;
5370         int ret;
5371
5372         ASSERT(size);
5373         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5374         if (ret)
5375                 goto out;
5376
5377         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5378         if (ret)
5379                 goto out;
5380
5381         *size = item_size + index_size;
5382
5383 out:
5384         if (ret)
5385                 error("failed to count root %llu INODE[%llu] root size",
5386                       root->objectid, ino);
5387         return ret;
5388 }
5389
/*
 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
 *
 * @root:       the root of the fs/file tree
 * @di_key:     the key of the DIR_ITEM/DIR_INDEX
 * @path:       the path
 * @size:       increased by the name length of every entry traversed
 * @ext_ref:    the EXTENDED_IREF feature
 *
 * Return 0 if no error occurred.
 * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
 */
5403 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5404                           struct btrfs_path *path, u64 *size,
5405                           unsigned int ext_ref)
5406 {
5407         struct btrfs_dir_item *di;
5408         struct btrfs_inode_item *ii;
5409         struct btrfs_key key;
5410         struct btrfs_key location;
5411         struct extent_buffer *node;
5412         int slot;
5413         char namebuf[BTRFS_NAME_LEN] = {0};
5414         u32 total;
5415         u32 cur = 0;
5416         u32 len;
5417         u32 name_len;
5418         u32 data_len;
5419         u8 filetype;
5420         u32 mode = 0;
5421         u64 index;
5422         int ret;
5423         int err;
5424         int tmp_err;
5425         int need_research = 0;
5426
5427         /*
5428          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5429          * ignore index check.
5430          */
5431         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5432                 index = di_key->offset;
5433         else
5434                 index = (u64)-1;
5435 begin:
5436         err = 0;
5437         cur = 0;
5438
5439         /* since after repair, path and the dir item may be changed */
5440         if (need_research) {
5441                 need_research = 0;
5442                 err |= DIR_COUNT_AGAIN;
5443                 btrfs_release_path(path);
5444                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5445                 /* the item was deleted, let path point the last checked item */
5446                 if (ret > 0) {
5447                         if (path->slots[0] == 0)
5448                                 btrfs_prev_leaf(root, path);
5449                         else
5450                                 path->slots[0]--;
5451                 }
5452                 if (ret)
5453                         goto out;
5454         }
5455
5456         node = path->nodes[0];
5457         slot = path->slots[0];
5458
5459         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5460         total = btrfs_item_size_nr(node, slot);
5461         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5462
5463         while (cur < total) {
5464                 data_len = btrfs_dir_data_len(node, di);
5465                 tmp_err = 0;
5466                 if (data_len)
5467                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5468                               root->objectid,
5469               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5470                               di_key->objectid, di_key->offset, data_len);
5471
5472                 name_len = btrfs_dir_name_len(node, di);
5473                 if (name_len <= BTRFS_NAME_LEN) {
5474                         len = name_len;
5475                 } else {
5476                         len = BTRFS_NAME_LEN;
5477                         warning("root %llu %s[%llu %llu] name too long",
5478                                 root->objectid,
5479                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5480                                 di_key->objectid, di_key->offset);
5481                 }
5482                 (*size) += name_len;
5483                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5484                                    len);
5485                 filetype = btrfs_dir_type(node, di);
5486
5487                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5488                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5489                         err |= -EIO;
5490                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5491                         root->objectid, di_key->objectid, di_key->offset,
5492                         namebuf, len, filetype, di_key->offset,
5493                         btrfs_name_hash(namebuf, len));
5494                 }
5495
5496                 btrfs_dir_item_key_to_cpu(node, di, &location);
5497                 /* Ignore related ROOT_ITEM check */
5498                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5499                         goto next;
5500
5501                 btrfs_release_path(path);
5502                 /* Check relative INODE_ITEM(existence/filetype) */
5503                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5504                 if (ret) {
5505                         tmp_err |= INODE_ITEM_MISSING;
5506                         goto next;
5507                 }
5508
5509                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5510                                     struct btrfs_inode_item);
5511                 mode = btrfs_inode_mode(path->nodes[0], ii);
5512                 if (imode_to_type(mode) != filetype) {
5513                         tmp_err |= INODE_ITEM_MISMATCH;
5514                         goto next;
5515                 }
5516
5517                 /* Check relative INODE_REF/INODE_EXTREF */
5518                 key.objectid = location.objectid;
5519                 key.type = BTRFS_INODE_REF_KEY;
5520                 key.offset = di_key->objectid;
5521                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5522                                           &index, ext_ref);
5523
5524                 /* check relative INDEX/ITEM */
5525                 key.objectid = di_key->objectid;
5526                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5527                         key.type = BTRFS_DIR_INDEX_KEY;
5528                         key.offset = index;
5529                 } else {
5530                         key.type = BTRFS_DIR_ITEM_KEY;
5531                         key.offset = btrfs_name_hash(namebuf, name_len);
5532                 }
5533
5534                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5535                                          name_len, filetype);
5536                 /* find_dir_item may find index */
5537                 if (key.type == BTRFS_DIR_INDEX_KEY)
5538                         index = key.offset;
5539 next:
5540
5541                 if (tmp_err && repair) {
5542                         ret = repair_dir_item(root, di_key->objectid,
5543                                               location.objectid, index,
5544                                               imode_to_type(mode), namebuf,
5545                                               name_len, tmp_err);
5546                         if (ret != tmp_err) {
5547                                 need_research = 1;
5548                                 goto begin;
5549                         }
5550                 }
5551                 btrfs_release_path(path);
5552                 print_dir_item_err(root, di_key, location.objectid, index,
5553                                    namebuf, name_len, filetype, tmp_err);
5554                 err |= tmp_err;
5555                 len = sizeof(*di) + name_len + data_len;
5556                 di = (struct btrfs_dir_item *)((char *)di + len);
5557                 cur += len;
5558
5559                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5560                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5561                               root->objectid, di_key->objectid,
5562                               di_key->offset);
5563                         break;
5564                 }
5565         }
5566 out:
5567         /* research path */
5568         btrfs_release_path(path);
5569         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5570         if (ret)
5571                 err |= ret > 0 ? -ENOENT : ret;
5572         return err;
5573 }
5574
5575 /*
5576  * Wrapper function of btrfs_punch_hole.
5577  *
5578  * Returns 0 means success.
5579  * Returns not 0 means error.
5580  */
5581 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5582                              u64 len)
5583 {
5584         struct btrfs_trans_handle *trans;
5585         int ret = 0;
5586
5587         trans = btrfs_start_transaction(root, 1);
5588         if (IS_ERR(trans))
5589                 return PTR_ERR(trans);
5590
5591         ret = btrfs_punch_hole(trans, root, ino, start, len);
5592         if (ret)
5593                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5594                       start, len, ino);
5595         else
5596                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5597                        ino);
5598
5599         btrfs_commit_transaction(trans, root);
5600         return ret;
5601 }
5602
5603 /*
5604  * Check file extent datasum/hole, update the size of the file extents,
5605  * check and update the last offset of the file extent.
5606  *
5607  * @root:       the root of fs/file tree.
5608  * @fkey:       the key of the file extent.
5609  * @nodatasum:  INODE_NODATASUM feature.
5610  * @size:       the sum of all EXTENT_DATA items size for this inode.
5611  * @end:        the offset of the last extent.
5612  *
5613  * Return 0 if no error occurred.
5614  */
5615 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5616                              struct extent_buffer *node, int slot,
5617                              unsigned int nodatasum, u64 *size, u64 *end)
5618 {
5619         struct btrfs_file_extent_item *fi;
5620         u64 disk_bytenr;
5621         u64 disk_num_bytes;
5622         u64 extent_num_bytes;
5623         u64 extent_offset;
5624         u64 csum_found;         /* In byte size, sectorsize aligned */
5625         u64 search_start;       /* Logical range start we search for csum */
5626         u64 search_len;         /* Logical range len we search for csum */
5627         unsigned int extent_type;
5628         unsigned int is_hole;
5629         int compressed = 0;
5630         int ret;
5631         int err = 0;
5632
5633         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5634
5635         /* Check inline extent */
5636         extent_type = btrfs_file_extent_type(node, fi);
5637         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5638                 struct btrfs_item *e = btrfs_item_nr(slot);
5639                 u32 item_inline_len;
5640
5641                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5642                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5643                 compressed = btrfs_file_extent_compression(node, fi);
5644                 if (extent_num_bytes == 0) {
5645                         error(
5646                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5647                                 root->objectid, fkey->objectid, fkey->offset);
5648                         err |= FILE_EXTENT_ERROR;
5649                 }
5650                 if (!compressed && extent_num_bytes != item_inline_len) {
5651                         error(
5652                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5653                                 root->objectid, fkey->objectid, fkey->offset,
5654                                 extent_num_bytes, item_inline_len);
5655                         err |= FILE_EXTENT_ERROR;
5656                 }
5657                 *end += extent_num_bytes;
5658                 *size += extent_num_bytes;
5659                 return err;
5660         }
5661
5662         /* Check extent type */
5663         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5664                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5665                 err |= FILE_EXTENT_ERROR;
5666                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5667                       root->objectid, fkey->objectid, fkey->offset);
5668                 return err;
5669         }
5670
5671         /* Check REG_EXTENT/PREALLOC_EXTENT */
5672         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5673         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5674         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5675         extent_offset = btrfs_file_extent_offset(node, fi);
5676         compressed = btrfs_file_extent_compression(node, fi);
5677         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5678
5679         /*
5680          * Check EXTENT_DATA csum
5681          *
5682          * For plain (uncompressed) extent, we should only check the range
5683          * we're referring to, as it's possible that part of prealloc extent
5684          * has been written, and has csum:
5685          *
5686          * |<--- Original large preallocated extent A ---->|
5687          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5688          *      No csum                         Has csum
5689          *
5690          * For compressed extent, we should check the whole range.
5691          */
5692         if (!compressed) {
5693                 search_start = disk_bytenr + extent_offset;
5694                 search_len = extent_num_bytes;
5695         } else {
5696                 search_start = disk_bytenr;
5697                 search_len = disk_num_bytes;
5698         }
5699         ret = count_csum_range(root, search_start, search_len, &csum_found);
5700         if (csum_found > 0 && nodatasum) {
5701                 err |= ODD_CSUM_ITEM;
5702                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5703                       root->objectid, fkey->objectid, fkey->offset);
5704         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5705                    !is_hole && (ret < 0 || csum_found < search_len)) {
5706                 err |= CSUM_ITEM_MISSING;
5707                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5708                       root->objectid, fkey->objectid, fkey->offset,
5709                       csum_found, search_len);
5710         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5711                 err |= ODD_CSUM_ITEM;
5712                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5713                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5714         }
5715
5716         /* Check EXTENT_DATA hole */
5717         if (!no_holes && *end != fkey->offset) {
5718                 if (repair)
5719                         ret = punch_extent_hole(root, fkey->objectid,
5720                                                 *end, fkey->offset - *end);
5721                 if (!repair || ret) {
5722                         err |= FILE_EXTENT_ERROR;
5723                         error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5724                               root->objectid, fkey->objectid, fkey->offset);
5725                 }
5726         }
5727
5728         *end += extent_num_bytes;
5729         if (!is_hole)
5730                 *size += extent_num_bytes;
5731
5732         return err;
5733 }
5734
5735 /*
5736  * Set inode item nbytes to @nbytes
5737  *
5738  * Returns  0     on success
5739  * Returns  != 0  on error
5740  */
5741 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5742                                       struct btrfs_path *path,
5743                                       u64 ino, u64 nbytes)
5744 {
5745         struct btrfs_trans_handle *trans;
5746         struct btrfs_inode_item *ii;
5747         struct btrfs_key key;
5748         struct btrfs_key research_key;
5749         int err = 0;
5750         int ret;
5751
5752         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5753
5754         key.objectid = ino;
5755         key.type = BTRFS_INODE_ITEM_KEY;
5756         key.offset = 0;
5757
5758         trans = btrfs_start_transaction(root, 1);
5759         if (IS_ERR(trans)) {
5760                 ret = PTR_ERR(trans);
5761                 err |= ret;
5762                 goto out;
5763         }
5764
5765         btrfs_release_path(path);
5766         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5767         if (ret > 0)
5768                 ret = -ENOENT;
5769         if (ret) {
5770                 err |= ret;
5771                 goto fail;
5772         }
5773
5774         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5775                             struct btrfs_inode_item);
5776         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5777         btrfs_mark_buffer_dirty(path->nodes[0]);
5778 fail:
5779         btrfs_commit_transaction(trans, root);
5780 out:
5781         if (ret)
5782                 error("failed to set nbytes in inode %llu root %llu",
5783                       ino, root->root_key.objectid);
5784         else
5785                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5786                        root->root_key.objectid, nbytes);
5787
5788         /* research path */
5789         btrfs_release_path(path);
5790         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5791         err |= ret;
5792
5793         return err;
5794 }
5795
5796 /*
5797  * Set directory inode isize to @isize.
5798  *
5799  * Returns 0     on success.
5800  * Returns != 0  on error.
5801  */
5802 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5803                                    struct btrfs_path *path,
5804                                    u64 ino, u64 isize)
5805 {
5806         struct btrfs_trans_handle *trans;
5807         struct btrfs_inode_item *ii;
5808         struct btrfs_key key;
5809         struct btrfs_key research_key;
5810         int ret;
5811         int err = 0;
5812
5813         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5814
5815         key.objectid = ino;
5816         key.type = BTRFS_INODE_ITEM_KEY;
5817         key.offset = 0;
5818
5819         trans = btrfs_start_transaction(root, 1);
5820         if (IS_ERR(trans)) {
5821                 ret = PTR_ERR(trans);
5822                 err |= ret;
5823                 goto out;
5824         }
5825
5826         btrfs_release_path(path);
5827         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5828         if (ret > 0)
5829                 ret = -ENOENT;
5830         if (ret) {
5831                 err |= ret;
5832                 goto fail;
5833         }
5834
5835         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5836                             struct btrfs_inode_item);
5837         btrfs_set_inode_size(path->nodes[0], ii, isize);
5838         btrfs_mark_buffer_dirty(path->nodes[0]);
5839 fail:
5840         btrfs_commit_transaction(trans, root);
5841 out:
5842         if (ret)
5843                 error("failed to set isize in inode %llu root %llu",
5844                       ino, root->root_key.objectid);
5845         else
5846                 printf("Set isize in inode %llu root %llu to %llu\n",
5847                        ino, root->root_key.objectid, isize);
5848
5849         btrfs_release_path(path);
5850         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5851         err |= ret;
5852
5853         return err;
5854 }
5855
5856 /*
5857  * Wrapper function for btrfs_add_orphan_item().
5858  *
5859  * Returns 0     on success.
5860  * Returns != 0  on error.
5861  */
5862 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5863                                            struct btrfs_path *path, u64 ino)
5864 {
5865         struct btrfs_trans_handle *trans;
5866         struct btrfs_key research_key;
5867         int ret;
5868         int err = 0;
5869
5870         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5871
5872         trans = btrfs_start_transaction(root, 1);
5873         if (IS_ERR(trans)) {
5874                 ret = PTR_ERR(trans);
5875                 err |= ret;
5876                 goto out;
5877         }
5878
5879         btrfs_release_path(path);
5880         ret = btrfs_add_orphan_item(trans, root, path, ino);
5881         err |= ret;
5882         btrfs_commit_transaction(trans, root);
5883 out:
5884         if (ret)
5885                 error("failed to add inode %llu as orphan item root %llu",
5886                       ino, root->root_key.objectid);
5887         else
5888                 printf("Added inode %llu as orphan item root %llu\n",
5889                        ino, root->root_key.objectid);
5890
5891         btrfs_release_path(path);
5892         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5893         err |= ret;
5894
5895         return err;
5896 }
5897
5898 /* Set inode_item nlink to @ref_count.
5899  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5900  *
5901  * Returns 0 on success
5902  */
5903 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5904                                       struct btrfs_path *path, u64 ino,
5905                                       const char *name, u32 namelen,
5906                                       u64 ref_count, u8 filetype, u64 *nlink)
5907 {
5908         struct btrfs_trans_handle *trans;
5909         struct btrfs_inode_item *ii;
5910         struct btrfs_key key;
5911         struct btrfs_key old_key;
5912         char namebuf[BTRFS_NAME_LEN] = {0};
5913         int name_len;
5914         int ret;
5915         int ret2;
5916
5917         /* save the key */
5918         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5919
5920         if (name && namelen) {
5921                 ASSERT(namelen <= BTRFS_NAME_LEN);
5922                 memcpy(namebuf, name, namelen);
5923                 name_len = namelen;
5924         } else {
5925                 sprintf(namebuf, "%llu", ino);
5926                 name_len = count_digits(ino);
5927                 printf("Can't find file name for inode %llu, use %s instead\n",
5928                        ino, namebuf);
5929         }
5930
5931         trans = btrfs_start_transaction(root, 1);
5932         if (IS_ERR(trans)) {
5933                 ret = PTR_ERR(trans);
5934                 goto out;
5935         }
5936
5937         btrfs_release_path(path);
5938         /* if refs is 0, put it into lostfound */
5939         if (ref_count == 0) {
5940                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5941                                               name_len, filetype, &ref_count);
5942                 if (ret)
5943                         goto fail;
5944         }
5945
5946         /* reset inode_item's nlink to ref_count */
5947         key.objectid = ino;
5948         key.type = BTRFS_INODE_ITEM_KEY;
5949         key.offset = 0;
5950
5951         btrfs_release_path(path);
5952         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5953         if (ret > 0)
5954                 ret = -ENOENT;
5955         if (ret)
5956                 goto fail;
5957
5958         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5959                             struct btrfs_inode_item);
5960         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5961         btrfs_mark_buffer_dirty(path->nodes[0]);
5962
5963         if (nlink)
5964                 *nlink = ref_count;
5965 fail:
5966         btrfs_commit_transaction(trans, root);
5967 out:
5968         if (ret)
5969                 error(
5970         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5971                        root->objectid, ino, namebuf, filetype);
5972         else
5973                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5974                        root->objectid, ino, namebuf, filetype);
5975
5976         /* research */
5977         btrfs_release_path(path);
5978         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5979         if (ret2 < 0)
5980                 return ret |= ret2;
5981         return ret;
5982 }
5983
5984 /*
5985  * Check INODE_ITEM and related ITEMs (the same inode number)
5986  * 1. check link count
5987  * 2. check inode ref/extref
5988  * 3. check dir item/index
5989  *
5990  * @ext_ref:    the EXTENDED_IREF feature
5991  *
5992  * Return 0 if no error occurred.
5993  * Return >0 for error or hit the traversal is done(by error bitmap)
5994  */
5995 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5996                             unsigned int ext_ref)
5997 {
5998         struct extent_buffer *node;
5999         struct btrfs_inode_item *ii;
6000         struct btrfs_key key;
6001         struct btrfs_key last_key;
6002         u64 inode_id;
6003         u32 mode;
6004         u64 nlink;
6005         u64 nbytes;
6006         u64 isize;
6007         u64 size = 0;
6008         u64 refs = 0;
6009         u64 extent_end = 0;
6010         u64 extent_size = 0;
6011         unsigned int dir;
6012         unsigned int nodatasum;
6013         int slot;
6014         int ret;
6015         int err = 0;
6016         char namebuf[BTRFS_NAME_LEN] = {0};
6017         u32 name_len = 0;
6018
6019         node = path->nodes[0];
6020         slot = path->slots[0];
6021
6022         btrfs_item_key_to_cpu(node, &key, slot);
6023         inode_id = key.objectid;
6024
6025         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6026                 ret = btrfs_next_item(root, path);
6027                 if (ret > 0)
6028                         err |= LAST_ITEM;
6029                 return err;
6030         }
6031
6032         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6033         isize = btrfs_inode_size(node, ii);
6034         nbytes = btrfs_inode_nbytes(node, ii);
6035         mode = btrfs_inode_mode(node, ii);
6036         dir = imode_to_type(mode) == BTRFS_FT_DIR;
6037         nlink = btrfs_inode_nlink(node, ii);
6038         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
6039
6040         while (1) {
6041                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6042                 ret = btrfs_next_item(root, path);
6043                 if (ret < 0) {
6044                         /* out will fill 'err' rusing current statistics */
6045                         goto out;
6046                 } else if (ret > 0) {
6047                         err |= LAST_ITEM;
6048                         goto out;
6049                 }
6050
6051                 node = path->nodes[0];
6052                 slot = path->slots[0];
6053                 btrfs_item_key_to_cpu(node, &key, slot);
6054                 if (key.objectid != inode_id)
6055                         goto out;
6056
6057                 switch (key.type) {
6058                 case BTRFS_INODE_REF_KEY:
6059                         ret = check_inode_ref(root, &key, path, namebuf,
6060                                               &name_len, &refs, mode);
6061                         err |= ret;
6062                         break;
6063                 case BTRFS_INODE_EXTREF_KEY:
6064                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6065                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
6066                                         root->objectid, key.objectid,
6067                                         key.offset);
6068                         ret = check_inode_extref(root, &key, node, slot, &refs,
6069                                                  mode);
6070                         err |= ret;
6071                         break;
6072                 case BTRFS_DIR_ITEM_KEY:
6073                 case BTRFS_DIR_INDEX_KEY:
6074                         if (!dir) {
6075                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6076                                         root->objectid, inode_id,
6077                                         imode_to_type(mode), key.objectid,
6078                                         key.offset);
6079                         }
6080                         ret = check_dir_item(root, &key, path, &size, ext_ref);
6081                         err |= ret;
6082                         break;
6083                 case BTRFS_EXTENT_DATA_KEY:
6084                         if (dir) {
6085                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6086                                         root->objectid, inode_id, key.objectid,
6087                                         key.offset);
6088                         }
6089                         ret = check_file_extent(root, &key, node, slot,
6090                                                 nodatasum, &extent_size,
6091                                                 &extent_end);
6092                         err |= ret;
6093                         break;
6094                 case BTRFS_XATTR_ITEM_KEY:
6095                         break;
6096                 default:
6097                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6098                               key.objectid, key.type, key.offset);
6099                 }
6100         }
6101
6102 out:
6103         if (err & LAST_ITEM) {
6104                 btrfs_release_path(path);
6105                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6106                 if (ret)
6107                         return err;
6108         }
6109
6110         /* verify INODE_ITEM nlink/isize/nbytes */
6111         if (dir) {
6112                 if (repair && (err & DIR_COUNT_AGAIN)) {
6113                         err &= ~DIR_COUNT_AGAIN;
6114                         count_dir_isize(root, inode_id, &size);
6115                 }
6116
6117                 if ((nlink != 1 || refs != 1) && repair) {
6118                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6119                                 namebuf, name_len, refs, imode_to_type(mode),
6120                                 &nlink);
6121                 }
6122
6123                 if (nlink != 1) {
6124                         err |= LINK_COUNT_ERROR;
6125                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6126                               root->objectid, inode_id, nlink);
6127                 }
6128
6129                 /*
6130                  * Just a warning, as dir inode nbytes is just an
6131                  * instructive value.
6132                  */
6133                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6134                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6135                                 root->objectid, inode_id,
6136                                 root->fs_info->nodesize);
6137                 }
6138
6139                 if (isize != size) {
6140                         if (repair)
6141                                 ret = repair_dir_isize_lowmem(root, path,
6142                                                               inode_id, size);
6143                         if (!repair || ret) {
6144                                 err |= ISIZE_ERROR;
6145                                 error(
6146                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6147                                       root->objectid, inode_id, isize, size);
6148                         }
6149                 }
6150         } else {
6151                 if (nlink != refs) {
6152                         if (repair)
6153                                 ret = repair_inode_nlinks_lowmem(root, path,
6154                                          inode_id, namebuf, name_len, refs,
6155                                          imode_to_type(mode), &nlink);
6156                         if (!repair || ret) {
6157                                 err |= LINK_COUNT_ERROR;
6158                                 error(
6159                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6160                                       root->objectid, inode_id, nlink, refs);
6161                         }
6162                 } else if (!nlink) {
6163                         if (repair)
6164                                 ret = repair_inode_orphan_item_lowmem(root,
6165                                                               path, inode_id);
6166                         if (!repair || ret) {
6167                                 err |= ORPHAN_ITEM;
6168                                 error("root %llu INODE[%llu] is orphan item",
6169                                       root->objectid, inode_id);
6170                         }
6171                 }
6172
6173                 if (!nbytes && !no_holes && extent_end < isize) {
6174                         if (repair)
6175                                 ret = punch_extent_hole(root, inode_id,
6176                                                 extent_end, isize - extent_end);
6177                         if (!repair || ret) {
6178                                 err |= NBYTES_ERROR;
6179                                 error(
6180         "root %llu INODE[%llu] size %llu should have a file extent hole",
6181                                       root->objectid, inode_id, isize);
6182                         }
6183                 }
6184
6185                 if (nbytes != extent_size) {
6186                         if (repair)
6187                                 ret = repair_inode_nbytes_lowmem(root, path,
6188                                                          inode_id, extent_size);
6189                         if (!repair || ret) {
6190                                 err |= NBYTES_ERROR;
6191                                 error(
6192         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6193                                       root->objectid, inode_id, nbytes,
6194                                       extent_size);
6195                         }
6196                 }
6197         }
6198
6199         if (err & LAST_ITEM)
6200                 btrfs_next_item(root, path);
6201         return err;
6202 }
6203
6204 /*
6205  * Insert the missing inode item and inode ref.
6206  *
6207  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6208  * Root dir should be handled specially because root dir is the root of fs.
6209  *
6210  * returns err (>0 or 0) after repair
6211  */
6212 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6213 {
6214         struct btrfs_trans_handle *trans;
6215         struct btrfs_key key;
6216         struct btrfs_path path;
6217         int filetype = BTRFS_FT_DIR;
6218         int ret = 0;
6219
6220         btrfs_init_path(&path);
6221
6222         if (err & INODE_REF_MISSING) {
6223                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6224                 key.type = BTRFS_INODE_REF_KEY;
6225                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6226
6227                 trans = btrfs_start_transaction(root, 1);
6228                 if (IS_ERR(trans)) {
6229                         ret = PTR_ERR(trans);
6230                         goto out;
6231                 }
6232
6233                 btrfs_release_path(&path);
6234                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6235                 if (ret)
6236                         goto trans_fail;
6237
6238                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6239                                              BTRFS_FIRST_FREE_OBJECTID,
6240                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6241                 if (ret)
6242                         goto trans_fail;
6243
6244                 printf("Add INODE_REF[%llu %llu] name %s\n",
6245                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6246                        "..");
6247                 err &= ~INODE_REF_MISSING;
6248 trans_fail:
6249                 if (ret)
6250                         error("fail to insert first inode's ref");
6251                 btrfs_commit_transaction(trans, root);
6252         }
6253
6254         if (err & INODE_ITEM_MISSING) {
6255                 ret = repair_inode_item_missing(root,
6256                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6257                 if (ret)
6258                         goto out;
6259                 err &= ~INODE_ITEM_MISSING;
6260         }
6261 out:
6262         if (ret)
6263                 error("fail to repair first inode");
6264         btrfs_release_path(&path);
6265         return err;
6266 }
6267
6268 /*
6269  * check first root dir's inode_item and inode_ref
6270  *
6271  * returns 0 means no error
6272  * returns >0 means error
6273  * returns <0 means fatal error
6274  */
6275 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6276 {
6277         struct btrfs_path path;
6278         struct btrfs_key key;
6279         struct btrfs_inode_item *ii;
6280         u64 index;
6281         u32 mode;
6282         int err = 0;
6283         int ret;
6284
6285         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6286         key.type = BTRFS_INODE_ITEM_KEY;
6287         key.offset = 0;
6288
6289         /* For root being dropped, we don't need to check first inode */
6290         if (btrfs_root_refs(&root->root_item) == 0 &&
6291             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6292             BTRFS_FIRST_FREE_OBJECTID)
6293                 return 0;
6294
6295         btrfs_init_path(&path);
6296         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6297         if (ret < 0)
6298                 goto out;
6299         if (ret > 0) {
6300                 ret = 0;
6301                 err |= INODE_ITEM_MISSING;
6302         } else {
6303                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6304                                     struct btrfs_inode_item);
6305                 mode = btrfs_inode_mode(path.nodes[0], ii);
6306                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6307                         err |= INODE_ITEM_MISMATCH;
6308         }
6309
6310         /* lookup first inode ref */
6311         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6312         key.type = BTRFS_INODE_REF_KEY;
6313         /* special index value */
6314         index = 0;
6315
6316         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6317         if (ret < 0)
6318                 goto out;
6319         err |= ret;
6320
6321 out:
6322         btrfs_release_path(&path);
6323
6324         if (err && repair)
6325                 err = repair_fs_first_inode(root, err);
6326
6327         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6328                 error("root dir INODE_ITEM is %s",
6329                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6330         if (err & INODE_REF_MISSING)
6331                 error("root dir INODE_REF is missing");
6332
6333         return ret < 0 ? ret : err;
6334 }
6335
6336 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6337                                                 u64 parent, u64 root)
6338 {
6339         struct rb_node *node;
6340         struct tree_backref *back = NULL;
6341         struct tree_backref match = {
6342                 .node = {
6343                         .is_data = 0,
6344                 },
6345         };
6346
6347         if (parent) {
6348                 match.parent = parent;
6349                 match.node.full_backref = 1;
6350         } else {
6351                 match.root = root;
6352         }
6353
6354         node = rb_search(&rec->backref_tree, &match.node.node,
6355                          (rb_compare_keys)compare_extent_backref, NULL);
6356         if (node)
6357                 back = to_tree_backref(rb_node_to_extent_backref(node));
6358
6359         return back;
6360 }
6361
6362 static struct data_backref *find_data_backref(struct extent_record *rec,
6363                                                 u64 parent, u64 root,
6364                                                 u64 owner, u64 offset,
6365                                                 int found_ref,
6366                                                 u64 disk_bytenr, u64 bytes)
6367 {
6368         struct rb_node *node;
6369         struct data_backref *back = NULL;
6370         struct data_backref match = {
6371                 .node = {
6372                         .is_data = 1,
6373                 },
6374                 .owner = owner,
6375                 .offset = offset,
6376                 .bytes = bytes,
6377                 .found_ref = found_ref,
6378                 .disk_bytenr = disk_bytenr,
6379         };
6380
6381         if (parent) {
6382                 match.parent = parent;
6383                 match.node.full_backref = 1;
6384         } else {
6385                 match.root = root;
6386         }
6387
6388         node = rb_search(&rec->backref_tree, &match.node.node,
6389                          (rb_compare_keys)compare_extent_backref, NULL);
6390         if (node)
6391                 back = to_data_backref(rb_node_to_extent_backref(node));
6392
6393         return back;
6394 }
6395 /*
6396  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6397  * blocks and integrity of fs tree items.
6398  *
6399  * @root:         the root of the tree to be checked.
6400  * @ext_ref       feature EXTENDED_IREF is enable or not.
6401  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6402  *                otherwise means check fs tree(s) items relationship and
6403  *                @root MUST be a fs tree root.
6404  * Returns 0      represents OK.
6405  * Returns not 0  represents error.
6406  */
6407 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6408                             struct btrfs_root *root, unsigned int ext_ref,
6409                             int check_all)
6410
6411 {
6412         struct btrfs_path path;
6413         struct node_refs nrefs;
6414         struct btrfs_root_item *root_item = &root->root_item;
6415         int ret;
6416         int level;
6417         int err = 0;
6418
6419         memset(&nrefs, 0, sizeof(nrefs));
6420         if (!check_all) {
6421                 /*
6422                  * We need to manually check the first inode item (256)
6423                  * As the following traversal function will only start from
6424                  * the first inode item in the leaf, if inode item (256) is
6425                  * missing we will skip it forever.
6426                  */
6427                 ret = check_fs_first_inode(root, ext_ref);
6428                 if (ret < 0)
6429                         return ret;
6430         }
6431
6432
6433         level = btrfs_header_level(root->node);
6434         btrfs_init_path(&path);
6435
6436         if (btrfs_root_refs(root_item) > 0 ||
6437             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6438                 path.nodes[level] = root->node;
6439                 path.slots[level] = 0;
6440                 extent_buffer_get(root->node);
6441         } else {
6442                 struct btrfs_key key;
6443
6444                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6445                 level = root_item->drop_level;
6446                 path.lowest_level = level;
6447                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6448                 if (ret < 0)
6449                         goto out;
6450                 ret = 0;
6451         }
6452
6453         while (1) {
6454                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6455                                         ext_ref, check_all);
6456
6457                 err |= !!ret;
6458
6459                 /* if ret is negative, walk shall stop */
6460                 if (ret < 0) {
6461                         ret = err;
6462                         break;
6463                 }
6464
6465                 ret = walk_up_tree_v2(root, &path, &level);
6466                 if (ret != 0) {
6467                         /* Normal exit, reset ret to err */
6468                         ret = err;
6469                         break;
6470                 }
6471         }
6472
6473 out:
6474         btrfs_release_path(&path);
6475         return ret;
6476 }
6477
6478 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
6479
/*
 * Check one fs tree by running check_btrfs_root() on it without a
 * transaction and with check_all disabled.
 *
 * @root:       the root of the tree to be checked.
 * @ext_ref:    the EXTENDED_IREF feature
 *
 * Return 0 if no error found.
 * Return non-zero otherwise.
 */
static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
{
	const int check_all = 0;

	return check_btrfs_root(NULL, root, ext_ref, check_all);
}
6493
6494 /*
6495  * Find the relative ref for root_ref and root_backref.
6496  *
6497  * @root:       the root of the root tree.
6498  * @ref_key:    the key of the root ref.
6499  *
6500  * Return 0 if no error occurred.
6501  */
6502 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6503                           struct extent_buffer *node, int slot)
6504 {
6505         struct btrfs_path path;
6506         struct btrfs_key key;
6507         struct btrfs_root_ref *ref;
6508         struct btrfs_root_ref *backref;
6509         char ref_name[BTRFS_NAME_LEN] = {0};
6510         char backref_name[BTRFS_NAME_LEN] = {0};
6511         u64 ref_dirid;
6512         u64 ref_seq;
6513         u32 ref_namelen;
6514         u64 backref_dirid;
6515         u64 backref_seq;
6516         u32 backref_namelen;
6517         u32 len;
6518         int ret;
6519         int err = 0;
6520
6521         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6522         ref_dirid = btrfs_root_ref_dirid(node, ref);
6523         ref_seq = btrfs_root_ref_sequence(node, ref);
6524         ref_namelen = btrfs_root_ref_name_len(node, ref);
6525
6526         if (ref_namelen <= BTRFS_NAME_LEN) {
6527                 len = ref_namelen;
6528         } else {
6529                 len = BTRFS_NAME_LEN;
6530                 warning("%s[%llu %llu] ref_name too long",
6531                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6532                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6533                         ref_key->offset);
6534         }
6535         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6536
6537         /* Find relative root_ref */
6538         key.objectid = ref_key->offset;
6539         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6540         key.offset = ref_key->objectid;
6541
6542         btrfs_init_path(&path);
6543         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6544         if (ret) {
6545                 err |= ROOT_REF_MISSING;
6546                 error("%s[%llu %llu] couldn't find relative ref",
6547                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6548                       "ROOT_REF" : "ROOT_BACKREF",
6549                       ref_key->objectid, ref_key->offset);
6550                 goto out;
6551         }
6552
6553         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6554                                  struct btrfs_root_ref);
6555         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6556         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6557         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6558
6559         if (backref_namelen <= BTRFS_NAME_LEN) {
6560                 len = backref_namelen;
6561         } else {
6562                 len = BTRFS_NAME_LEN;
6563                 warning("%s[%llu %llu] ref_name too long",
6564                         key.type == BTRFS_ROOT_REF_KEY ?
6565                         "ROOT_REF" : "ROOT_BACKREF",
6566                         key.objectid, key.offset);
6567         }
6568         read_extent_buffer(path.nodes[0], backref_name,
6569                            (unsigned long)(backref + 1), len);
6570
6571         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6572             ref_namelen != backref_namelen ||
6573             strncmp(ref_name, backref_name, len)) {
6574                 err |= ROOT_REF_MISMATCH;
6575                 error("%s[%llu %llu] mismatch relative ref",
6576                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6577                       "ROOT_REF" : "ROOT_BACKREF",
6578                       ref_key->objectid, ref_key->offset);
6579         }
6580 out:
6581         btrfs_release_path(&path);
6582         return err;
6583 }
6584
6585 /*
6586  * Check all fs/file tree in low_memory mode.
6587  *
6588  * 1. for fs tree root item, call check_fs_root_v2()
6589  * 2. for fs tree root ref/backref, call check_root_ref()
6590  *
6591  * Return 0 if no error occurred.
6592  */
6593 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6594 {
6595         struct btrfs_root *tree_root = fs_info->tree_root;
6596         struct btrfs_root *cur_root = NULL;
6597         struct btrfs_path path;
6598         struct btrfs_key key;
6599         struct extent_buffer *node;
6600         unsigned int ext_ref;
6601         int slot;
6602         int ret;
6603         int err = 0;
6604
6605         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6606
6607         btrfs_init_path(&path);
6608         key.objectid = BTRFS_FS_TREE_OBJECTID;
6609         key.offset = 0;
6610         key.type = BTRFS_ROOT_ITEM_KEY;
6611
6612         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6613         if (ret < 0) {
6614                 err = ret;
6615                 goto out;
6616         } else if (ret > 0) {
6617                 err = -ENOENT;
6618                 goto out;
6619         }
6620
6621         while (1) {
6622                 node = path.nodes[0];
6623                 slot = path.slots[0];
6624                 btrfs_item_key_to_cpu(node, &key, slot);
6625                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6626                         goto out;
6627                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6628                     fs_root_objectid(key.objectid)) {
6629                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6630                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6631                                                                        &key);
6632                         } else {
6633                                 key.offset = (u64)-1;
6634                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6635                         }
6636
6637                         if (IS_ERR(cur_root)) {
6638                                 error("Fail to read fs/subvol tree: %lld",
6639                                       key.objectid);
6640                                 err = -EIO;
6641                                 goto next;
6642                         }
6643
6644                         ret = check_fs_root_v2(cur_root, ext_ref);
6645                         err |= ret;
6646
6647                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6648                                 btrfs_free_fs_root(cur_root);
6649                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6650                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6651                         ret = check_root_ref(tree_root, &key, node, slot);
6652                         err |= ret;
6653                 }
6654 next:
6655                 ret = btrfs_next_item(tree_root, &path);
6656                 if (ret > 0)
6657                         goto out;
6658                 if (ret < 0) {
6659                         err = ret;
6660                         goto out;
6661                 }
6662         }
6663
6664 out:
6665         btrfs_release_path(&path);
6666         return err;
6667 }
6668
6669 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6670                           struct cache_tree *root_cache)
6671 {
6672         int ret;
6673
6674         if (!ctx.progress_enabled)
6675                 fprintf(stderr, "checking fs roots\n");
6676         if (check_mode == CHECK_MODE_LOWMEM)
6677                 ret = check_fs_roots_v2(fs_info);
6678         else
6679                 ret = check_fs_roots(fs_info, root_cache);
6680
6681         return ret;
6682 }
6683
6684 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6685 {
6686         struct extent_backref *back, *tmp;
6687         struct tree_backref *tback;
6688         struct data_backref *dback;
6689         u64 found = 0;
6690         int err = 0;
6691
6692         rbtree_postorder_for_each_entry_safe(back, tmp,
6693                                              &rec->backref_tree, node) {
6694                 if (!back->found_extent_tree) {
6695                         err = 1;
6696                         if (!print_errs)
6697                                 goto out;
6698                         if (back->is_data) {
6699                                 dback = to_data_backref(back);
6700                                 fprintf(stderr, "Data backref %llu %s %llu"
6701                                         " owner %llu offset %llu num_refs %lu"
6702                                         " not found in extent tree\n",
6703                                         (unsigned long long)rec->start,
6704                                         back->full_backref ?
6705                                         "parent" : "root",
6706                                         back->full_backref ?
6707                                         (unsigned long long)dback->parent:
6708                                         (unsigned long long)dback->root,
6709                                         (unsigned long long)dback->owner,
6710                                         (unsigned long long)dback->offset,
6711                                         (unsigned long)dback->num_refs);
6712                         } else {
6713                                 tback = to_tree_backref(back);
6714                                 fprintf(stderr, "Tree backref %llu parent %llu"
6715                                         " root %llu not found in extent tree\n",
6716                                         (unsigned long long)rec->start,
6717                                         (unsigned long long)tback->parent,
6718                                         (unsigned long long)tback->root);
6719                         }
6720                 }
6721                 if (!back->is_data && !back->found_ref) {
6722                         err = 1;
6723                         if (!print_errs)
6724                                 goto out;
6725                         tback = to_tree_backref(back);
6726                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6727                                 (unsigned long long)rec->start,
6728                                 back->full_backref ? "parent" : "root",
6729                                 back->full_backref ?
6730                                 (unsigned long long)tback->parent :
6731                                 (unsigned long long)tback->root, back);
6732                 }
6733                 if (back->is_data) {
6734                         dback = to_data_backref(back);
6735                         if (dback->found_ref != dback->num_refs) {
6736                                 err = 1;
6737                                 if (!print_errs)
6738                                         goto out;
6739                                 fprintf(stderr, "Incorrect local backref count"
6740                                         " on %llu %s %llu owner %llu"
6741                                         " offset %llu found %u wanted %u back %p\n",
6742                                         (unsigned long long)rec->start,
6743                                         back->full_backref ?
6744                                         "parent" : "root",
6745                                         back->full_backref ?
6746                                         (unsigned long long)dback->parent:
6747                                         (unsigned long long)dback->root,
6748                                         (unsigned long long)dback->owner,
6749                                         (unsigned long long)dback->offset,
6750                                         dback->found_ref, dback->num_refs, back);
6751                         }
6752                         if (dback->disk_bytenr != rec->start) {
6753                                 err = 1;
6754                                 if (!print_errs)
6755                                         goto out;
6756                                 fprintf(stderr, "Backref disk bytenr does not"
6757                                         " match extent record, bytenr=%llu, "
6758                                         "ref bytenr=%llu\n",
6759                                         (unsigned long long)rec->start,
6760                                         (unsigned long long)dback->disk_bytenr);
6761                         }
6762
6763                         if (dback->bytes != rec->nr) {
6764                                 err = 1;
6765                                 if (!print_errs)
6766                                         goto out;
6767                                 fprintf(stderr, "Backref bytes do not match "
6768                                         "extent backref, bytenr=%llu, ref "
6769                                         "bytes=%llu, backref bytes=%llu\n",
6770                                         (unsigned long long)rec->start,
6771                                         (unsigned long long)rec->nr,
6772                                         (unsigned long long)dback->bytes);
6773                         }
6774                 }
6775                 if (!back->is_data) {
6776                         found += 1;
6777                 } else {
6778                         dback = to_data_backref(back);
6779                         found += dback->found_ref;
6780                 }
6781         }
6782         if (found != rec->refs) {
6783                 err = 1;
6784                 if (!print_errs)
6785                         goto out;
6786                 fprintf(stderr, "Incorrect global backref count "
6787                         "on %llu found %llu wanted %llu\n",
6788                         (unsigned long long)rec->start,
6789                         (unsigned long long)found,
6790                         (unsigned long long)rec->refs);
6791         }
6792 out:
6793         return err;
6794 }
6795
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
6802
6803 static void free_all_extent_backrefs(struct extent_record *rec)
6804 {
6805         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6806 }
6807
6808 static void free_extent_record_cache(struct cache_tree *extent_cache)
6809 {
6810         struct cache_extent *cache;
6811         struct extent_record *rec;
6812
6813         while (1) {
6814                 cache = first_cache_extent(extent_cache);
6815                 if (!cache)
6816                         break;
6817                 rec = container_of(cache, struct extent_record, cache);
6818                 remove_cache_extent(extent_cache, cache);
6819                 free_all_extent_backrefs(rec);
6820                 free(rec);
6821         }
6822 }
6823
6824 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6825                                  struct extent_record *rec)
6826 {
6827         if (rec->content_checked && rec->owner_ref_checked &&
6828             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6829             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6830             !rec->bad_full_backref && !rec->crossing_stripes &&
6831             !rec->wrong_chunk_type) {
6832                 remove_cache_extent(extent_cache, &rec->cache);
6833                 free_all_extent_backrefs(rec);
6834                 list_del_init(&rec->list);
6835                 free(rec);
6836         }
6837         return 0;
6838 }
6839
6840 static int check_owner_ref(struct btrfs_root *root,
6841                             struct extent_record *rec,
6842                             struct extent_buffer *buf)
6843 {
6844         struct extent_backref *node, *tmp;
6845         struct tree_backref *back;
6846         struct btrfs_root *ref_root;
6847         struct btrfs_key key;
6848         struct btrfs_path path;
6849         struct extent_buffer *parent;
6850         int level;
6851         int found = 0;
6852         int ret;
6853
6854         rbtree_postorder_for_each_entry_safe(node, tmp,
6855                                              &rec->backref_tree, node) {
6856                 if (node->is_data)
6857                         continue;
6858                 if (!node->found_ref)
6859                         continue;
6860                 if (node->full_backref)
6861                         continue;
6862                 back = to_tree_backref(node);
6863                 if (btrfs_header_owner(buf) == back->root)
6864                         return 0;
6865         }
6866         BUG_ON(rec->is_root);
6867
6868         /* try to find the block by search corresponding fs tree */
6869         key.objectid = btrfs_header_owner(buf);
6870         key.type = BTRFS_ROOT_ITEM_KEY;
6871         key.offset = (u64)-1;
6872
6873         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6874         if (IS_ERR(ref_root))
6875                 return 1;
6876
6877         level = btrfs_header_level(buf);
6878         if (level == 0)
6879                 btrfs_item_key_to_cpu(buf, &key, 0);
6880         else
6881                 btrfs_node_key_to_cpu(buf, &key, 0);
6882
6883         btrfs_init_path(&path);
6884         path.lowest_level = level + 1;
6885         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6886         if (ret < 0)
6887                 return 0;
6888
6889         parent = path.nodes[level + 1];
6890         if (parent && buf->start == btrfs_node_blockptr(parent,
6891                                                         path.slots[level + 1]))
6892                 found = 1;
6893
6894         btrfs_release_path(&path);
6895         return found ? 0 : 1;
6896 }
6897
6898 static int is_extent_tree_record(struct extent_record *rec)
6899 {
6900         struct extent_backref *node, *tmp;
6901         struct tree_backref *back;
6902         int is_extent = 0;
6903
6904         rbtree_postorder_for_each_entry_safe(node, tmp,
6905                                              &rec->backref_tree, node) {
6906                 if (node->is_data)
6907                         return 0;
6908                 back = to_tree_backref(node);
6909                 if (node->full_backref)
6910                         return 0;
6911                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6912                         is_extent = 1;
6913         }
6914         return is_extent;
6915 }
6916
6917
6918 static int record_bad_block_io(struct btrfs_fs_info *info,
6919                                struct cache_tree *extent_cache,
6920                                u64 start, u64 len)
6921 {
6922         struct extent_record *rec;
6923         struct cache_extent *cache;
6924         struct btrfs_key key;
6925
6926         cache = lookup_cache_extent(extent_cache, start, len);
6927         if (!cache)
6928                 return 0;
6929
6930         rec = container_of(cache, struct extent_record, cache);
6931         if (!is_extent_tree_record(rec))
6932                 return 0;
6933
6934         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6935         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6936 }
6937
6938 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6939                        struct extent_buffer *buf, int slot)
6940 {
6941         if (btrfs_header_level(buf)) {
6942                 struct btrfs_key_ptr ptr1, ptr2;
6943
6944                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6945                                    sizeof(struct btrfs_key_ptr));
6946                 read_extent_buffer(buf, &ptr2,
6947                                    btrfs_node_key_ptr_offset(slot + 1),
6948                                    sizeof(struct btrfs_key_ptr));
6949                 write_extent_buffer(buf, &ptr1,
6950                                     btrfs_node_key_ptr_offset(slot + 1),
6951                                     sizeof(struct btrfs_key_ptr));
6952                 write_extent_buffer(buf, &ptr2,
6953                                     btrfs_node_key_ptr_offset(slot),
6954                                     sizeof(struct btrfs_key_ptr));
6955                 if (slot == 0) {
6956                         struct btrfs_disk_key key;
6957                         btrfs_node_key(buf, &key, 0);
6958                         btrfs_fixup_low_keys(root, path, &key,
6959                                              btrfs_header_level(buf) + 1);
6960                 }
6961         } else {
6962                 struct btrfs_item *item1, *item2;
6963                 struct btrfs_key k1, k2;
6964                 char *item1_data, *item2_data;
6965                 u32 item1_offset, item2_offset, item1_size, item2_size;
6966
6967                 item1 = btrfs_item_nr(slot);
6968                 item2 = btrfs_item_nr(slot + 1);
6969                 btrfs_item_key_to_cpu(buf, &k1, slot);
6970                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6971                 item1_offset = btrfs_item_offset(buf, item1);
6972                 item2_offset = btrfs_item_offset(buf, item2);
6973                 item1_size = btrfs_item_size(buf, item1);
6974                 item2_size = btrfs_item_size(buf, item2);
6975
6976                 item1_data = malloc(item1_size);
6977                 if (!item1_data)
6978                         return -ENOMEM;
6979                 item2_data = malloc(item2_size);
6980                 if (!item2_data) {
6981                         free(item1_data);
6982                         return -ENOMEM;
6983                 }
6984
6985                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6986                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6987
6988                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6989                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6990                 free(item1_data);
6991                 free(item2_data);
6992
6993                 btrfs_set_item_offset(buf, item1, item2_offset);
6994                 btrfs_set_item_offset(buf, item2, item1_offset);
6995                 btrfs_set_item_size(buf, item1, item2_size);
6996                 btrfs_set_item_size(buf, item2, item1_size);
6997
6998                 path->slots[0] = slot;
6999                 btrfs_set_item_key_unsafe(root, path, &k2);
7000                 path->slots[0] = slot + 1;
7001                 btrfs_set_item_key_unsafe(root, path, &k1);
7002         }
7003         return 0;
7004 }
7005
7006 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7007 {
7008         struct extent_buffer *buf;
7009         struct btrfs_key k1, k2;
7010         int i;
7011         int level = path->lowest_level;
7012         int ret = -EIO;
7013
7014         buf = path->nodes[level];
7015         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7016                 if (level) {
7017                         btrfs_node_key_to_cpu(buf, &k1, i);
7018                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
7019                 } else {
7020                         btrfs_item_key_to_cpu(buf, &k1, i);
7021                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
7022                 }
7023                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7024                         continue;
7025                 ret = swap_values(root, path, buf, i);
7026                 if (ret)
7027                         break;
7028                 btrfs_mark_buffer_dirty(buf);
7029                 i = 0;
7030         }
7031         return ret;
7032 }
7033
7034 static int delete_bogus_item(struct btrfs_root *root,
7035                              struct btrfs_path *path,
7036                              struct extent_buffer *buf, int slot)
7037 {
7038         struct btrfs_key key;
7039         int nritems = btrfs_header_nritems(buf);
7040
7041         btrfs_item_key_to_cpu(buf, &key, slot);
7042
7043         /* These are all the keys we can deal with missing. */
7044         if (key.type != BTRFS_DIR_INDEX_KEY &&
7045             key.type != BTRFS_EXTENT_ITEM_KEY &&
7046             key.type != BTRFS_METADATA_ITEM_KEY &&
7047             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7048             key.type != BTRFS_EXTENT_DATA_REF_KEY)
7049                 return -1;
7050
7051         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7052                (unsigned long long)key.objectid, key.type,
7053                (unsigned long long)key.offset, slot, buf->start);
7054         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7055                               btrfs_item_nr_offset(slot + 1),
7056                               sizeof(struct btrfs_item) *
7057                               (nritems - slot - 1));
7058         btrfs_set_header_nritems(buf, nritems - 1);
7059         if (slot == 0) {
7060                 struct btrfs_disk_key disk_key;
7061
7062                 btrfs_item_key(buf, &disk_key, 0);
7063                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7064         }
7065         btrfs_mark_buffer_dirty(buf);
7066         return 0;
7067 }
7068
7069 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7070 {
7071         struct extent_buffer *buf;
7072         int i;
7073         int ret = 0;
7074
7075         /* We should only get this for leaves */
7076         BUG_ON(path->lowest_level);
7077         buf = path->nodes[0];
7078 again:
7079         for (i = 0; i < btrfs_header_nritems(buf); i++) {
7080                 unsigned int shift = 0, offset;
7081
7082                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7083                     BTRFS_LEAF_DATA_SIZE(root)) {
7084                         if (btrfs_item_end_nr(buf, i) >
7085                             BTRFS_LEAF_DATA_SIZE(root)) {
7086                                 ret = delete_bogus_item(root, path, buf, i);
7087                                 if (!ret)
7088                                         goto again;
7089                                 fprintf(stderr, "item is off the end of the "
7090                                         "leaf, can't fix\n");
7091                                 ret = -EIO;
7092                                 break;
7093                         }
7094                         shift = BTRFS_LEAF_DATA_SIZE(root) -
7095                                 btrfs_item_end_nr(buf, i);
7096                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7097                            btrfs_item_offset_nr(buf, i - 1)) {
7098                         if (btrfs_item_end_nr(buf, i) >
7099                             btrfs_item_offset_nr(buf, i - 1)) {
7100                                 ret = delete_bogus_item(root, path, buf, i);
7101                                 if (!ret)
7102                                         goto again;
7103                                 fprintf(stderr, "items overlap, can't fix\n");
7104                                 ret = -EIO;
7105                                 break;
7106                         }
7107                         shift = btrfs_item_offset_nr(buf, i - 1) -
7108                                 btrfs_item_end_nr(buf, i);
7109                 }
7110                 if (!shift)
7111                         continue;
7112
7113                 printf("Shifting item nr %d by %u bytes in block %llu\n",
7114                        i, shift, (unsigned long long)buf->start);
7115                 offset = btrfs_item_offset_nr(buf, i);
7116                 memmove_extent_buffer(buf,
7117                                       btrfs_leaf_data(buf) + offset + shift,
7118                                       btrfs_leaf_data(buf) + offset,
7119                                       btrfs_item_size_nr(buf, i));
7120                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7121                                       offset + shift);
7122                 btrfs_mark_buffer_dirty(buf);
7123         }
7124
7125         /*
7126          * We may have moved things, in which case we want to exit so we don't
7127          * write those changes out.  Once we have proper abort functionality in
7128          * progs this can be changed to something nicer.
7129          */
7130         BUG_ON(ret);
7131         return ret;
7132 }
7133
7134 /*
7135  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
7136  * then just return -EIO.
7137  */
7138 static int try_to_fix_bad_block(struct btrfs_root *root,
7139                                 struct extent_buffer *buf,
7140                                 enum btrfs_tree_block_status status)
7141 {
7142         struct btrfs_trans_handle *trans;
7143         struct ulist *roots;
7144         struct ulist_node *node;
7145         struct btrfs_root *search_root;
7146         struct btrfs_path path;
7147         struct ulist_iterator iter;
7148         struct btrfs_key root_key, key;
7149         int ret;
7150
7151         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7152             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7153                 return -EIO;
7154
7155         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7156         if (ret)
7157                 return -EIO;
7158
7159         btrfs_init_path(&path);
7160         ULIST_ITER_INIT(&iter);
7161         while ((node = ulist_next(roots, &iter))) {
7162                 root_key.objectid = node->val;
7163                 root_key.type = BTRFS_ROOT_ITEM_KEY;
7164                 root_key.offset = (u64)-1;
7165
7166                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7167                 if (IS_ERR(root)) {
7168                         ret = -EIO;
7169                         break;
7170                 }
7171
7172
7173                 trans = btrfs_start_transaction(search_root, 0);
7174                 if (IS_ERR(trans)) {
7175                         ret = PTR_ERR(trans);
7176                         break;
7177                 }
7178
7179                 path.lowest_level = btrfs_header_level(buf);
7180                 path.skip_check_block = 1;
7181                 if (path.lowest_level)
7182                         btrfs_node_key_to_cpu(buf, &key, 0);
7183                 else
7184                         btrfs_item_key_to_cpu(buf, &key, 0);
7185                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7186                 if (ret) {
7187                         ret = -EIO;
7188                         btrfs_commit_transaction(trans, search_root);
7189                         break;
7190                 }
7191                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7192                         ret = fix_key_order(search_root, &path);
7193                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7194                         ret = fix_item_offset(search_root, &path);
7195                 if (ret) {
7196                         btrfs_commit_transaction(trans, search_root);
7197                         break;
7198                 }
7199                 btrfs_release_path(&path);
7200                 btrfs_commit_transaction(trans, search_root);
7201         }
7202         ulist_free(roots);
7203         btrfs_release_path(&path);
7204         return ret;
7205 }
7206
7207 static int check_block(struct btrfs_root *root,
7208                        struct cache_tree *extent_cache,
7209                        struct extent_buffer *buf, u64 flags)
7210 {
7211         struct extent_record *rec;
7212         struct cache_extent *cache;
7213         struct btrfs_key key;
7214         enum btrfs_tree_block_status status;
7215         int ret = 0;
7216         int level;
7217
7218         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7219         if (!cache)
7220                 return 1;
7221         rec = container_of(cache, struct extent_record, cache);
7222         rec->generation = btrfs_header_generation(buf);
7223
7224         level = btrfs_header_level(buf);
7225         if (btrfs_header_nritems(buf) > 0) {
7226
7227                 if (level == 0)
7228                         btrfs_item_key_to_cpu(buf, &key, 0);
7229                 else
7230                         btrfs_node_key_to_cpu(buf, &key, 0);
7231
7232                 rec->info_objectid = key.objectid;
7233         }
7234         rec->info_level = level;
7235
7236         if (btrfs_is_leaf(buf))
7237                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7238         else
7239                 status = btrfs_check_node(root, &rec->parent_key, buf);
7240
7241         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7242                 if (repair)
7243                         status = try_to_fix_bad_block(root, buf, status);
7244                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7245                         ret = -EIO;
7246                         fprintf(stderr, "bad block %llu\n",
7247                                 (unsigned long long)buf->start);
7248                 } else {
7249                         /*
7250                          * Signal to callers we need to start the scan over
7251                          * again since we'll have cowed blocks.
7252                          */
7253                         ret = -EAGAIN;
7254                 }
7255         } else {
7256                 rec->content_checked = 1;
7257                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7258                         rec->owner_ref_checked = 1;
7259                 else {
7260                         ret = check_owner_ref(root, rec, buf);
7261                         if (!ret)
7262                                 rec->owner_ref_checked = 1;
7263                 }
7264         }
7265         if (!ret)
7266                 maybe_free_extent_rec(extent_cache, rec);
7267         return ret;
7268 }
7269
#if 0
/*
 * Compiled out.  Walks the rec->backrefs list looking for a tree backref:
 * with a non-zero @parent it matches full backrefs by parent, otherwise it
 * matches non-full backrefs by @root.  Returns the match or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct tree_backref *back;

                if (node->is_data)
                        continue;

                back = to_tree_backref(node);
                if (parent > 0) {
                        if (node->full_backref && back->parent == parent)
                                return back;
                } else if (!node->full_backref && back->root == root) {
                        return back;
                }
        }
        return NULL;
}
#endif
7299
7300 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7301                                                 u64 parent, u64 root)
7302 {
7303         struct tree_backref *ref = malloc(sizeof(*ref));
7304
7305         if (!ref)
7306                 return NULL;
7307         memset(&ref->node, 0, sizeof(ref->node));
7308         if (parent > 0) {
7309                 ref->parent = parent;
7310                 ref->node.full_backref = 1;
7311         } else {
7312                 ref->root = root;
7313                 ref->node.full_backref = 0;
7314         }
7315
7316         return ref;
7317 }
7318
#if 0
/*
 * Compiled out.  Walks the rec->backrefs list looking for a data backref:
 * with a non-zero @parent it matches full backrefs by parent, otherwise it
 * matches non-full backrefs by (@root, @owner, @offset).  When @found_ref is
 * set and the candidate already has found_ref, the candidate must also agree
 * on @bytes and @disk_bytenr.  Returns the match or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct data_backref *back;

                if (!node->is_data)
                        continue;

                back = to_data_backref(node);
                if (parent > 0) {
                        if (node->full_backref && back->parent == parent)
                                return back;
                        continue;
                }

                if (node->full_backref)
                        continue;
                if (back->root != root || back->owner != owner ||
                    back->offset != offset)
                        continue;
                /* Same key but a different extent: keep looking. */
                if (found_ref && node->found_ref &&
                    (back->bytes != bytes ||
                     back->disk_bytenr != disk_bytenr))
                        continue;
                return back;
        }
        return NULL;
}
#endif
7357
7358 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7359                                                 u64 parent, u64 root,
7360                                                 u64 owner, u64 offset,
7361                                                 u64 max_size)
7362 {
7363         struct data_backref *ref = malloc(sizeof(*ref));
7364
7365         if (!ref)
7366                 return NULL;
7367         memset(&ref->node, 0, sizeof(ref->node));
7368         ref->node.is_data = 1;
7369
7370         if (parent > 0) {
7371                 ref->parent = parent;
7372                 ref->owner = 0;
7373                 ref->offset = 0;
7374                 ref->node.full_backref = 1;
7375         } else {
7376                 ref->root = root;
7377                 ref->owner = owner;
7378                 ref->offset = offset;
7379                 ref->node.full_backref = 0;
7380         }
7381         ref->bytes = max_size;
7382         ref->found_ref = 0;
7383         ref->num_refs = 0;
7384         if (max_size > rec->max_size)
7385                 rec->max_size = max_size;
7386         return ref;
7387 }
7388
7389 /* Check if the type of extent matches with its chunk */
7390 static void check_extent_type(struct extent_record *rec)
7391 {
7392         struct btrfs_block_group_cache *bg_cache;
7393
7394         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7395         if (!bg_cache)
7396                 return;
7397
7398         /* data extent, check chunk directly*/
7399         if (!rec->metadata) {
7400                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7401                         rec->wrong_chunk_type = 1;
7402                 return;
7403         }
7404
7405         /* metadata extent, check the obvious case first */
7406         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7407                                  BTRFS_BLOCK_GROUP_METADATA))) {
7408                 rec->wrong_chunk_type = 1;
7409                 return;
7410         }
7411
7412         /*
7413          * Check SYSTEM extent, as it's also marked as metadata, we can only
7414          * make sure it's a SYSTEM extent by its backref
7415          */
7416         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7417                 struct extent_backref *node;
7418                 struct tree_backref *tback;
7419                 u64 bg_type;
7420
7421                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7422                 if (node->is_data) {
7423                         /* tree block shouldn't have data backref */
7424                         rec->wrong_chunk_type = 1;
7425                         return;
7426                 }
7427                 tback = container_of(node, struct tree_backref, node);
7428
7429                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7430                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7431                 else
7432                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7433                 if (!(bg_cache->flags & bg_type))
7434                         rec->wrong_chunk_type = 1;
7435         }
7436 }
7437
7438 /*
7439  * Allocate a new extent record, fill default values from @tmpl and insert int
7440  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7441  * the cache, otherwise it fails.
7442  */
7443 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7444                 struct extent_record *tmpl)
7445 {
7446         struct extent_record *rec;
7447         int ret = 0;
7448
7449         BUG_ON(tmpl->max_size == 0);
7450         rec = malloc(sizeof(*rec));
7451         if (!rec)
7452                 return -ENOMEM;
7453         rec->start = tmpl->start;
7454         rec->max_size = tmpl->max_size;
7455         rec->nr = max(tmpl->nr, tmpl->max_size);
7456         rec->found_rec = tmpl->found_rec;
7457         rec->content_checked = tmpl->content_checked;
7458         rec->owner_ref_checked = tmpl->owner_ref_checked;
7459         rec->num_duplicates = 0;
7460         rec->metadata = tmpl->metadata;
7461         rec->flag_block_full_backref = FLAG_UNSET;
7462         rec->bad_full_backref = 0;
7463         rec->crossing_stripes = 0;
7464         rec->wrong_chunk_type = 0;
7465         rec->is_root = tmpl->is_root;
7466         rec->refs = tmpl->refs;
7467         rec->extent_item_refs = tmpl->extent_item_refs;
7468         rec->parent_generation = tmpl->parent_generation;
7469         INIT_LIST_HEAD(&rec->backrefs);
7470         INIT_LIST_HEAD(&rec->dups);
7471         INIT_LIST_HEAD(&rec->list);
7472         rec->backref_tree = RB_ROOT;
7473         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7474         rec->cache.start = tmpl->start;
7475         rec->cache.size = tmpl->nr;
7476         ret = insert_cache_extent(extent_cache, &rec->cache);
7477         if (ret) {
7478                 free(rec);
7479                 return ret;
7480         }
7481         bytes_used += rec->nr;
7482
7483         if (tmpl->metadata)
7484                 rec->crossing_stripes = check_crossing_stripes(global_info,
7485                                 rec->start, global_info->nodesize);
7486         check_extent_type(rec);
7487         return ret;
7488 }
7489
7490 /*
7491  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7492  * some are hints:
7493  * - refs              - if found, increase refs
7494  * - is_root           - if found, set
7495  * - content_checked   - if found, set
7496  * - owner_ref_checked - if found, set
7497  *
7498  * If not found, create a new one, initialize and insert.
7499  */
7500 static int add_extent_rec(struct cache_tree *extent_cache,
7501                 struct extent_record *tmpl)
7502 {
7503         struct extent_record *rec;
7504         struct cache_extent *cache;
7505         int ret = 0;
7506         int dup = 0;
7507
7508         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7509         if (cache) {
7510                 rec = container_of(cache, struct extent_record, cache);
7511                 if (tmpl->refs)
7512                         rec->refs++;
7513                 if (rec->nr == 1)
7514                         rec->nr = max(tmpl->nr, tmpl->max_size);
7515
7516                 /*
7517                  * We need to make sure to reset nr to whatever the extent
7518                  * record says was the real size, this way we can compare it to
7519                  * the backrefs.
7520                  */
7521                 if (tmpl->found_rec) {
7522                         if (tmpl->start != rec->start || rec->found_rec) {
7523                                 struct extent_record *tmp;
7524
7525                                 dup = 1;
7526                                 if (list_empty(&rec->list))
7527                                         list_add_tail(&rec->list,
7528                                                       &duplicate_extents);
7529
7530                                 /*
7531                                  * We have to do this song and dance in case we
7532                                  * find an extent record that falls inside of
7533                                  * our current extent record but does not have
7534                                  * the same objectid.
7535                                  */
7536                                 tmp = malloc(sizeof(*tmp));
7537                                 if (!tmp)
7538                                         return -ENOMEM;
7539                                 tmp->start = tmpl->start;
7540                                 tmp->max_size = tmpl->max_size;
7541                                 tmp->nr = tmpl->nr;
7542                                 tmp->found_rec = 1;
7543                                 tmp->metadata = tmpl->metadata;
7544                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7545                                 INIT_LIST_HEAD(&tmp->list);
7546                                 list_add_tail(&tmp->list, &rec->dups);
7547                                 rec->num_duplicates++;
7548                         } else {
7549                                 rec->nr = tmpl->nr;
7550                                 rec->found_rec = 1;
7551                         }
7552                 }
7553
7554                 if (tmpl->extent_item_refs && !dup) {
7555                         if (rec->extent_item_refs) {
7556                                 fprintf(stderr, "block %llu rec "
7557                                         "extent_item_refs %llu, passed %llu\n",
7558                                         (unsigned long long)tmpl->start,
7559                                         (unsigned long long)
7560                                                         rec->extent_item_refs,
7561                                         (unsigned long long)tmpl->extent_item_refs);
7562                         }
7563                         rec->extent_item_refs = tmpl->extent_item_refs;
7564                 }
7565                 if (tmpl->is_root)
7566                         rec->is_root = 1;
7567                 if (tmpl->content_checked)
7568                         rec->content_checked = 1;
7569                 if (tmpl->owner_ref_checked)
7570                         rec->owner_ref_checked = 1;
7571                 memcpy(&rec->parent_key, &tmpl->parent_key,
7572                                 sizeof(tmpl->parent_key));
7573                 if (tmpl->parent_generation)
7574                         rec->parent_generation = tmpl->parent_generation;
7575                 if (rec->max_size < tmpl->max_size)
7576                         rec->max_size = tmpl->max_size;
7577
7578                 /*
7579                  * A metadata extent can't cross stripe_len boundary, otherwise
7580                  * kernel scrub won't be able to handle it.
7581                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7582                  * it.
7583                  */
7584                 if (tmpl->metadata)
7585                         rec->crossing_stripes = check_crossing_stripes(
7586                                         global_info, rec->start,
7587                                         global_info->nodesize);
7588                 check_extent_type(rec);
7589                 maybe_free_extent_rec(extent_cache, rec);
7590                 return ret;
7591         }
7592
7593         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7594
7595         return ret;
7596 }
7597
7598 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7599                             u64 parent, u64 root, int found_ref)
7600 {
7601         struct extent_record *rec;
7602         struct tree_backref *back;
7603         struct cache_extent *cache;
7604         int ret;
7605         bool insert = false;
7606
7607         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7608         if (!cache) {
7609                 struct extent_record tmpl;
7610
7611                 memset(&tmpl, 0, sizeof(tmpl));
7612                 tmpl.start = bytenr;
7613                 tmpl.nr = 1;
7614                 tmpl.metadata = 1;
7615                 tmpl.max_size = 1;
7616
7617                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7618                 if (ret)
7619                         return ret;
7620
7621                 /* really a bug in cache_extent implement now */
7622                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7623                 if (!cache)
7624                         return -ENOENT;
7625         }
7626
7627         rec = container_of(cache, struct extent_record, cache);
7628         if (rec->start != bytenr) {
7629                 /*
7630                  * Several cause, from unaligned bytenr to over lapping extents
7631                  */
7632                 return -EEXIST;
7633         }
7634
7635         back = find_tree_backref(rec, parent, root);
7636         if (!back) {
7637                 back = alloc_tree_backref(rec, parent, root);
7638                 if (!back)
7639                         return -ENOMEM;
7640                 insert = true;
7641         }
7642
7643         if (found_ref) {
7644                 if (back->node.found_ref) {
7645                         fprintf(stderr, "Extent back ref already exists "
7646                                 "for %llu parent %llu root %llu \n",
7647                                 (unsigned long long)bytenr,
7648                                 (unsigned long long)parent,
7649                                 (unsigned long long)root);
7650                 }
7651                 back->node.found_ref = 1;
7652         } else {
7653                 if (back->node.found_extent_tree) {
7654                         fprintf(stderr, "Extent back ref already exists "
7655                                 "for %llu parent %llu root %llu \n",
7656                                 (unsigned long long)bytenr,
7657                                 (unsigned long long)parent,
7658                                 (unsigned long long)root);
7659                 }
7660                 back->node.found_extent_tree = 1;
7661         }
7662         if (insert)
7663                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7664                         compare_extent_backref));
7665         check_extent_type(rec);
7666         maybe_free_extent_rec(extent_cache, rec);
7667         return 0;
7668 }
7669
7670 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7671                             u64 parent, u64 root, u64 owner, u64 offset,
7672                             u32 num_refs, int found_ref, u64 max_size)
7673 {
7674         struct extent_record *rec;
7675         struct data_backref *back;
7676         struct cache_extent *cache;
7677         int ret;
7678         bool insert = false;
7679
7680         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7681         if (!cache) {
7682                 struct extent_record tmpl;
7683
7684                 memset(&tmpl, 0, sizeof(tmpl));
7685                 tmpl.start = bytenr;
7686                 tmpl.nr = 1;
7687                 tmpl.max_size = max_size;
7688
7689                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7690                 if (ret)
7691                         return ret;
7692
7693                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7694                 if (!cache)
7695                         abort();
7696         }
7697
7698         rec = container_of(cache, struct extent_record, cache);
7699         if (rec->max_size < max_size)
7700                 rec->max_size = max_size;
7701
7702         /*
7703          * If found_ref is set then max_size is the real size and must match the
7704          * existing refs.  So if we have already found a ref then we need to
7705          * make sure that this ref matches the existing one, otherwise we need
7706          * to add a new backref so we can notice that the backrefs don't match
7707          * and we need to figure out who is telling the truth.  This is to
7708          * account for that awful fsync bug I introduced where we'd end up with
7709          * a btrfs_file_extent_item that would have its length include multiple
7710          * prealloc extents or point inside of a prealloc extent.
7711          */
7712         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7713                                  bytenr, max_size);
7714         if (!back) {
7715                 back = alloc_data_backref(rec, parent, root, owner, offset,
7716                                           max_size);
7717                 BUG_ON(!back);
7718                 insert = true;
7719         }
7720
7721         if (found_ref) {
7722                 BUG_ON(num_refs != 1);
7723                 if (back->node.found_ref)
7724                         BUG_ON(back->bytes != max_size);
7725                 back->node.found_ref = 1;
7726                 back->found_ref += 1;
7727                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7728                         back->bytes = max_size;
7729                         back->disk_bytenr = bytenr;
7730
7731                         /* Need to reinsert if not already in the tree */
7732                         if (!insert) {
7733                                 rb_erase(&back->node.node, &rec->backref_tree);
7734                                 insert = true;
7735                         }
7736                 }
7737                 rec->refs += 1;
7738                 rec->content_checked = 1;
7739                 rec->owner_ref_checked = 1;
7740         } else {
7741                 if (back->node.found_extent_tree) {
7742                         fprintf(stderr, "Extent back ref already exists "
7743                                 "for %llu parent %llu root %llu "
7744                                 "owner %llu offset %llu num_refs %lu\n",
7745                                 (unsigned long long)bytenr,
7746                                 (unsigned long long)parent,
7747                                 (unsigned long long)root,
7748                                 (unsigned long long)owner,
7749                                 (unsigned long long)offset,
7750                                 (unsigned long)num_refs);
7751                 }
7752                 back->num_refs = num_refs;
7753                 back->node.found_extent_tree = 1;
7754         }
7755         if (insert)
7756                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7757                         compare_extent_backref));
7758
7759         maybe_free_extent_rec(extent_cache, rec);
7760         return 0;
7761 }
7762
7763 static int add_pending(struct cache_tree *pending,
7764                        struct cache_tree *seen, u64 bytenr, u32 size)
7765 {
7766         int ret;
7767         ret = add_cache_extent(seen, bytenr, size);
7768         if (ret)
7769                 return ret;
7770         add_cache_extent(pending, bytenr, size);
7771         return 0;
7772 }
7773
7774 static int pick_next_pending(struct cache_tree *pending,
7775                         struct cache_tree *reada,
7776                         struct cache_tree *nodes,
7777                         u64 last, struct block_info *bits, int bits_nr,
7778                         int *reada_bits)
7779 {
7780         unsigned long node_start = last;
7781         struct cache_extent *cache;
7782         int ret;
7783
7784         cache = search_cache_extent(reada, 0);
7785         if (cache) {
7786                 bits[0].start = cache->start;
7787                 bits[0].size = cache->size;
7788                 *reada_bits = 1;
7789                 return 1;
7790         }
7791         *reada_bits = 0;
7792         if (node_start > 32768)
7793                 node_start -= 32768;
7794
7795         cache = search_cache_extent(nodes, node_start);
7796         if (!cache)
7797                 cache = search_cache_extent(nodes, 0);
7798
7799         if (!cache) {
7800                  cache = search_cache_extent(pending, 0);
7801                  if (!cache)
7802                          return 0;
7803                  ret = 0;
7804                  do {
7805                          bits[ret].start = cache->start;
7806                          bits[ret].size = cache->size;
7807                          cache = next_cache_extent(cache);
7808                          ret++;
7809                  } while (cache && ret < bits_nr);
7810                  return ret;
7811         }
7812
7813         ret = 0;
7814         do {
7815                 bits[ret].start = cache->start;
7816                 bits[ret].size = cache->size;
7817                 cache = next_cache_extent(cache);
7818                 ret++;
7819         } while (cache && ret < bits_nr);
7820
7821         if (bits_nr - ret > 8) {
7822                 u64 lookup = bits[0].start + bits[0].size;
7823                 struct cache_extent *next;
7824                 next = search_cache_extent(pending, lookup);
7825                 while(next) {
7826                         if (next->start - lookup > 32768)
7827                                 break;
7828                         bits[ret].start = next->start;
7829                         bits[ret].size = next->size;
7830                         lookup = next->start + next->size;
7831                         ret++;
7832                         if (ret == bits_nr)
7833                                 break;
7834                         next = next_cache_extent(next);
7835                         if (!next)
7836                                 break;
7837                 }
7838         }
7839         return ret;
7840 }
7841
7842 static void free_chunk_record(struct cache_extent *cache)
7843 {
7844         struct chunk_record *rec;
7845
7846         rec = container_of(cache, struct chunk_record, cache);
7847         list_del_init(&rec->list);
7848         list_del_init(&rec->dextents);
7849         free(rec);
7850 }
7851
7852 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7853 {
7854         cache_tree_free_extents(chunk_cache, free_chunk_record);
7855 }
7856
7857 static void free_device_record(struct rb_node *node)
7858 {
7859         struct device_record *rec;
7860
7861         rec = container_of(node, struct device_record, node);
7862         free(rec);
7863 }
7864
7865 FREE_RB_BASED_TREE(device_cache, free_device_record);
7866
7867 int insert_block_group_record(struct block_group_tree *tree,
7868                               struct block_group_record *bg_rec)
7869 {
7870         int ret;
7871
7872         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7873         if (ret)
7874                 return ret;
7875
7876         list_add_tail(&bg_rec->list, &tree->block_groups);
7877         return 0;
7878 }
7879
7880 static void free_block_group_record(struct cache_extent *cache)
7881 {
7882         struct block_group_record *rec;
7883
7884         rec = container_of(cache, struct block_group_record, cache);
7885         list_del_init(&rec->list);
7886         free(rec);
7887 }
7888
7889 void free_block_group_tree(struct block_group_tree *tree)
7890 {
7891         cache_tree_free_extents(&tree->tree, free_block_group_record);
7892 }
7893
7894 int insert_device_extent_record(struct device_extent_tree *tree,
7895                                 struct device_extent_record *de_rec)
7896 {
7897         int ret;
7898
7899         /*
7900          * Device extent is a bit different from the other extents, because
7901          * the extents which belong to the different devices may have the
7902          * same start and size, so we need use the special extent cache
7903          * search/insert functions.
7904          */
7905         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7906         if (ret)
7907                 return ret;
7908
7909         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7910         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7911         return 0;
7912 }
7913
7914 static void free_device_extent_record(struct cache_extent *cache)
7915 {
7916         struct device_extent_record *rec;
7917
7918         rec = container_of(cache, struct device_extent_record, cache);
7919         if (!list_empty(&rec->chunk_list))
7920                 list_del_init(&rec->chunk_list);
7921         if (!list_empty(&rec->device_list))
7922                 list_del_init(&rec->device_list);
7923         free(rec);
7924 }
7925
7926 void free_device_extent_tree(struct device_extent_tree *tree)
7927 {
7928         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7929 }
7930
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref record.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the v0 ref count.
 * In both cases the item key supplies the extent bytenr (objectid) and the
 * parent (offset).
 *
 * Returns whatever add_tree_backref() / add_data_backref() returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
7951
7952 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7953                                             struct btrfs_key *key,
7954                                             int slot)
7955 {
7956         struct btrfs_chunk *ptr;
7957         struct chunk_record *rec;
7958         int num_stripes, i;
7959
7960         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7961         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7962
7963         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7964         if (!rec) {
7965                 fprintf(stderr, "memory allocation failed\n");
7966                 exit(-1);
7967         }
7968
7969         INIT_LIST_HEAD(&rec->list);
7970         INIT_LIST_HEAD(&rec->dextents);
7971         rec->bg_rec = NULL;
7972
7973         rec->cache.start = key->offset;
7974         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7975
7976         rec->generation = btrfs_header_generation(leaf);
7977
7978         rec->objectid = key->objectid;
7979         rec->type = key->type;
7980         rec->offset = key->offset;
7981
7982         rec->length = rec->cache.size;
7983         rec->owner = btrfs_chunk_owner(leaf, ptr);
7984         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7985         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7986         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7987         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7988         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7989         rec->num_stripes = num_stripes;
7990         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7991
7992         for (i = 0; i < rec->num_stripes; ++i) {
7993                 rec->stripes[i].devid =
7994                         btrfs_stripe_devid_nr(leaf, ptr, i);
7995                 rec->stripes[i].offset =
7996                         btrfs_stripe_offset_nr(leaf, ptr, i);
7997                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7998                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7999                                 BTRFS_UUID_SIZE);
8000         }
8001
8002         return rec;
8003 }
8004
8005 static int process_chunk_item(struct cache_tree *chunk_cache,
8006                               struct btrfs_key *key, struct extent_buffer *eb,
8007                               int slot)
8008 {
8009         struct chunk_record *rec;
8010         struct btrfs_chunk *chunk;
8011         int ret = 0;
8012
8013         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8014         /*
8015          * Do extra check for this chunk item,
8016          *
8017          * It's still possible one can craft a leaf with CHUNK_ITEM, with
8018          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8019          * and owner<->key_type check.
8020          */
8021         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8022                                       key->offset);
8023         if (ret < 0) {
8024                 error("chunk(%llu, %llu) is not valid, ignore it",
8025                       key->offset, btrfs_chunk_length(eb, chunk));
8026                 return 0;
8027         }
8028         rec = btrfs_new_chunk_record(eb, key, slot);
8029         ret = insert_cache_extent(chunk_cache, &rec->cache);
8030         if (ret) {
8031                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8032                         rec->offset, rec->length);
8033                 free(rec);
8034         }
8035
8036         return ret;
8037 }
8038
8039 static int process_device_item(struct rb_root *dev_cache,
8040                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8041 {
8042         struct btrfs_dev_item *ptr;
8043         struct device_record *rec;
8044         int ret = 0;
8045
8046         ptr = btrfs_item_ptr(eb,
8047                 slot, struct btrfs_dev_item);
8048
8049         rec = malloc(sizeof(*rec));
8050         if (!rec) {
8051                 fprintf(stderr, "memory allocation failed\n");
8052                 return -ENOMEM;
8053         }
8054
8055         rec->devid = key->offset;
8056         rec->generation = btrfs_header_generation(eb);
8057
8058         rec->objectid = key->objectid;
8059         rec->type = key->type;
8060         rec->offset = key->offset;
8061
8062         rec->devid = btrfs_device_id(eb, ptr);
8063         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8064         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8065
8066         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8067         if (ret) {
8068                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
8069                 free(rec);
8070         }
8071
8072         return ret;
8073 }
8074
8075 struct block_group_record *
8076 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8077                              int slot)
8078 {
8079         struct btrfs_block_group_item *ptr;
8080         struct block_group_record *rec;
8081
8082         rec = calloc(1, sizeof(*rec));
8083         if (!rec) {
8084                 fprintf(stderr, "memory allocation failed\n");
8085                 exit(-1);
8086         }
8087
8088         rec->cache.start = key->objectid;
8089         rec->cache.size = key->offset;
8090
8091         rec->generation = btrfs_header_generation(leaf);
8092
8093         rec->objectid = key->objectid;
8094         rec->type = key->type;
8095         rec->offset = key->offset;
8096
8097         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8098         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8099
8100         INIT_LIST_HEAD(&rec->list);
8101
8102         return rec;
8103 }
8104
8105 static int process_block_group_item(struct block_group_tree *block_group_cache,
8106                                     struct btrfs_key *key,
8107                                     struct extent_buffer *eb, int slot)
8108 {
8109         struct block_group_record *rec;
8110         int ret = 0;
8111
8112         rec = btrfs_new_block_group_record(eb, key, slot);
8113         ret = insert_block_group_record(block_group_cache, rec);
8114         if (ret) {
8115                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8116                         rec->objectid, rec->offset);
8117                 free(rec);
8118         }
8119
8120         return ret;
8121 }
8122
8123 struct device_extent_record *
8124 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8125                                struct btrfs_key *key, int slot)
8126 {
8127         struct device_extent_record *rec;
8128         struct btrfs_dev_extent *ptr;
8129
8130         rec = calloc(1, sizeof(*rec));
8131         if (!rec) {
8132                 fprintf(stderr, "memory allocation failed\n");
8133                 exit(-1);
8134         }
8135
8136         rec->cache.objectid = key->objectid;
8137         rec->cache.start = key->offset;
8138
8139         rec->generation = btrfs_header_generation(leaf);
8140
8141         rec->objectid = key->objectid;
8142         rec->type = key->type;
8143         rec->offset = key->offset;
8144
8145         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8146         rec->chunk_objecteid =
8147                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8148         rec->chunk_offset =
8149                 btrfs_dev_extent_chunk_offset(leaf, ptr);
8150         rec->length = btrfs_dev_extent_length(leaf, ptr);
8151         rec->cache.size = rec->length;
8152
8153         INIT_LIST_HEAD(&rec->chunk_list);
8154         INIT_LIST_HEAD(&rec->device_list);
8155
8156         return rec;
8157 }
8158
8159 static int
8160 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8161                            struct btrfs_key *key, struct extent_buffer *eb,
8162                            int slot)
8163 {
8164         struct device_extent_record *rec;
8165         int ret;
8166
8167         rec = btrfs_new_device_extent_record(eb, key, slot);
8168         ret = insert_device_extent_record(dev_extent_cache, rec);
8169         if (ret) {
8170                 fprintf(stderr,
8171                         "Device extent[%llu, %llu, %llu] existed.\n",
8172                         rec->objectid, rec->offset, rec->length);
8173                 free(rec);
8174         }
8175
8176         return ret;
8177 }
8178
8179 static int process_extent_item(struct btrfs_root *root,
8180                                struct cache_tree *extent_cache,
8181                                struct extent_buffer *eb, int slot)
8182 {
8183         struct btrfs_extent_item *ei;
8184         struct btrfs_extent_inline_ref *iref;
8185         struct btrfs_extent_data_ref *dref;
8186         struct btrfs_shared_data_ref *sref;
8187         struct btrfs_key key;
8188         struct extent_record tmpl;
8189         unsigned long end;
8190         unsigned long ptr;
8191         int ret;
8192         int type;
8193         u32 item_size = btrfs_item_size_nr(eb, slot);
8194         u64 refs = 0;
8195         u64 offset;
8196         u64 num_bytes;
8197         int metadata = 0;
8198
8199         btrfs_item_key_to_cpu(eb, &key, slot);
8200
8201         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8202                 metadata = 1;
8203                 num_bytes = root->fs_info->nodesize;
8204         } else {
8205                 num_bytes = key.offset;
8206         }
8207
8208         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8209                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8210                       key.objectid, root->fs_info->sectorsize);
8211                 return -EIO;
8212         }
8213         if (item_size < sizeof(*ei)) {
8214 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8215                 struct btrfs_extent_item_v0 *ei0;
8216                 BUG_ON(item_size != sizeof(*ei0));
8217                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8218                 refs = btrfs_extent_refs_v0(eb, ei0);
8219 #else
8220                 BUG();
8221 #endif
8222                 memset(&tmpl, 0, sizeof(tmpl));
8223                 tmpl.start = key.objectid;
8224                 tmpl.nr = num_bytes;
8225                 tmpl.extent_item_refs = refs;
8226                 tmpl.metadata = metadata;
8227                 tmpl.found_rec = 1;
8228                 tmpl.max_size = num_bytes;
8229
8230                 return add_extent_rec(extent_cache, &tmpl);
8231         }
8232
8233         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8234         refs = btrfs_extent_refs(eb, ei);
8235         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8236                 metadata = 1;
8237         else
8238                 metadata = 0;
8239         if (metadata && num_bytes != root->fs_info->nodesize) {
8240                 error("ignore invalid metadata extent, length %llu does not equal to %u",
8241                       num_bytes, root->fs_info->nodesize);
8242                 return -EIO;
8243         }
8244         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8245                 error("ignore invalid data extent, length %llu is not aligned to %u",
8246                       num_bytes, root->fs_info->sectorsize);
8247                 return -EIO;
8248         }
8249
8250         memset(&tmpl, 0, sizeof(tmpl));
8251         tmpl.start = key.objectid;
8252         tmpl.nr = num_bytes;
8253         tmpl.extent_item_refs = refs;
8254         tmpl.metadata = metadata;
8255         tmpl.found_rec = 1;
8256         tmpl.max_size = num_bytes;
8257         add_extent_rec(extent_cache, &tmpl);
8258
8259         ptr = (unsigned long)(ei + 1);
8260         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8261             key.type == BTRFS_EXTENT_ITEM_KEY)
8262                 ptr += sizeof(struct btrfs_tree_block_info);
8263
8264         end = (unsigned long)ei + item_size;
8265         while (ptr < end) {
8266                 iref = (struct btrfs_extent_inline_ref *)ptr;
8267                 type = btrfs_extent_inline_ref_type(eb, iref);
8268                 offset = btrfs_extent_inline_ref_offset(eb, iref);
8269                 switch (type) {
8270                 case BTRFS_TREE_BLOCK_REF_KEY:
8271                         ret = add_tree_backref(extent_cache, key.objectid,
8272                                         0, offset, 0);
8273                         if (ret < 0)
8274                                 error(
8275                         "add_tree_backref failed (extent items tree block): %s",
8276                                       strerror(-ret));
8277                         break;
8278                 case BTRFS_SHARED_BLOCK_REF_KEY:
8279                         ret = add_tree_backref(extent_cache, key.objectid,
8280                                         offset, 0, 0);
8281                         if (ret < 0)
8282                                 error(
8283                         "add_tree_backref failed (extent items shared block): %s",
8284                                       strerror(-ret));
8285                         break;
8286                 case BTRFS_EXTENT_DATA_REF_KEY:
8287                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8288                         add_data_backref(extent_cache, key.objectid, 0,
8289                                         btrfs_extent_data_ref_root(eb, dref),
8290                                         btrfs_extent_data_ref_objectid(eb,
8291                                                                        dref),
8292                                         btrfs_extent_data_ref_offset(eb, dref),
8293                                         btrfs_extent_data_ref_count(eb, dref),
8294                                         0, num_bytes);
8295                         break;
8296                 case BTRFS_SHARED_DATA_REF_KEY:
8297                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8298                         add_data_backref(extent_cache, key.objectid, offset,
8299                                         0, 0, 0,
8300                                         btrfs_shared_data_ref_count(eb, sref),
8301                                         0, num_bytes);
8302                         break;
8303                 default:
8304                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8305                                 key.objectid, key.type, num_bytes);
8306                         goto out;
8307                 }
8308                 ptr += btrfs_extent_inline_ref_size(type);
8309         }
8310         WARN_ON(ptr > end);
8311 out:
8312         return 0;
8313 }
8314
8315 static int check_cache_range(struct btrfs_root *root,
8316                              struct btrfs_block_group_cache *cache,
8317                              u64 offset, u64 bytes)
8318 {
8319         struct btrfs_free_space *entry;
8320         u64 *logical;
8321         u64 bytenr;
8322         int stripe_len;
8323         int i, nr, ret;
8324
8325         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8326                 bytenr = btrfs_sb_offset(i);
8327                 ret = btrfs_rmap_block(root->fs_info,
8328                                        cache->key.objectid, bytenr, 0,
8329                                        &logical, &nr, &stripe_len);
8330                 if (ret)
8331                         return ret;
8332
8333                 while (nr--) {
8334                         if (logical[nr] + stripe_len <= offset)
8335                                 continue;
8336                         if (offset + bytes <= logical[nr])
8337                                 continue;
8338                         if (logical[nr] == offset) {
8339                                 if (stripe_len >= bytes) {
8340                                         free(logical);
8341                                         return 0;
8342                                 }
8343                                 bytes -= stripe_len;
8344                                 offset += stripe_len;
8345                         } else if (logical[nr] < offset) {
8346                                 if (logical[nr] + stripe_len >=
8347                                     offset + bytes) {
8348                                         free(logical);
8349                                         return 0;
8350                                 }
8351                                 bytes = (offset + bytes) -
8352                                         (logical[nr] + stripe_len);
8353                                 offset = logical[nr] + stripe_len;
8354                         } else {
8355                                 /*
8356                                  * Could be tricky, the super may land in the
8357                                  * middle of the area we're checking.  First
8358                                  * check the easiest case, it's at the end.
8359                                  */
8360                                 if (logical[nr] + stripe_len >=
8361                                     bytes + offset) {
8362                                         bytes = logical[nr] - offset;
8363                                         continue;
8364                                 }
8365
8366                                 /* Check the left side */
8367                                 ret = check_cache_range(root, cache,
8368                                                         offset,
8369                                                         logical[nr] - offset);
8370                                 if (ret) {
8371                                         free(logical);
8372                                         return ret;
8373                                 }
8374
8375                                 /* Now we continue with the right side */
8376                                 bytes = (offset + bytes) -
8377                                         (logical[nr] + stripe_len);
8378                                 offset = logical[nr] + stripe_len;
8379                         }
8380                 }
8381
8382                 free(logical);
8383         }
8384
8385         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8386         if (!entry) {
8387                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8388                         offset, offset+bytes);
8389                 return -EINVAL;
8390         }
8391
8392         if (entry->offset != offset) {
8393                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8394                         entry->offset);
8395                 return -EINVAL;
8396         }
8397
8398         if (entry->bytes != bytes) {
8399                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8400                         bytes, entry->bytes, offset);
8401                 return -EINVAL;
8402         }
8403
8404         unlink_free_space(cache->free_space_ctl, entry);
8405         free(entry);
8406         return 0;
8407 }
8408
8409 static int verify_space_cache(struct btrfs_root *root,
8410                               struct btrfs_block_group_cache *cache)
8411 {
8412         struct btrfs_path path;
8413         struct extent_buffer *leaf;
8414         struct btrfs_key key;
8415         u64 last;
8416         int ret = 0;
8417
8418         root = root->fs_info->extent_root;
8419
8420         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8421
8422         btrfs_init_path(&path);
8423         key.objectid = last;
8424         key.offset = 0;
8425         key.type = BTRFS_EXTENT_ITEM_KEY;
8426         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8427         if (ret < 0)
8428                 goto out;
8429         ret = 0;
8430         while (1) {
8431                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8432                         ret = btrfs_next_leaf(root, &path);
8433                         if (ret < 0)
8434                                 goto out;
8435                         if (ret > 0) {
8436                                 ret = 0;
8437                                 break;
8438                         }
8439                 }
8440                 leaf = path.nodes[0];
8441                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8442                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8443                         break;
8444                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8445                     key.type != BTRFS_METADATA_ITEM_KEY) {
8446                         path.slots[0]++;
8447                         continue;
8448                 }
8449
8450                 if (last == key.objectid) {
8451                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8452                                 last = key.objectid + key.offset;
8453                         else
8454                                 last = key.objectid + root->fs_info->nodesize;
8455                         path.slots[0]++;
8456                         continue;
8457                 }
8458
8459                 ret = check_cache_range(root, cache, last,
8460                                         key.objectid - last);
8461                 if (ret)
8462                         break;
8463                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8464                         last = key.objectid + key.offset;
8465                 else
8466                         last = key.objectid + root->fs_info->nodesize;
8467                 path.slots[0]++;
8468         }
8469
8470         if (last < cache->key.objectid + cache->key.offset)
8471                 ret = check_cache_range(root, cache, last,
8472                                         cache->key.objectid +
8473                                         cache->key.offset - last);
8474
8475 out:
8476         btrfs_release_path(&path);
8477
8478         if (!ret &&
8479             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8480                 fprintf(stderr, "There are still entries left in the space "
8481                         "cache\n");
8482                 ret = -EINVAL;
8483         }
8484
8485         return ret;
8486 }
8487
8488 static int check_space_cache(struct btrfs_root *root)
8489 {
8490         struct btrfs_block_group_cache *cache;
8491         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8492         int ret;
8493         int error = 0;
8494
8495         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8496             btrfs_super_generation(root->fs_info->super_copy) !=
8497             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8498                 printf("cache and super generation don't match, space cache "
8499                        "will be invalidated\n");
8500                 return 0;
8501         }
8502
8503         if (ctx.progress_enabled) {
8504                 ctx.tp = TASK_FREE_SPACE;
8505                 task_start(ctx.info);
8506         }
8507
8508         while (1) {
8509                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8510                 if (!cache)
8511                         break;
8512
8513                 start = cache->key.objectid + cache->key.offset;
8514                 if (!cache->free_space_ctl) {
8515                         if (btrfs_init_free_space_ctl(cache,
8516                                                 root->fs_info->sectorsize)) {
8517                                 ret = -ENOMEM;
8518                                 break;
8519                         }
8520                 } else {
8521                         btrfs_remove_free_space_cache(cache);
8522                 }
8523
8524                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8525                         ret = exclude_super_stripes(root, cache);
8526                         if (ret) {
8527                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8528                                         strerror(-ret));
8529                                 error++;
8530                                 continue;
8531                         }
8532                         ret = load_free_space_tree(root->fs_info, cache);
8533                         free_excluded_extents(root, cache);
8534                         if (ret < 0) {
8535                                 fprintf(stderr, "could not load free space tree: %s\n",
8536                                         strerror(-ret));
8537                                 error++;
8538                                 continue;
8539                         }
8540                         error += ret;
8541                 } else {
8542                         ret = load_free_space_cache(root->fs_info, cache);
8543                         if (!ret)
8544                                 continue;
8545                 }
8546
8547                 ret = verify_space_cache(root, cache);
8548                 if (ret) {
8549                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8550                                 cache->key.objectid);
8551                         error++;
8552                 }
8553         }
8554
8555         task_stop(ctx.info);
8556
8557         return error ? -EINVAL : 0;
8558 }
8559
8560 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8561                         u64 num_bytes, unsigned long leaf_offset,
8562                         struct extent_buffer *eb) {
8563
8564         struct btrfs_fs_info *fs_info = root->fs_info;
8565         u64 offset = 0;
8566         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8567         char *data;
8568         unsigned long csum_offset;
8569         u32 csum;
8570         u32 csum_expected;
8571         u64 read_len;
8572         u64 data_checked = 0;
8573         u64 tmp;
8574         int ret = 0;
8575         int mirror;
8576         int num_copies;
8577
8578         if (num_bytes % fs_info->sectorsize)
8579                 return -EINVAL;
8580
8581         data = malloc(num_bytes);
8582         if (!data)
8583                 return -ENOMEM;
8584
8585         while (offset < num_bytes) {
8586                 mirror = 0;
8587 again:
8588                 read_len = num_bytes - offset;
8589                 /* read as much space once a time */
8590                 ret = read_extent_data(fs_info, data + offset,
8591                                 bytenr + offset, &read_len, mirror);
8592                 if (ret)
8593                         goto out;
8594                 data_checked = 0;
8595                 /* verify every 4k data's checksum */
8596                 while (data_checked < read_len) {
8597                         csum = ~(u32)0;
8598                         tmp = offset + data_checked;
8599
8600                         csum = btrfs_csum_data((char *)data + tmp,
8601                                                csum, fs_info->sectorsize);
8602                         btrfs_csum_final(csum, (u8 *)&csum);
8603
8604                         csum_offset = leaf_offset +
8605                                  tmp / fs_info->sectorsize * csum_size;
8606                         read_extent_buffer(eb, (char *)&csum_expected,
8607                                            csum_offset, csum_size);
8608                         /* try another mirror */
8609                         if (csum != csum_expected) {
8610                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8611                                                 mirror, bytenr + tmp,
8612                                                 csum, csum_expected);
8613                                 num_copies = btrfs_num_copies(root->fs_info,
8614                                                 bytenr, num_bytes);
8615                                 if (mirror < num_copies - 1) {
8616                                         mirror += 1;
8617                                         goto again;
8618                                 }
8619                         }
8620                         data_checked += fs_info->sectorsize;
8621                 }
8622                 offset += read_len;
8623         }
8624 out:
8625         free(data);
8626         return ret;
8627 }
8628
8629 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8630                                u64 num_bytes)
8631 {
8632         struct btrfs_path path;
8633         struct extent_buffer *leaf;
8634         struct btrfs_key key;
8635         int ret;
8636
8637         btrfs_init_path(&path);
8638         key.objectid = bytenr;
8639         key.type = BTRFS_EXTENT_ITEM_KEY;
8640         key.offset = (u64)-1;
8641
8642 again:
8643         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8644                                 0, 0);
8645         if (ret < 0) {
8646                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8647                 btrfs_release_path(&path);
8648                 return ret;
8649         } else if (ret) {
8650                 if (path.slots[0] > 0) {
8651                         path.slots[0]--;
8652                 } else {
8653                         ret = btrfs_prev_leaf(root, &path);
8654                         if (ret < 0) {
8655                                 goto out;
8656                         } else if (ret > 0) {
8657                                 ret = 0;
8658                                 goto out;
8659                         }
8660                 }
8661         }
8662
8663         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8664
8665         /*
8666          * Block group items come before extent items if they have the same
8667          * bytenr, so walk back one more just in case.  Dear future traveller,
8668          * first congrats on mastering time travel.  Now if it's not too much
8669          * trouble could you go back to 2006 and tell Chris to make the
8670          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8671          * EXTENT_ITEM_KEY please?
8672          */
8673         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8674                 if (path.slots[0] > 0) {
8675                         path.slots[0]--;
8676                 } else {
8677                         ret = btrfs_prev_leaf(root, &path);
8678                         if (ret < 0) {
8679                                 goto out;
8680                         } else if (ret > 0) {
8681                                 ret = 0;
8682                                 goto out;
8683                         }
8684                 }
8685                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8686         }
8687
8688         while (num_bytes) {
8689                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8690                         ret = btrfs_next_leaf(root, &path);
8691                         if (ret < 0) {
8692                                 fprintf(stderr, "Error going to next leaf "
8693                                         "%d\n", ret);
8694                                 btrfs_release_path(&path);
8695                                 return ret;
8696                         } else if (ret) {
8697                                 break;
8698                         }
8699                 }
8700                 leaf = path.nodes[0];
8701                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8702                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8703                         path.slots[0]++;
8704                         continue;
8705                 }
8706                 if (key.objectid + key.offset < bytenr) {
8707                         path.slots[0]++;
8708                         continue;
8709                 }
8710                 if (key.objectid > bytenr + num_bytes)
8711                         break;
8712
8713                 if (key.objectid == bytenr) {
8714                         if (key.offset >= num_bytes) {
8715                                 num_bytes = 0;
8716                                 break;
8717                         }
8718                         num_bytes -= key.offset;
8719                         bytenr += key.offset;
8720                 } else if (key.objectid < bytenr) {
8721                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8722                                 num_bytes = 0;
8723                                 break;
8724                         }
8725                         num_bytes = (bytenr + num_bytes) -
8726                                 (key.objectid + key.offset);
8727                         bytenr = key.objectid + key.offset;
8728                 } else {
8729                         if (key.objectid + key.offset < bytenr + num_bytes) {
8730                                 u64 new_start = key.objectid + key.offset;
8731                                 u64 new_bytes = bytenr + num_bytes - new_start;
8732
8733                                 /*
8734                                  * Weird case, the extent is in the middle of
8735                                  * our range, we'll have to search one side
8736                                  * and then the other.  Not sure if this happens
8737                                  * in real life, but no harm in coding it up
8738                                  * anyway just in case.
8739                                  */
8740                                 btrfs_release_path(&path);
8741                                 ret = check_extent_exists(root, new_start,
8742                                                           new_bytes);
8743                                 if (ret) {
8744                                         fprintf(stderr, "Right section didn't "
8745                                                 "have a record\n");
8746                                         break;
8747                                 }
8748                                 num_bytes = key.objectid - bytenr;
8749                                 goto again;
8750                         }
8751                         num_bytes = key.objectid - bytenr;
8752                 }
8753                 path.slots[0]++;
8754         }
8755         ret = 0;
8756
8757 out:
8758         if (num_bytes && !ret) {
8759                 fprintf(stderr, "There are no extents for csum range "
8760                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8761                 ret = 1;
8762         }
8763
8764         btrfs_release_path(&path);
8765         return ret;
8766 }
8767
8768 static int check_csums(struct btrfs_root *root)
8769 {
8770         struct btrfs_path path;
8771         struct extent_buffer *leaf;
8772         struct btrfs_key key;
8773         u64 offset = 0, num_bytes = 0;
8774         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8775         int errors = 0;
8776         int ret;
8777         u64 data_len;
8778         unsigned long leaf_offset;
8779
8780         root = root->fs_info->csum_root;
8781         if (!extent_buffer_uptodate(root->node)) {
8782                 fprintf(stderr, "No valid csum tree found\n");
8783                 return -ENOENT;
8784         }
8785
8786         btrfs_init_path(&path);
8787         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8788         key.type = BTRFS_EXTENT_CSUM_KEY;
8789         key.offset = 0;
8790         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8791         if (ret < 0) {
8792                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8793                 btrfs_release_path(&path);
8794                 return ret;
8795         }
8796
8797         if (ret > 0 && path.slots[0])
8798                 path.slots[0]--;
8799         ret = 0;
8800
8801         while (1) {
8802                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8803                         ret = btrfs_next_leaf(root, &path);
8804                         if (ret < 0) {
8805                                 fprintf(stderr, "Error going to next leaf "
8806                                         "%d\n", ret);
8807                                 break;
8808                         }
8809                         if (ret)
8810                                 break;
8811                 }
8812                 leaf = path.nodes[0];
8813
8814                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8815                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8816                         path.slots[0]++;
8817                         continue;
8818                 }
8819
8820                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8821                               csum_size) * root->fs_info->sectorsize;
8822                 if (!check_data_csum)
8823                         goto skip_csum_check;
8824                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8825                 ret = check_extent_csums(root, key.offset, data_len,
8826                                          leaf_offset, leaf);
8827                 if (ret)
8828                         break;
8829 skip_csum_check:
8830                 if (!num_bytes) {
8831                         offset = key.offset;
8832                 } else if (key.offset != offset + num_bytes) {
8833                         ret = check_extent_exists(root, offset, num_bytes);
8834                         if (ret) {
8835                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8836                                         "there is no extent record\n",
8837                                         offset, offset+num_bytes);
8838                                 errors++;
8839                         }
8840                         offset = key.offset;
8841                         num_bytes = 0;
8842                 }
8843                 num_bytes += data_len;
8844                 path.slots[0]++;
8845         }
8846
8847         btrfs_release_path(&path);
8848         return errors;
8849 }
8850
8851 static int is_dropped_key(struct btrfs_key *key,
8852                           struct btrfs_key *drop_key) {
8853         if (key->objectid < drop_key->objectid)
8854                 return 1;
8855         else if (key->objectid == drop_key->objectid) {
8856                 if (key->type < drop_key->type)
8857                         return 1;
8858                 else if (key->type == drop_key->type) {
8859                         if (key->offset < drop_key->offset)
8860                                 return 1;
8861                 }
8862         }
8863         return 0;
8864 }
8865
8866 /*
8867  * Here are the rules for FULL_BACKREF.
8868  *
8869  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8870  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8871  *      FULL_BACKREF set.
8872  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8873  *    if it happened after the relocation occurred since we'll have dropped the
8874  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8875  *    have no real way to know for sure.
8876  *
8877  * We process the blocks one root at a time, and we start from the lowest root
8878  * objectid and go to the highest.  So we can just lookup the owner backref for
8879  * the record and if we don't find it then we know it doesn't exist and we have
8880  * a FULL BACKREF.
8881  *
8882  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8883  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8884  * be set or not and then we can check later once we've gathered all the refs.
8885  */
8886 static int calc_extent_flag(struct cache_tree *extent_cache,
8887                            struct extent_buffer *buf,
8888                            struct root_item_record *ri,
8889                            u64 *flags)
8890 {
8891         struct extent_record *rec;
8892         struct cache_extent *cache;
8893         struct tree_backref *tback;
8894         u64 owner = 0;
8895
8896         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8897         /* we have added this extent before */
8898         if (!cache)
8899                 return -ENOENT;
8900
8901         rec = container_of(cache, struct extent_record, cache);
8902
8903         /*
8904          * Except file/reloc tree, we can not have
8905          * FULL BACKREF MODE
8906          */
8907         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8908                 goto normal;
8909         /*
8910          * root node
8911          */
8912         if (buf->start == ri->bytenr)
8913                 goto normal;
8914
8915         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8916                 goto full_backref;
8917
8918         owner = btrfs_header_owner(buf);
8919         if (owner == ri->objectid)
8920                 goto normal;
8921
8922         tback = find_tree_backref(rec, 0, owner);
8923         if (!tback)
8924                 goto full_backref;
8925 normal:
8926         *flags = 0;
8927         if (rec->flag_block_full_backref != FLAG_UNSET &&
8928             rec->flag_block_full_backref != 0)
8929                 rec->bad_full_backref = 1;
8930         return 0;
8931 full_backref:
8932         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8933         if (rec->flag_block_full_backref != FLAG_UNSET &&
8934             rec->flag_block_full_backref != 1)
8935                 rec->bad_full_backref = 1;
8936         return 0;
8937 }
8938
8939 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8940 {
8941         fprintf(stderr, "Invalid key type(");
8942         print_key_type(stderr, 0, key_type);
8943         fprintf(stderr, ") found in root(");
8944         print_objectid(stderr, rootid, 0);
8945         fprintf(stderr, ")\n");
8946 }
8947
8948 /*
8949  * Check if the key is valid with its extent buffer.
8950  *
8951  * This is a early check in case invalid key exists in a extent buffer
8952  * This is not comprehensive yet, but should prevent wrong key/item passed
8953  * further
8954  */
8955 static int check_type_with_root(u64 rootid, u8 key_type)
8956 {
8957         switch (key_type) {
8958         /* Only valid in chunk tree */
8959         case BTRFS_DEV_ITEM_KEY:
8960         case BTRFS_CHUNK_ITEM_KEY:
8961                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8962                         goto err;
8963                 break;
8964         /* valid in csum and log tree */
8965         case BTRFS_CSUM_TREE_OBJECTID:
8966                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8967                       is_fstree(rootid)))
8968                         goto err;
8969                 break;
8970         case BTRFS_EXTENT_ITEM_KEY:
8971         case BTRFS_METADATA_ITEM_KEY:
8972         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8973                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8974                         goto err;
8975                 break;
8976         case BTRFS_ROOT_ITEM_KEY:
8977                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8978                         goto err;
8979                 break;
8980         case BTRFS_DEV_EXTENT_KEY:
8981                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8982                         goto err;
8983                 break;
8984         }
8985         return 0;
8986 err:
8987         report_mismatch_key_root(key_type, rootid);
8988         return -EINVAL;
8989 }
8990
8991 static int run_next_block(struct btrfs_root *root,
8992                           struct block_info *bits,
8993                           int bits_nr,
8994                           u64 *last,
8995                           struct cache_tree *pending,
8996                           struct cache_tree *seen,
8997                           struct cache_tree *reada,
8998                           struct cache_tree *nodes,
8999                           struct cache_tree *extent_cache,
9000                           struct cache_tree *chunk_cache,
9001                           struct rb_root *dev_cache,
9002                           struct block_group_tree *block_group_cache,
9003                           struct device_extent_tree *dev_extent_cache,
9004                           struct root_item_record *ri)
9005 {
9006         struct btrfs_fs_info *fs_info = root->fs_info;
9007         struct extent_buffer *buf;
9008         struct extent_record *rec = NULL;
9009         u64 bytenr;
9010         u32 size;
9011         u64 parent;
9012         u64 owner;
9013         u64 flags;
9014         u64 ptr;
9015         u64 gen = 0;
9016         int ret = 0;
9017         int i;
9018         int nritems;
9019         struct btrfs_key key;
9020         struct cache_extent *cache;
9021         int reada_bits;
9022
9023         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9024                                     bits_nr, &reada_bits);
9025         if (nritems == 0)
9026                 return 1;
9027
9028         if (!reada_bits) {
9029                 for(i = 0; i < nritems; i++) {
9030                         ret = add_cache_extent(reada, bits[i].start,
9031                                                bits[i].size);
9032                         if (ret == -EEXIST)
9033                                 continue;
9034
9035                         /* fixme, get the parent transid */
9036                         readahead_tree_block(fs_info, bits[i].start, 0);
9037                 }
9038         }
9039         *last = bits[0].start;
9040         bytenr = bits[0].start;
9041         size = bits[0].size;
9042
9043         cache = lookup_cache_extent(pending, bytenr, size);
9044         if (cache) {
9045                 remove_cache_extent(pending, cache);
9046                 free(cache);
9047         }
9048         cache = lookup_cache_extent(reada, bytenr, size);
9049         if (cache) {
9050                 remove_cache_extent(reada, cache);
9051                 free(cache);
9052         }
9053         cache = lookup_cache_extent(nodes, bytenr, size);
9054         if (cache) {
9055                 remove_cache_extent(nodes, cache);
9056                 free(cache);
9057         }
9058         cache = lookup_cache_extent(extent_cache, bytenr, size);
9059         if (cache) {
9060                 rec = container_of(cache, struct extent_record, cache);
9061                 gen = rec->parent_generation;
9062         }
9063
9064         /* fixme, get the real parent transid */
9065         buf = read_tree_block(root->fs_info, bytenr, gen);
9066         if (!extent_buffer_uptodate(buf)) {
9067                 record_bad_block_io(root->fs_info,
9068                                     extent_cache, bytenr, size);
9069                 goto out;
9070         }
9071
9072         nritems = btrfs_header_nritems(buf);
9073
9074         flags = 0;
9075         if (!init_extent_tree) {
9076                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9077                                        btrfs_header_level(buf), 1, NULL,
9078                                        &flags);
9079                 if (ret < 0) {
9080                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9081                         if (ret < 0) {
9082                                 fprintf(stderr, "Couldn't calc extent flags\n");
9083                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9084                         }
9085                 }
9086         } else {
9087                 flags = 0;
9088                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9089                 if (ret < 0) {
9090                         fprintf(stderr, "Couldn't calc extent flags\n");
9091                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9092                 }
9093         }
9094
9095         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9096                 if (ri != NULL &&
9097                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9098                     ri->objectid == btrfs_header_owner(buf)) {
9099                         /*
9100                          * Ok we got to this block from it's original owner and
9101                          * we have FULL_BACKREF set.  Relocation can leave
9102                          * converted blocks over so this is altogether possible,
9103                          * however it's not possible if the generation > the
9104                          * last snapshot, so check for this case.
9105                          */
9106                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9107                             btrfs_header_generation(buf) > ri->last_snapshot) {
9108                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9109                                 rec->bad_full_backref = 1;
9110                         }
9111                 }
9112         } else {
9113                 if (ri != NULL &&
9114                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9115                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9116                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9117                         rec->bad_full_backref = 1;
9118                 }
9119         }
9120
9121         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9122                 rec->flag_block_full_backref = 1;
9123                 parent = bytenr;
9124                 owner = 0;
9125         } else {
9126                 rec->flag_block_full_backref = 0;
9127                 parent = 0;
9128                 owner = btrfs_header_owner(buf);
9129         }
9130
9131         ret = check_block(root, extent_cache, buf, flags);
9132         if (ret)
9133                 goto out;
9134
9135         if (btrfs_is_leaf(buf)) {
9136                 btree_space_waste += btrfs_leaf_free_space(root, buf);
9137                 for (i = 0; i < nritems; i++) {
9138                         struct btrfs_file_extent_item *fi;
9139                         btrfs_item_key_to_cpu(buf, &key, i);
9140                         /*
9141                          * Check key type against the leaf owner.
9142                          * Could filter quite a lot of early error if
9143                          * owner is correct
9144                          */
9145                         if (check_type_with_root(btrfs_header_owner(buf),
9146                                                  key.type)) {
9147                                 fprintf(stderr, "ignoring invalid key\n");
9148                                 continue;
9149                         }
9150                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9151                                 process_extent_item(root, extent_cache, buf,
9152                                                     i);
9153                                 continue;
9154                         }
9155                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9156                                 process_extent_item(root, extent_cache, buf,
9157                                                     i);
9158                                 continue;
9159                         }
9160                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9161                                 total_csum_bytes +=
9162                                         btrfs_item_size_nr(buf, i);
9163                                 continue;
9164                         }
9165                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9166                                 process_chunk_item(chunk_cache, &key, buf, i);
9167                                 continue;
9168                         }
9169                         if (key.type == BTRFS_DEV_ITEM_KEY) {
9170                                 process_device_item(dev_cache, &key, buf, i);
9171                                 continue;
9172                         }
9173                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9174                                 process_block_group_item(block_group_cache,
9175                                         &key, buf, i);
9176                                 continue;
9177                         }
9178                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
9179                                 process_device_extent_item(dev_extent_cache,
9180                                         &key, buf, i);
9181                                 continue;
9182
9183                         }
9184                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9185 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9186                                 process_extent_ref_v0(extent_cache, buf, i);
9187 #else
9188                                 BUG();
9189 #endif
9190                                 continue;
9191                         }
9192
9193                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9194                                 ret = add_tree_backref(extent_cache,
9195                                                 key.objectid, 0, key.offset, 0);
9196                                 if (ret < 0)
9197                                         error(
9198                                 "add_tree_backref failed (leaf tree block): %s",
9199                                               strerror(-ret));
9200                                 continue;
9201                         }
9202                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9203                                 ret = add_tree_backref(extent_cache,
9204                                                 key.objectid, key.offset, 0, 0);
9205                                 if (ret < 0)
9206                                         error(
9207                                 "add_tree_backref failed (leaf shared block): %s",
9208                                               strerror(-ret));
9209                                 continue;
9210                         }
9211                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9212                                 struct btrfs_extent_data_ref *ref;
9213                                 ref = btrfs_item_ptr(buf, i,
9214                                                 struct btrfs_extent_data_ref);
9215                                 add_data_backref(extent_cache,
9216                                         key.objectid, 0,
9217                                         btrfs_extent_data_ref_root(buf, ref),
9218                                         btrfs_extent_data_ref_objectid(buf,
9219                                                                        ref),
9220                                         btrfs_extent_data_ref_offset(buf, ref),
9221                                         btrfs_extent_data_ref_count(buf, ref),
9222                                         0, root->fs_info->sectorsize);
9223                                 continue;
9224                         }
9225                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9226                                 struct btrfs_shared_data_ref *ref;
9227                                 ref = btrfs_item_ptr(buf, i,
9228                                                 struct btrfs_shared_data_ref);
9229                                 add_data_backref(extent_cache,
9230                                         key.objectid, key.offset, 0, 0, 0,
9231                                         btrfs_shared_data_ref_count(buf, ref),
9232                                         0, root->fs_info->sectorsize);
9233                                 continue;
9234                         }
9235                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9236                                 struct bad_item *bad;
9237
9238                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9239                                         continue;
9240                                 if (!owner)
9241                                         continue;
9242                                 bad = malloc(sizeof(struct bad_item));
9243                                 if (!bad)
9244                                         continue;
9245                                 INIT_LIST_HEAD(&bad->list);
9246                                 memcpy(&bad->key, &key,
9247                                        sizeof(struct btrfs_key));
9248                                 bad->root_id = owner;
9249                                 list_add_tail(&bad->list, &delete_items);
9250                                 continue;
9251                         }
9252                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9253                                 continue;
9254                         fi = btrfs_item_ptr(buf, i,
9255                                             struct btrfs_file_extent_item);
9256                         if (btrfs_file_extent_type(buf, fi) ==
9257                             BTRFS_FILE_EXTENT_INLINE)
9258                                 continue;
9259                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9260                                 continue;
9261
9262                         data_bytes_allocated +=
9263                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9264                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9265                                 abort();
9266                         }
9267                         data_bytes_referenced +=
9268                                 btrfs_file_extent_num_bytes(buf, fi);
9269                         add_data_backref(extent_cache,
9270                                 btrfs_file_extent_disk_bytenr(buf, fi),
9271                                 parent, owner, key.objectid, key.offset -
9272                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9273                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9274                 }
9275         } else {
9276                 int level;
9277                 struct btrfs_key first_key;
9278
9279                 first_key.objectid = 0;
9280
9281                 if (nritems > 0)
9282                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9283                 level = btrfs_header_level(buf);
9284                 for (i = 0; i < nritems; i++) {
9285                         struct extent_record tmpl;
9286
9287                         ptr = btrfs_node_blockptr(buf, i);
9288                         size = root->fs_info->nodesize;
9289                         btrfs_node_key_to_cpu(buf, &key, i);
9290                         if (ri != NULL) {
9291                                 if ((level == ri->drop_level)
9292                                     && is_dropped_key(&key, &ri->drop_key)) {
9293                                         continue;
9294                                 }
9295                         }
9296
9297                         memset(&tmpl, 0, sizeof(tmpl));
9298                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9299                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9300                         tmpl.start = ptr;
9301                         tmpl.nr = size;
9302                         tmpl.refs = 1;
9303                         tmpl.metadata = 1;
9304                         tmpl.max_size = size;
9305                         ret = add_extent_rec(extent_cache, &tmpl);
9306                         if (ret < 0)
9307                                 goto out;
9308
9309                         ret = add_tree_backref(extent_cache, ptr, parent,
9310                                         owner, 1);
9311                         if (ret < 0) {
9312                                 error(
9313                                 "add_tree_backref failed (non-leaf block): %s",
9314                                       strerror(-ret));
9315                                 continue;
9316                         }
9317
9318                         if (level > 1) {
9319                                 add_pending(nodes, seen, ptr, size);
9320                         } else {
9321                                 add_pending(pending, seen, ptr, size);
9322                         }
9323                 }
9324                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9325                                       nritems) * sizeof(struct btrfs_key_ptr);
9326         }
9327         total_btree_bytes += buf->len;
9328         if (fs_root_objectid(btrfs_header_owner(buf)))
9329                 total_fs_tree_bytes += buf->len;
9330         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9331                 total_extent_tree_bytes += buf->len;
9332 out:
9333         free_extent_buffer(buf);
9334         return ret;
9335 }
9336
9337 static int add_root_to_pending(struct extent_buffer *buf,
9338                                struct cache_tree *extent_cache,
9339                                struct cache_tree *pending,
9340                                struct cache_tree *seen,
9341                                struct cache_tree *nodes,
9342                                u64 objectid)
9343 {
9344         struct extent_record tmpl;
9345         int ret;
9346
9347         if (btrfs_header_level(buf) > 0)
9348                 add_pending(nodes, seen, buf->start, buf->len);
9349         else
9350                 add_pending(pending, seen, buf->start, buf->len);
9351
9352         memset(&tmpl, 0, sizeof(tmpl));
9353         tmpl.start = buf->start;
9354         tmpl.nr = buf->len;
9355         tmpl.is_root = 1;
9356         tmpl.refs = 1;
9357         tmpl.metadata = 1;
9358         tmpl.max_size = buf->len;
9359         add_extent_rec(extent_cache, &tmpl);
9360
9361         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9362             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9363                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9364                                 0, 1);
9365         else
9366                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9367                                 1);
9368         return ret;
9369 }
9370
9371 /* as we fix the tree, we might be deleting blocks that
9372  * we're tracking for repair.  This hook makes sure we
9373  * remove any backrefs for blocks as we are fixing them.
9374  */
9375 static int free_extent_hook(struct btrfs_trans_handle *trans,
9376                             struct btrfs_root *root,
9377                             u64 bytenr, u64 num_bytes, u64 parent,
9378                             u64 root_objectid, u64 owner, u64 offset,
9379                             int refs_to_drop)
9380 {
9381         struct extent_record *rec;
9382         struct cache_extent *cache;
9383         int is_data;
9384         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9385
9386         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9387         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9388         if (!cache)
9389                 return 0;
9390
9391         rec = container_of(cache, struct extent_record, cache);
9392         if (is_data) {
9393                 struct data_backref *back;
9394                 back = find_data_backref(rec, parent, root_objectid, owner,
9395                                          offset, 1, bytenr, num_bytes);
9396                 if (!back)
9397                         goto out;
9398                 if (back->node.found_ref) {
9399                         back->found_ref -= refs_to_drop;
9400                         if (rec->refs)
9401                                 rec->refs -= refs_to_drop;
9402                 }
9403                 if (back->node.found_extent_tree) {
9404                         back->num_refs -= refs_to_drop;
9405                         if (rec->extent_item_refs)
9406                                 rec->extent_item_refs -= refs_to_drop;
9407                 }
9408                 if (back->found_ref == 0)
9409                         back->node.found_ref = 0;
9410                 if (back->num_refs == 0)
9411                         back->node.found_extent_tree = 0;
9412
9413                 if (!back->node.found_extent_tree && back->node.found_ref) {
9414                         rb_erase(&back->node.node, &rec->backref_tree);
9415                         free(back);
9416                 }
9417         } else {
9418                 struct tree_backref *back;
9419                 back = find_tree_backref(rec, parent, root_objectid);
9420                 if (!back)
9421                         goto out;
9422                 if (back->node.found_ref) {
9423                         if (rec->refs)
9424                                 rec->refs--;
9425                         back->node.found_ref = 0;
9426                 }
9427                 if (back->node.found_extent_tree) {
9428                         if (rec->extent_item_refs)
9429                                 rec->extent_item_refs--;
9430                         back->node.found_extent_tree = 0;
9431                 }
9432                 if (!back->node.found_extent_tree && back->node.found_ref) {
9433                         rb_erase(&back->node.node, &rec->backref_tree);
9434                         free(back);
9435                 }
9436         }
9437         maybe_free_extent_rec(extent_cache, rec);
9438 out:
9439         return 0;
9440 }
9441
9442 static int delete_extent_records(struct btrfs_trans_handle *trans,
9443                                  struct btrfs_root *root,
9444                                  struct btrfs_path *path,
9445                                  u64 bytenr)
9446 {
9447         struct btrfs_key key;
9448         struct btrfs_key found_key;
9449         struct extent_buffer *leaf;
9450         int ret;
9451         int slot;
9452
9453
9454         key.objectid = bytenr;
9455         key.type = (u8)-1;
9456         key.offset = (u64)-1;
9457
9458         while(1) {
9459                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9460                                         &key, path, 0, 1);
9461                 if (ret < 0)
9462                         break;
9463
9464                 if (ret > 0) {
9465                         ret = 0;
9466                         if (path->slots[0] == 0)
9467                                 break;
9468                         path->slots[0]--;
9469                 }
9470                 ret = 0;
9471
9472                 leaf = path->nodes[0];
9473                 slot = path->slots[0];
9474
9475                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9476                 if (found_key.objectid != bytenr)
9477                         break;
9478
9479                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9480                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9481                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9482                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9483                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9484                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9485                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9486                         btrfs_release_path(path);
9487                         if (found_key.type == 0) {
9488                                 if (found_key.offset == 0)
9489                                         break;
9490                                 key.offset = found_key.offset - 1;
9491                                 key.type = found_key.type;
9492                         }
9493                         key.type = found_key.type - 1;
9494                         key.offset = (u64)-1;
9495                         continue;
9496                 }
9497
9498                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9499                         found_key.objectid, found_key.type, found_key.offset);
9500
9501                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9502                 if (ret)
9503                         break;
9504                 btrfs_release_path(path);
9505
9506                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9507                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9508                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9509                                 found_key.offset : root->fs_info->nodesize;
9510
9511                         ret = btrfs_update_block_group(trans, root, bytenr,
9512                                                        bytes, 0, 0);
9513                         if (ret)
9514                                 break;
9515                 }
9516         }
9517
9518         btrfs_release_path(path);
9519         return ret;
9520 }
9521
9522 /*
9523  * for a single backref, this will allocate a new extent
9524  * and add the backref to it.
9525  */
9526 static int record_extent(struct btrfs_trans_handle *trans,
9527                          struct btrfs_fs_info *info,
9528                          struct btrfs_path *path,
9529                          struct extent_record *rec,
9530                          struct extent_backref *back,
9531                          int allocated, u64 flags)
9532 {
9533         int ret = 0;
9534         struct btrfs_root *extent_root = info->extent_root;
9535         struct extent_buffer *leaf;
9536         struct btrfs_key ins_key;
9537         struct btrfs_extent_item *ei;
9538         struct data_backref *dback;
9539         struct btrfs_tree_block_info *bi;
9540
9541         if (!back->is_data)
9542                 rec->max_size = max_t(u64, rec->max_size,
9543                                     info->nodesize);
9544
9545         if (!allocated) {
9546                 u32 item_size = sizeof(*ei);
9547
9548                 if (!back->is_data)
9549                         item_size += sizeof(*bi);
9550
9551                 ins_key.objectid = rec->start;
9552                 ins_key.offset = rec->max_size;
9553                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9554
9555                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9556                                         &ins_key, item_size);
9557                 if (ret)
9558                         goto fail;
9559
9560                 leaf = path->nodes[0];
9561                 ei = btrfs_item_ptr(leaf, path->slots[0],
9562                                     struct btrfs_extent_item);
9563
9564                 btrfs_set_extent_refs(leaf, ei, 0);
9565                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9566
9567                 if (back->is_data) {
9568                         btrfs_set_extent_flags(leaf, ei,
9569                                                BTRFS_EXTENT_FLAG_DATA);
9570                 } else {
9571                         struct btrfs_disk_key copy_key;;
9572
9573                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9574                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9575                                              sizeof(*bi));
9576
9577                         btrfs_set_disk_key_objectid(&copy_key,
9578                                                     rec->info_objectid);
9579                         btrfs_set_disk_key_type(&copy_key, 0);
9580                         btrfs_set_disk_key_offset(&copy_key, 0);
9581
9582                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9583                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9584
9585                         btrfs_set_extent_flags(leaf, ei,
9586                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9587                 }
9588
9589                 btrfs_mark_buffer_dirty(leaf);
9590                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9591                                                rec->max_size, 1, 0);
9592                 if (ret)
9593                         goto fail;
9594                 btrfs_release_path(path);
9595         }
9596
9597         if (back->is_data) {
9598                 u64 parent;
9599                 int i;
9600
9601                 dback = to_data_backref(back);
9602                 if (back->full_backref)
9603                         parent = dback->parent;
9604                 else
9605                         parent = 0;
9606
9607                 for (i = 0; i < dback->found_ref; i++) {
9608                         /* if parent != 0, we're doing a full backref
9609                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9610                          * just makes the backref allocator create a data
9611                          * backref
9612                          */
9613                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9614                                                    rec->start, rec->max_size,
9615                                                    parent,
9616                                                    dback->root,
9617                                                    parent ?
9618                                                    BTRFS_FIRST_FREE_OBJECTID :
9619                                                    dback->owner,
9620                                                    dback->offset);
9621                         if (ret)
9622                                 break;
9623                 }
9624                 fprintf(stderr, "adding new data backref"
9625                                 " on %llu %s %llu owner %llu"
9626                                 " offset %llu found %d\n",
9627                                 (unsigned long long)rec->start,
9628                                 back->full_backref ?
9629                                 "parent" : "root",
9630                                 back->full_backref ?
9631                                 (unsigned long long)parent :
9632                                 (unsigned long long)dback->root,
9633                                 (unsigned long long)dback->owner,
9634                                 (unsigned long long)dback->offset,
9635                                 dback->found_ref);
9636         } else {
9637                 u64 parent;
9638                 struct tree_backref *tback;
9639
9640                 tback = to_tree_backref(back);
9641                 if (back->full_backref)
9642                         parent = tback->parent;
9643                 else
9644                         parent = 0;
9645
9646                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9647                                            rec->start, rec->max_size,
9648                                            parent, tback->root, 0, 0);
9649                 fprintf(stderr, "adding new tree backref on "
9650                         "start %llu len %llu parent %llu root %llu\n",
9651                         rec->start, rec->max_size, parent, tback->root);
9652         }
9653 fail:
9654         btrfs_release_path(path);
9655         return ret;
9656 }
9657
9658 static struct extent_entry *find_entry(struct list_head *entries,
9659                                        u64 bytenr, u64 bytes)
9660 {
9661         struct extent_entry *entry = NULL;
9662
9663         list_for_each_entry(entry, entries, list) {
9664                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9665                         return entry;
9666         }
9667
9668         return NULL;
9669 }
9670
9671 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9672 {
9673         struct extent_entry *entry, *best = NULL, *prev = NULL;
9674
9675         list_for_each_entry(entry, entries, list) {
9676                 /*
9677                  * If there are as many broken entries as entries then we know
9678                  * not to trust this particular entry.
9679                  */
9680                 if (entry->broken == entry->count)
9681                         continue;
9682
9683                 /*
9684                  * Special case, when there are only two entries and 'best' is
9685                  * the first one
9686                  */
9687                 if (!prev) {
9688                         best = entry;
9689                         prev = entry;
9690                         continue;
9691                 }
9692
9693                 /*
9694                  * If our current entry == best then we can't be sure our best
9695                  * is really the best, so we need to keep searching.
9696                  */
9697                 if (best && best->count == entry->count) {
9698                         prev = entry;
9699                         best = NULL;
9700                         continue;
9701                 }
9702
9703                 /* Prev == entry, not good enough, have to keep searching */
9704                 if (!prev->broken && prev->count == entry->count)
9705                         continue;
9706
9707                 if (!best)
9708                         best = (prev->count > entry->count) ? prev : entry;
9709                 else if (best->count < entry->count)
9710                         best = entry;
9711                 prev = entry;
9712         }
9713
9714         return best;
9715 }
9716
9717 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9718                       struct data_backref *dback, struct extent_entry *entry)
9719 {
9720         struct btrfs_trans_handle *trans;
9721         struct btrfs_root *root;
9722         struct btrfs_file_extent_item *fi;
9723         struct extent_buffer *leaf;
9724         struct btrfs_key key;
9725         u64 bytenr, bytes;
9726         int ret, err;
9727
9728         key.objectid = dback->root;
9729         key.type = BTRFS_ROOT_ITEM_KEY;
9730         key.offset = (u64)-1;
9731         root = btrfs_read_fs_root(info, &key);
9732         if (IS_ERR(root)) {
9733                 fprintf(stderr, "Couldn't find root for our ref\n");
9734                 return -EINVAL;
9735         }
9736
9737         /*
9738          * The backref points to the original offset of the extent if it was
9739          * split, so we need to search down to the offset we have and then walk
9740          * forward until we find the backref we're looking for.
9741          */
9742         key.objectid = dback->owner;
9743         key.type = BTRFS_EXTENT_DATA_KEY;
9744         key.offset = dback->offset;
9745         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9746         if (ret < 0) {
9747                 fprintf(stderr, "Error looking up ref %d\n", ret);
9748                 return ret;
9749         }
9750
9751         while (1) {
9752                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9753                         ret = btrfs_next_leaf(root, path);
9754                         if (ret) {
9755                                 fprintf(stderr, "Couldn't find our ref, next\n");
9756                                 return -EINVAL;
9757                         }
9758                 }
9759                 leaf = path->nodes[0];
9760                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9761                 if (key.objectid != dback->owner ||
9762                     key.type != BTRFS_EXTENT_DATA_KEY) {
9763                         fprintf(stderr, "Couldn't find our ref, search\n");
9764                         return -EINVAL;
9765                 }
9766                 fi = btrfs_item_ptr(leaf, path->slots[0],
9767                                     struct btrfs_file_extent_item);
9768                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9769                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9770
9771                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9772                         break;
9773                 path->slots[0]++;
9774         }
9775
9776         btrfs_release_path(path);
9777
9778         trans = btrfs_start_transaction(root, 1);
9779         if (IS_ERR(trans))
9780                 return PTR_ERR(trans);
9781
9782         /*
9783          * Ok we have the key of the file extent we want to fix, now we can cow
9784          * down to the thing and fix it.
9785          */
9786         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9787         if (ret < 0) {
9788                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9789                         key.objectid, key.type, key.offset, ret);
9790                 goto out;
9791         }
9792         if (ret > 0) {
9793                 fprintf(stderr, "Well that's odd, we just found this key "
9794                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9795                         key.offset);
9796                 ret = -EINVAL;
9797                 goto out;
9798         }
9799         leaf = path->nodes[0];
9800         fi = btrfs_item_ptr(leaf, path->slots[0],
9801                             struct btrfs_file_extent_item);
9802
9803         if (btrfs_file_extent_compression(leaf, fi) &&
9804             dback->disk_bytenr != entry->bytenr) {
9805                 fprintf(stderr, "Ref doesn't match the record start and is "
9806                         "compressed, please take a btrfs-image of this file "
9807                         "system and send it to a btrfs developer so they can "
9808                         "complete this functionality for bytenr %Lu\n",
9809                         dback->disk_bytenr);
9810                 ret = -EINVAL;
9811                 goto out;
9812         }
9813
9814         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9815                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9816         } else if (dback->disk_bytenr > entry->bytenr) {
9817                 u64 off_diff, offset;
9818
9819                 off_diff = dback->disk_bytenr - entry->bytenr;
9820                 offset = btrfs_file_extent_offset(leaf, fi);
9821                 if (dback->disk_bytenr + offset +
9822                     btrfs_file_extent_num_bytes(leaf, fi) >
9823                     entry->bytenr + entry->bytes) {
9824                         fprintf(stderr, "Ref is past the entry end, please "
9825                                 "take a btrfs-image of this file system and "
9826                                 "send it to a btrfs developer, ref %Lu\n",
9827                                 dback->disk_bytenr);
9828                         ret = -EINVAL;
9829                         goto out;
9830                 }
9831                 offset += off_diff;
9832                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9833                 btrfs_set_file_extent_offset(leaf, fi, offset);
9834         } else if (dback->disk_bytenr < entry->bytenr) {
9835                 u64 offset;
9836
9837                 offset = btrfs_file_extent_offset(leaf, fi);
9838                 if (dback->disk_bytenr + offset < entry->bytenr) {
9839                         fprintf(stderr, "Ref is before the entry start, please"
9840                                 " take a btrfs-image of this file system and "
9841                                 "send it to a btrfs developer, ref %Lu\n",
9842                                 dback->disk_bytenr);
9843                         ret = -EINVAL;
9844                         goto out;
9845                 }
9846
9847                 offset += dback->disk_bytenr;
9848                 offset -= entry->bytenr;
9849                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9850                 btrfs_set_file_extent_offset(leaf, fi, offset);
9851         }
9852
9853         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9854
9855         /*
9856          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9857          * only do this if we aren't using compression, otherwise it's a
9858          * trickier case.
9859          */
9860         if (!btrfs_file_extent_compression(leaf, fi))
9861                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9862         else
9863                 printf("ram bytes may be wrong?\n");
9864         btrfs_mark_buffer_dirty(leaf);
9865 out:
9866         err = btrfs_commit_transaction(trans, root);
9867         btrfs_release_path(path);
9868         return ret ? ret : err;
9869 }
9870
9871 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9872                            struct extent_record *rec)
9873 {
9874         struct extent_backref *back, *tmp;
9875         struct data_backref *dback;
9876         struct extent_entry *entry, *best = NULL;
9877         LIST_HEAD(entries);
9878         int nr_entries = 0;
9879         int broken_entries = 0;
9880         int ret = 0;
9881         short mismatch = 0;
9882
9883         /*
9884          * Metadata is easy and the backrefs should always agree on bytenr and
9885          * size, if not we've got bigger issues.
9886          */
9887         if (rec->metadata)
9888                 return 0;
9889
9890         rbtree_postorder_for_each_entry_safe(back, tmp,
9891                                              &rec->backref_tree, node) {
9892                 if (back->full_backref || !back->is_data)
9893                         continue;
9894
9895                 dback = to_data_backref(back);
9896
9897                 /*
9898                  * We only pay attention to backrefs that we found a real
9899                  * backref for.
9900                  */
9901                 if (dback->found_ref == 0)
9902                         continue;
9903
9904                 /*
9905                  * For now we only catch when the bytes don't match, not the
9906                  * bytenr.  We can easily do this at the same time, but I want
9907                  * to have a fs image to test on before we just add repair
9908                  * functionality willy-nilly so we know we won't screw up the
9909                  * repair.
9910                  */
9911
9912                 entry = find_entry(&entries, dback->disk_bytenr,
9913                                    dback->bytes);
9914                 if (!entry) {
9915                         entry = malloc(sizeof(struct extent_entry));
9916                         if (!entry) {
9917                                 ret = -ENOMEM;
9918                                 goto out;
9919                         }
9920                         memset(entry, 0, sizeof(*entry));
9921                         entry->bytenr = dback->disk_bytenr;
9922                         entry->bytes = dback->bytes;
9923                         list_add_tail(&entry->list, &entries);
9924                         nr_entries++;
9925                 }
9926
9927                 /*
9928                  * If we only have one entry we may think the entries agree when
9929                  * in reality they don't so we have to do some extra checking.
9930                  */
9931                 if (dback->disk_bytenr != rec->start ||
9932                     dback->bytes != rec->nr || back->broken)
9933                         mismatch = 1;
9934
9935                 if (back->broken) {
9936                         entry->broken++;
9937                         broken_entries++;
9938                 }
9939
9940                 entry->count++;
9941         }
9942
9943         /* Yay all the backrefs agree, carry on good sir */
9944         if (nr_entries <= 1 && !mismatch)
9945                 goto out;
9946
9947         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9948                 "%Lu\n", rec->start);
9949
9950         /*
9951          * First we want to see if the backrefs can agree amongst themselves who
9952          * is right, so figure out which one of the entries has the highest
9953          * count.
9954          */
9955         best = find_most_right_entry(&entries);
9956
9957         /*
9958          * Ok so we may have an even split between what the backrefs think, so
9959          * this is where we use the extent ref to see what it thinks.
9960          */
9961         if (!best) {
9962                 entry = find_entry(&entries, rec->start, rec->nr);
9963                 if (!entry && (!broken_entries || !rec->found_rec)) {
9964                         fprintf(stderr, "Backrefs don't agree with each other "
9965                                 "and extent record doesn't agree with anybody,"
9966                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9967                                 rec->start, rec->nr);
9968                         ret = -EINVAL;
9969                         goto out;
9970                 } else if (!entry) {
9971                         /*
9972                          * Ok our backrefs were broken, we'll assume this is the
9973                          * correct value and add an entry for this range.
9974                          */
9975                         entry = malloc(sizeof(struct extent_entry));
9976                         if (!entry) {
9977                                 ret = -ENOMEM;
9978                                 goto out;
9979                         }
9980                         memset(entry, 0, sizeof(*entry));
9981                         entry->bytenr = rec->start;
9982                         entry->bytes = rec->nr;
9983                         list_add_tail(&entry->list, &entries);
9984                         nr_entries++;
9985                 }
9986                 entry->count++;
9987                 best = find_most_right_entry(&entries);
9988                 if (!best) {
9989                         fprintf(stderr, "Backrefs and extent record evenly "
9990                                 "split on who is right, this is going to "
9991                                 "require user input to fix bytenr %Lu bytes "
9992                                 "%Lu\n", rec->start, rec->nr);
9993                         ret = -EINVAL;
9994                         goto out;
9995                 }
9996         }
9997
9998         /*
9999          * I don't think this can happen currently as we'll abort() if we catch
10000          * this case higher up, but in case somebody removes that we still can't
10001          * deal with it properly here yet, so just bail out of that's the case.
10002          */
10003         if (best->bytenr != rec->start) {
10004                 fprintf(stderr, "Extent start and backref starts don't match, "
10005                         "please use btrfs-image on this file system and send "
10006                         "it to a btrfs developer so they can make fsck fix "
10007                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
10008                         rec->start, rec->nr);
10009                 ret = -EINVAL;
10010                 goto out;
10011         }
10012
10013         /*
10014          * Ok great we all agreed on an extent record, let's go find the real
10015          * references and fix up the ones that don't match.
10016          */
10017         rbtree_postorder_for_each_entry_safe(back, tmp,
10018                                              &rec->backref_tree, node) {
10019                 if (back->full_backref || !back->is_data)
10020                         continue;
10021
10022                 dback = to_data_backref(back);
10023
10024                 /*
10025                  * Still ignoring backrefs that don't have a real ref attached
10026                  * to them.
10027                  */
10028                 if (dback->found_ref == 0)
10029                         continue;
10030
10031                 if (dback->bytes == best->bytes &&
10032                     dback->disk_bytenr == best->bytenr)
10033                         continue;
10034
10035                 ret = repair_ref(info, path, dback, best);
10036                 if (ret)
10037                         goto out;
10038         }
10039
10040         /*
10041          * Ok we messed with the actual refs, which means we need to drop our
10042          * entire cache and go back and rescan.  I know this is a huge pain and
10043          * adds a lot of extra work, but it's the only way to be safe.  Once all
10044          * the backrefs agree we may not need to do anything to the extent
10045          * record itself.
10046          */
10047         ret = -EAGAIN;
10048 out:
10049         while (!list_empty(&entries)) {
10050                 entry = list_entry(entries.next, struct extent_entry, list);
10051                 list_del_init(&entry->list);
10052                 free(entry);
10053         }
10054         return ret;
10055 }
10056
10057 static int process_duplicates(struct cache_tree *extent_cache,
10058                               struct extent_record *rec)
10059 {
10060         struct extent_record *good, *tmp;
10061         struct cache_extent *cache;
10062         int ret;
10063
10064         /*
10065          * If we found a extent record for this extent then return, or if we
10066          * have more than one duplicate we are likely going to need to delete
10067          * something.
10068          */
10069         if (rec->found_rec || rec->num_duplicates > 1)
10070                 return 0;
10071
10072         /* Shouldn't happen but just in case */
10073         BUG_ON(!rec->num_duplicates);
10074
10075         /*
10076          * So this happens if we end up with a backref that doesn't match the
10077          * actual extent entry.  So either the backref is bad or the extent
10078          * entry is bad.  Either way we want to have the extent_record actually
10079          * reflect what we found in the extent_tree, so we need to take the
10080          * duplicate out and use that as the extent_record since the only way we
10081          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10082          */
10083         remove_cache_extent(extent_cache, &rec->cache);
10084
10085         good = to_extent_record(rec->dups.next);
10086         list_del_init(&good->list);
10087         INIT_LIST_HEAD(&good->backrefs);
10088         INIT_LIST_HEAD(&good->dups);
10089         good->cache.start = good->start;
10090         good->cache.size = good->nr;
10091         good->content_checked = 0;
10092         good->owner_ref_checked = 0;
10093         good->num_duplicates = 0;
10094         good->refs = rec->refs;
10095         list_splice_init(&rec->backrefs, &good->backrefs);
10096         while (1) {
10097                 cache = lookup_cache_extent(extent_cache, good->start,
10098                                             good->nr);
10099                 if (!cache)
10100                         break;
10101                 tmp = container_of(cache, struct extent_record, cache);
10102
10103                 /*
10104                  * If we find another overlapping extent and it's found_rec is
10105                  * set then it's a duplicate and we need to try and delete
10106                  * something.
10107                  */
10108                 if (tmp->found_rec || tmp->num_duplicates > 0) {
10109                         if (list_empty(&good->list))
10110                                 list_add_tail(&good->list,
10111                                               &duplicate_extents);
10112                         good->num_duplicates += tmp->num_duplicates + 1;
10113                         list_splice_init(&tmp->dups, &good->dups);
10114                         list_del_init(&tmp->list);
10115                         list_add_tail(&tmp->list, &good->dups);
10116                         remove_cache_extent(extent_cache, &tmp->cache);
10117                         continue;
10118                 }
10119
10120                 /*
10121                  * Ok we have another non extent item backed extent rec, so lets
10122                  * just add it to this extent and carry on like we did above.
10123                  */
10124                 good->refs += tmp->refs;
10125                 list_splice_init(&tmp->backrefs, &good->backrefs);
10126                 remove_cache_extent(extent_cache, &tmp->cache);
10127                 free(tmp);
10128         }
10129         ret = insert_cache_extent(extent_cache, &good->cache);
10130         BUG_ON(ret);
10131         free(rec);
10132         return good->num_duplicates ? 0 : 1;
10133 }
10134
10135 static int delete_duplicate_records(struct btrfs_root *root,
10136                                     struct extent_record *rec)
10137 {
10138         struct btrfs_trans_handle *trans;
10139         LIST_HEAD(delete_list);
10140         struct btrfs_path path;
10141         struct extent_record *tmp, *good, *n;
10142         int nr_del = 0;
10143         int ret = 0, err;
10144         struct btrfs_key key;
10145
10146         btrfs_init_path(&path);
10147
10148         good = rec;
10149         /* Find the record that covers all of the duplicates. */
10150         list_for_each_entry(tmp, &rec->dups, list) {
10151                 if (good->start < tmp->start)
10152                         continue;
10153                 if (good->nr > tmp->nr)
10154                         continue;
10155
10156                 if (tmp->start + tmp->nr < good->start + good->nr) {
10157                         fprintf(stderr, "Ok we have overlapping extents that "
10158                                 "aren't completely covered by each other, this "
10159                                 "is going to require more careful thought.  "
10160                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10161                                 tmp->start, tmp->nr, good->start, good->nr);
10162                         abort();
10163                 }
10164                 good = tmp;
10165         }
10166
10167         if (good != rec)
10168                 list_add_tail(&rec->list, &delete_list);
10169
10170         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10171                 if (tmp == good)
10172                         continue;
10173                 list_move_tail(&tmp->list, &delete_list);
10174         }
10175
10176         root = root->fs_info->extent_root;
10177         trans = btrfs_start_transaction(root, 1);
10178         if (IS_ERR(trans)) {
10179                 ret = PTR_ERR(trans);
10180                 goto out;
10181         }
10182
10183         list_for_each_entry(tmp, &delete_list, list) {
10184                 if (tmp->found_rec == 0)
10185                         continue;
10186                 key.objectid = tmp->start;
10187                 key.type = BTRFS_EXTENT_ITEM_KEY;
10188                 key.offset = tmp->nr;
10189
10190                 /* Shouldn't happen but just in case */
10191                 if (tmp->metadata) {
10192                         fprintf(stderr, "Well this shouldn't happen, extent "
10193                                 "record overlaps but is metadata? "
10194                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10195                         abort();
10196                 }
10197
10198                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10199                 if (ret) {
10200                         if (ret > 0)
10201                                 ret = -EINVAL;
10202                         break;
10203                 }
10204                 ret = btrfs_del_item(trans, root, &path);
10205                 if (ret)
10206                         break;
10207                 btrfs_release_path(&path);
10208                 nr_del++;
10209         }
10210         err = btrfs_commit_transaction(trans, root);
10211         if (err && !ret)
10212                 ret = err;
10213 out:
10214         while (!list_empty(&delete_list)) {
10215                 tmp = to_extent_record(delete_list.next);
10216                 list_del_init(&tmp->list);
10217                 if (tmp == rec)
10218                         continue;
10219                 free(tmp);
10220         }
10221
10222         while (!list_empty(&rec->dups)) {
10223                 tmp = to_extent_record(rec->dups.next);
10224                 list_del_init(&tmp->list);
10225                 free(tmp);
10226         }
10227
10228         btrfs_release_path(&path);
10229
10230         if (!ret && !nr_del)
10231                 rec->num_duplicates = 0;
10232
10233         return ret ? ret : nr_del;
10234 }
10235
10236 static int find_possible_backrefs(struct btrfs_fs_info *info,
10237                                   struct btrfs_path *path,
10238                                   struct cache_tree *extent_cache,
10239                                   struct extent_record *rec)
10240 {
10241         struct btrfs_root *root;
10242         struct extent_backref *back, *tmp;
10243         struct data_backref *dback;
10244         struct cache_extent *cache;
10245         struct btrfs_file_extent_item *fi;
10246         struct btrfs_key key;
10247         u64 bytenr, bytes;
10248         int ret;
10249
10250         rbtree_postorder_for_each_entry_safe(back, tmp,
10251                                              &rec->backref_tree, node) {
10252                 /* Don't care about full backrefs (poor unloved backrefs) */
10253                 if (back->full_backref || !back->is_data)
10254                         continue;
10255
10256                 dback = to_data_backref(back);
10257
10258                 /* We found this one, we don't need to do a lookup */
10259                 if (dback->found_ref)
10260                         continue;
10261
10262                 key.objectid = dback->root;
10263                 key.type = BTRFS_ROOT_ITEM_KEY;
10264                 key.offset = (u64)-1;
10265
10266                 root = btrfs_read_fs_root(info, &key);
10267
10268                 /* No root, definitely a bad ref, skip */
10269                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10270                         continue;
10271                 /* Other err, exit */
10272                 if (IS_ERR(root))
10273                         return PTR_ERR(root);
10274
10275                 key.objectid = dback->owner;
10276                 key.type = BTRFS_EXTENT_DATA_KEY;
10277                 key.offset = dback->offset;
10278                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10279                 if (ret) {
10280                         btrfs_release_path(path);
10281                         if (ret < 0)
10282                                 return ret;
10283                         /* Didn't find it, we can carry on */
10284                         ret = 0;
10285                         continue;
10286                 }
10287
10288                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10289                                     struct btrfs_file_extent_item);
10290                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10291                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10292                 btrfs_release_path(path);
10293                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10294                 if (cache) {
10295                         struct extent_record *tmp;
10296                         tmp = container_of(cache, struct extent_record, cache);
10297
10298                         /*
10299                          * If we found an extent record for the bytenr for this
10300                          * particular backref then we can't add it to our
10301                          * current extent record.  We only want to add backrefs
10302                          * that don't have a corresponding extent item in the
10303                          * extent tree since they likely belong to this record
10304                          * and we need to fix it if it doesn't match bytenrs.
10305                          */
10306                         if  (tmp->found_rec)
10307                                 continue;
10308                 }
10309
10310                 dback->found_ref += 1;
10311                 dback->disk_bytenr = bytenr;
10312                 dback->bytes = bytes;
10313
10314                 /*
10315                  * Set this so the verify backref code knows not to trust the
10316                  * values in this backref.
10317                  */
10318                 back->broken = 1;
10319         }
10320
10321         return 0;
10322 }
10323
10324 /*
10325  * Record orphan data ref into corresponding root.
10326  *
10327  * Return 0 if the extent item contains data ref and recorded.
10328  * Return 1 if the extent item contains no useful data ref
10329  *   On that case, it may contains only shared_dataref or metadata backref
10330  *   or the file extent exists(this should be handled by the extent bytenr
10331  *   recovery routine)
10332  * Return <0 if something goes wrong.
10333  */
10334 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10335                                       struct extent_record *rec)
10336 {
10337         struct btrfs_key key;
10338         struct btrfs_root *dest_root;
10339         struct extent_backref *back, *tmp;
10340         struct data_backref *dback;
10341         struct orphan_data_extent *orphan;
10342         struct btrfs_path path;
10343         int recorded_data_ref = 0;
10344         int ret = 0;
10345
10346         if (rec->metadata)
10347                 return 1;
10348         btrfs_init_path(&path);
10349         rbtree_postorder_for_each_entry_safe(back, tmp,
10350                                              &rec->backref_tree, node) {
10351                 if (back->full_backref || !back->is_data ||
10352                     !back->found_extent_tree)
10353                         continue;
10354                 dback = to_data_backref(back);
10355                 if (dback->found_ref)
10356                         continue;
10357                 key.objectid = dback->root;
10358                 key.type = BTRFS_ROOT_ITEM_KEY;
10359                 key.offset = (u64)-1;
10360
10361                 dest_root = btrfs_read_fs_root(fs_info, &key);
10362
10363                 /* For non-exist root we just skip it */
10364                 if (IS_ERR(dest_root) || !dest_root)
10365                         continue;
10366
10367                 key.objectid = dback->owner;
10368                 key.type = BTRFS_EXTENT_DATA_KEY;
10369                 key.offset = dback->offset;
10370
10371                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10372                 btrfs_release_path(&path);
10373                 /*
10374                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10375                  * we need to record it for inode/file extent rebuild.
10376                  * For ret > 0, we record it only for file extent rebuild.
10377                  * For ret == 0, the file extent exists but only bytenr
10378                  * mismatch, let the original bytenr fix routine to handle,
10379                  * don't record it.
10380                  */
10381                 if (ret == 0)
10382                         continue;
10383                 ret = 0;
10384                 orphan = malloc(sizeof(*orphan));
10385                 if (!orphan) {
10386                         ret = -ENOMEM;
10387                         goto out;
10388                 }
10389                 INIT_LIST_HEAD(&orphan->list);
10390                 orphan->root = dback->root;
10391                 orphan->objectid = dback->owner;
10392                 orphan->offset = dback->offset;
10393                 orphan->disk_bytenr = rec->cache.start;
10394                 orphan->disk_len = rec->cache.size;
10395                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10396                 recorded_data_ref = 1;
10397         }
10398 out:
10399         btrfs_release_path(&path);
10400         if (!ret)
10401                 return !recorded_data_ref;
10402         else
10403                 return ret;
10404 }
10405
10406 /*
10407  * when an incorrect extent item is found, this will delete
10408  * all of the existing entries for it and recreate them
10409  * based on what the tree scan found.
10410  */
10411 static int fixup_extent_refs(struct btrfs_fs_info *info,
10412                              struct cache_tree *extent_cache,
10413                              struct extent_record *rec)
10414 {
10415         struct btrfs_trans_handle *trans = NULL;
10416         int ret;
10417         struct btrfs_path path;
10418         struct cache_extent *cache;
10419         struct extent_backref *back, *tmp;
10420         int allocated = 0;
10421         u64 flags = 0;
10422
10423         if (rec->flag_block_full_backref)
10424                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10425
10426         btrfs_init_path(&path);
10427         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10428                 /*
10429                  * Sometimes the backrefs themselves are so broken they don't
10430                  * get attached to any meaningful rec, so first go back and
10431                  * check any of our backrefs that we couldn't find and throw
10432                  * them into the list if we find the backref so that
10433                  * verify_backrefs can figure out what to do.
10434                  */
10435                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10436                 if (ret < 0)
10437                         goto out;
10438         }
10439
10440         /* step one, make sure all of the backrefs agree */
10441         ret = verify_backrefs(info, &path, rec);
10442         if (ret < 0)
10443                 goto out;
10444
10445         trans = btrfs_start_transaction(info->extent_root, 1);
10446         if (IS_ERR(trans)) {
10447                 ret = PTR_ERR(trans);
10448                 goto out;
10449         }
10450
10451         /* step two, delete all the existing records */
10452         ret = delete_extent_records(trans, info->extent_root, &path,
10453                                     rec->start);
10454
10455         if (ret < 0)
10456                 goto out;
10457
10458         /* was this block corrupt?  If so, don't add references to it */
10459         cache = lookup_cache_extent(info->corrupt_blocks,
10460                                     rec->start, rec->max_size);
10461         if (cache) {
10462                 ret = 0;
10463                 goto out;
10464         }
10465
10466         /* step three, recreate all the refs we did find */
10467         rbtree_postorder_for_each_entry_safe(back, tmp,
10468                                              &rec->backref_tree, node) {
10469                 /*
10470                  * if we didn't find any references, don't create a
10471                  * new extent record
10472                  */
10473                 if (!back->found_ref)
10474                         continue;
10475
10476                 rec->bad_full_backref = 0;
10477                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10478                 allocated = 1;
10479
10480                 if (ret)
10481                         goto out;
10482         }
10483 out:
10484         if (trans) {
10485                 int err = btrfs_commit_transaction(trans, info->extent_root);
10486                 if (!ret)
10487                         ret = err;
10488         }
10489
10490         if (!ret)
10491                 fprintf(stderr, "Repaired extent references for %llu\n",
10492                                 (unsigned long long)rec->start);
10493
10494         btrfs_release_path(&path);
10495         return ret;
10496 }
10497
10498 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10499                               struct extent_record *rec)
10500 {
10501         struct btrfs_trans_handle *trans;
10502         struct btrfs_root *root = fs_info->extent_root;
10503         struct btrfs_path path;
10504         struct btrfs_extent_item *ei;
10505         struct btrfs_key key;
10506         u64 flags;
10507         int ret = 0;
10508
10509         key.objectid = rec->start;
10510         if (rec->metadata) {
10511                 key.type = BTRFS_METADATA_ITEM_KEY;
10512                 key.offset = rec->info_level;
10513         } else {
10514                 key.type = BTRFS_EXTENT_ITEM_KEY;
10515                 key.offset = rec->max_size;
10516         }
10517
10518         trans = btrfs_start_transaction(root, 0);
10519         if (IS_ERR(trans))
10520                 return PTR_ERR(trans);
10521
10522         btrfs_init_path(&path);
10523         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10524         if (ret < 0) {
10525                 btrfs_release_path(&path);
10526                 btrfs_commit_transaction(trans, root);
10527                 return ret;
10528         } else if (ret) {
10529                 fprintf(stderr, "Didn't find extent for %llu\n",
10530                         (unsigned long long)rec->start);
10531                 btrfs_release_path(&path);
10532                 btrfs_commit_transaction(trans, root);
10533                 return -ENOENT;
10534         }
10535
10536         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10537                             struct btrfs_extent_item);
10538         flags = btrfs_extent_flags(path.nodes[0], ei);
10539         if (rec->flag_block_full_backref) {
10540                 fprintf(stderr, "setting full backref on %llu\n",
10541                         (unsigned long long)key.objectid);
10542                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10543         } else {
10544                 fprintf(stderr, "clearing full backref on %llu\n",
10545                         (unsigned long long)key.objectid);
10546                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10547         }
10548         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10549         btrfs_mark_buffer_dirty(path.nodes[0]);
10550         btrfs_release_path(&path);
10551         ret = btrfs_commit_transaction(trans, root);
10552         if (!ret)
10553                 fprintf(stderr, "Repaired extent flags for %llu\n",
10554                                 (unsigned long long)rec->start);
10555
10556         return ret;
10557 }
10558
10559 /* right now we only prune from the extent allocation tree */
10560 static int prune_one_block(struct btrfs_trans_handle *trans,
10561                            struct btrfs_fs_info *info,
10562                            struct btrfs_corrupt_block *corrupt)
10563 {
10564         int ret;
10565         struct btrfs_path path;
10566         struct extent_buffer *eb;
10567         u64 found;
10568         int slot;
10569         int nritems;
10570         int level = corrupt->level + 1;
10571
10572         btrfs_init_path(&path);
10573 again:
10574         /* we want to stop at the parent to our busted block */
10575         path.lowest_level = level;
10576
10577         ret = btrfs_search_slot(trans, info->extent_root,
10578                                 &corrupt->key, &path, -1, 1);
10579
10580         if (ret < 0)
10581                 goto out;
10582
10583         eb = path.nodes[level];
10584         if (!eb) {
10585                 ret = -ENOENT;
10586                 goto out;
10587         }
10588
10589         /*
10590          * hopefully the search gave us the block we want to prune,
10591          * lets try that first
10592          */
10593         slot = path.slots[level];
10594         found =  btrfs_node_blockptr(eb, slot);
10595         if (found == corrupt->cache.start)
10596                 goto del_ptr;
10597
10598         nritems = btrfs_header_nritems(eb);
10599
10600         /* the search failed, lets scan this node and hope we find it */
10601         for (slot = 0; slot < nritems; slot++) {
10602                 found =  btrfs_node_blockptr(eb, slot);
10603                 if (found == corrupt->cache.start)
10604                         goto del_ptr;
10605         }
10606         /*
10607          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10608          * to this block
10609          */
10610         if (eb == info->extent_root->node) {
10611                 ret = -ENOENT;
10612                 goto out;
10613         } else {
10614                 level++;
10615                 btrfs_release_path(&path);
10616                 goto again;
10617         }
10618
10619 del_ptr:
10620         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10621         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10622
10623 out:
10624         btrfs_release_path(&path);
10625         return ret;
10626 }
10627
10628 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10629 {
10630         struct btrfs_trans_handle *trans = NULL;
10631         struct cache_extent *cache;
10632         struct btrfs_corrupt_block *corrupt;
10633
10634         while (1) {
10635                 cache = search_cache_extent(info->corrupt_blocks, 0);
10636                 if (!cache)
10637                         break;
10638                 if (!trans) {
10639                         trans = btrfs_start_transaction(info->extent_root, 1);
10640                         if (IS_ERR(trans))
10641                                 return PTR_ERR(trans);
10642                 }
10643                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10644                 prune_one_block(trans, info, corrupt);
10645                 remove_cache_extent(info->corrupt_blocks, cache);
10646         }
10647         if (trans)
10648                 return btrfs_commit_transaction(trans, info->extent_root);
10649         return 0;
10650 }
10651
10652 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10653 {
10654         struct btrfs_block_group_cache *cache;
10655         u64 start, end;
10656         int ret;
10657
10658         while (1) {
10659                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10660                                             &start, &end, EXTENT_DIRTY);
10661                 if (ret)
10662                         break;
10663                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10664         }
10665
10666         start = 0;
10667         while (1) {
10668                 cache = btrfs_lookup_first_block_group(fs_info, start);
10669                 if (!cache)
10670                         break;
10671                 if (cache->cached)
10672                         cache->cached = 0;
10673                 start = cache->key.objectid + cache->key.offset;
10674         }
10675 }
10676
10677 static int check_extent_refs(struct btrfs_root *root,
10678                              struct cache_tree *extent_cache)
10679 {
10680         struct extent_record *rec;
10681         struct cache_extent *cache;
10682         int ret = 0;
10683         int had_dups = 0;
10684
10685         if (repair) {
10686                 /*
10687                  * if we're doing a repair, we have to make sure
10688                  * we don't allocate from the problem extents.
10689                  * In the worst case, this will be all the
10690                  * extents in the FS
10691                  */
10692                 cache = search_cache_extent(extent_cache, 0);
10693                 while(cache) {
10694                         rec = container_of(cache, struct extent_record, cache);
10695                         set_extent_dirty(root->fs_info->excluded_extents,
10696                                          rec->start,
10697                                          rec->start + rec->max_size - 1);
10698                         cache = next_cache_extent(cache);
10699                 }
10700
10701                 /* pin down all the corrupted blocks too */
10702                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10703                 while(cache) {
10704                         set_extent_dirty(root->fs_info->excluded_extents,
10705                                          cache->start,
10706                                          cache->start + cache->size - 1);
10707                         cache = next_cache_extent(cache);
10708                 }
10709                 prune_corrupt_blocks(root->fs_info);
10710                 reset_cached_block_groups(root->fs_info);
10711         }
10712
10713         reset_cached_block_groups(root->fs_info);
10714
10715         /*
10716          * We need to delete any duplicate entries we find first otherwise we
10717          * could mess up the extent tree when we have backrefs that actually
10718          * belong to a different extent item and not the weird duplicate one.
10719          */
10720         while (repair && !list_empty(&duplicate_extents)) {
10721                 rec = to_extent_record(duplicate_extents.next);
10722                 list_del_init(&rec->list);
10723
10724                 /* Sometimes we can find a backref before we find an actual
10725                  * extent, so we need to process it a little bit to see if there
10726                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10727                  * if this is a backref screwup.  If we need to delete stuff
10728                  * process_duplicates() will return 0, otherwise it will return
10729                  * 1 and we
10730                  */
10731                 if (process_duplicates(extent_cache, rec))
10732                         continue;
10733                 ret = delete_duplicate_records(root, rec);
10734                 if (ret < 0)
10735                         return ret;
10736                 /*
10737                  * delete_duplicate_records will return the number of entries
10738                  * deleted, so if it's greater than 0 then we know we actually
10739                  * did something and we need to remove.
10740                  */
10741                 if (ret)
10742                         had_dups = 1;
10743         }
10744
10745         if (had_dups)
10746                 return -EAGAIN;
10747
10748         while(1) {
10749                 int cur_err = 0;
10750                 int fix = 0;
10751
10752                 cache = search_cache_extent(extent_cache, 0);
10753                 if (!cache)
10754                         break;
10755                 rec = container_of(cache, struct extent_record, cache);
10756                 if (rec->num_duplicates) {
10757                         fprintf(stderr, "extent item %llu has multiple extent "
10758                                 "items\n", (unsigned long long)rec->start);
10759                         cur_err = 1;
10760                 }
10761
10762                 if (rec->refs != rec->extent_item_refs) {
10763                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10764                                 (unsigned long long)rec->start,
10765                                 (unsigned long long)rec->nr);
10766                         fprintf(stderr, "extent item %llu, found %llu\n",
10767                                 (unsigned long long)rec->extent_item_refs,
10768                                 (unsigned long long)rec->refs);
10769                         ret = record_orphan_data_extents(root->fs_info, rec);
10770                         if (ret < 0)
10771                                 goto repair_abort;
10772                         fix = ret;
10773                         cur_err = 1;
10774                 }
10775                 if (all_backpointers_checked(rec, 1)) {
10776                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10777                                 (unsigned long long)rec->start,
10778                                 (unsigned long long)rec->nr);
10779                         fix = 1;
10780                         cur_err = 1;
10781                 }
10782                 if (!rec->owner_ref_checked) {
10783                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10784                                 (unsigned long long)rec->start,
10785                                 (unsigned long long)rec->nr);
10786                         fix = 1;
10787                         cur_err = 1;
10788                 }
10789
10790                 if (repair && fix) {
10791                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10792                         if (ret)
10793                                 goto repair_abort;
10794                 }
10795
10796
10797                 if (rec->bad_full_backref) {
10798                         fprintf(stderr, "bad full backref, on [%llu]\n",
10799                                 (unsigned long long)rec->start);
10800                         if (repair) {
10801                                 ret = fixup_extent_flags(root->fs_info, rec);
10802                                 if (ret)
10803                                         goto repair_abort;
10804                                 fix = 1;
10805                         }
10806                         cur_err = 1;
10807                 }
10808                 /*
10809                  * Although it's not a extent ref's problem, we reuse this
10810                  * routine for error reporting.
10811                  * No repair function yet.
10812                  */
10813                 if (rec->crossing_stripes) {
10814                         fprintf(stderr,
10815                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10816                                 rec->start, rec->start + rec->max_size);
10817                         cur_err = 1;
10818                 }
10819
10820                 if (rec->wrong_chunk_type) {
10821                         fprintf(stderr,
10822                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10823                                 rec->start, rec->start + rec->max_size);
10824                         cur_err = 1;
10825                 }
10826
10827                 remove_cache_extent(extent_cache, cache);
10828                 free_all_extent_backrefs(rec);
10829                 if (!init_extent_tree && repair && (!cur_err || fix))
10830                         clear_extent_dirty(root->fs_info->excluded_extents,
10831                                            rec->start,
10832                                            rec->start + rec->max_size - 1);
10833                 free(rec);
10834         }
10835 repair_abort:
10836         if (repair) {
10837                 if (ret && ret != -EAGAIN) {
10838                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10839                         exit(1);
10840                 } else if (!ret) {
10841                         struct btrfs_trans_handle *trans;
10842
10843                         root = root->fs_info->extent_root;
10844                         trans = btrfs_start_transaction(root, 1);
10845                         if (IS_ERR(trans)) {
10846                                 ret = PTR_ERR(trans);
10847                                 goto repair_abort;
10848                         }
10849
10850                         ret = btrfs_fix_block_accounting(trans, root);
10851                         if (ret)
10852                                 goto repair_abort;
10853                         ret = btrfs_commit_transaction(trans, root);
10854                         if (ret)
10855                                 goto repair_abort;
10856                 }
10857                 return ret;
10858         }
10859         return 0;
10860 }
10861
10862 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10863 {
10864         u64 stripe_size;
10865
10866         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10867                 stripe_size = length;
10868                 stripe_size /= num_stripes;
10869         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10870                 stripe_size = length * 2;
10871                 stripe_size /= num_stripes;
10872         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10873                 stripe_size = length;
10874                 stripe_size /= (num_stripes - 1);
10875         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10876                 stripe_size = length;
10877                 stripe_size /= (num_stripes - 2);
10878         } else {
10879                 stripe_size = length;
10880         }
10881         return stripe_size;
10882 }
10883
10884 /*
10885  * Check the chunk with its block group/dev list ref:
10886  * Return 0 if all refs seems valid.
10887  * Return 1 if part of refs seems valid, need later check for rebuild ref
10888  * like missing block group and needs to search extent tree to rebuild them.
10889  * Return -1 if essential refs are missing and unable to rebuild.
10890  */
10891 static int check_chunk_refs(struct chunk_record *chunk_rec,
10892                             struct block_group_tree *block_group_cache,
10893                             struct device_extent_tree *dev_extent_cache,
10894                             int silent)
10895 {
10896         struct cache_extent *block_group_item;
10897         struct block_group_record *block_group_rec;
10898         struct cache_extent *dev_extent_item;
10899         struct device_extent_record *dev_extent_rec;
10900         u64 devid;
10901         u64 offset;
10902         u64 length;
10903         int metadump_v2 = 0;
10904         int i;
10905         int ret = 0;
10906
10907         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10908                                                chunk_rec->offset,
10909                                                chunk_rec->length);
10910         if (block_group_item) {
10911                 block_group_rec = container_of(block_group_item,
10912                                                struct block_group_record,
10913                                                cache);
10914                 if (chunk_rec->length != block_group_rec->offset ||
10915                     chunk_rec->offset != block_group_rec->objectid ||
10916                     (!metadump_v2 &&
10917                      chunk_rec->type_flags != block_group_rec->flags)) {
10918                         if (!silent)
10919                                 fprintf(stderr,
10920                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10921                                         chunk_rec->objectid,
10922                                         chunk_rec->type,
10923                                         chunk_rec->offset,
10924                                         chunk_rec->length,
10925                                         chunk_rec->offset,
10926                                         chunk_rec->type_flags,
10927                                         block_group_rec->objectid,
10928                                         block_group_rec->type,
10929                                         block_group_rec->offset,
10930                                         block_group_rec->offset,
10931                                         block_group_rec->objectid,
10932                                         block_group_rec->flags);
10933                         ret = -1;
10934                 } else {
10935                         list_del_init(&block_group_rec->list);
10936                         chunk_rec->bg_rec = block_group_rec;
10937                 }
10938         } else {
10939                 if (!silent)
10940                         fprintf(stderr,
10941                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10942                                 chunk_rec->objectid,
10943                                 chunk_rec->type,
10944                                 chunk_rec->offset,
10945                                 chunk_rec->length,
10946                                 chunk_rec->offset,
10947                                 chunk_rec->type_flags);
10948                 ret = 1;
10949         }
10950
10951         if (metadump_v2)
10952                 return ret;
10953
10954         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10955                                     chunk_rec->num_stripes);
10956         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10957                 devid = chunk_rec->stripes[i].devid;
10958                 offset = chunk_rec->stripes[i].offset;
10959                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10960                                                        devid, offset, length);
10961                 if (dev_extent_item) {
10962                         dev_extent_rec = container_of(dev_extent_item,
10963                                                 struct device_extent_record,
10964                                                 cache);
10965                         if (dev_extent_rec->objectid != devid ||
10966                             dev_extent_rec->offset != offset ||
10967                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10968                             dev_extent_rec->length != length) {
10969                                 if (!silent)
10970                                         fprintf(stderr,
10971                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10972                                                 chunk_rec->objectid,
10973                                                 chunk_rec->type,
10974                                                 chunk_rec->offset,
10975                                                 chunk_rec->stripes[i].devid,
10976                                                 chunk_rec->stripes[i].offset,
10977                                                 dev_extent_rec->objectid,
10978                                                 dev_extent_rec->offset,
10979                                                 dev_extent_rec->length);
10980                                 ret = -1;
10981                         } else {
10982                                 list_move(&dev_extent_rec->chunk_list,
10983                                           &chunk_rec->dextents);
10984                         }
10985                 } else {
10986                         if (!silent)
10987                                 fprintf(stderr,
10988                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10989                                         chunk_rec->objectid,
10990                                         chunk_rec->type,
10991                                         chunk_rec->offset,
10992                                         chunk_rec->stripes[i].devid,
10993                                         chunk_rec->stripes[i].offset);
10994                         ret = -1;
10995                 }
10996         }
10997         return ret;
10998 }
10999
11000 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
11001 int check_chunks(struct cache_tree *chunk_cache,
11002                  struct block_group_tree *block_group_cache,
11003                  struct device_extent_tree *dev_extent_cache,
11004                  struct list_head *good, struct list_head *bad,
11005                  struct list_head *rebuild, int silent)
11006 {
11007         struct cache_extent *chunk_item;
11008         struct chunk_record *chunk_rec;
11009         struct block_group_record *bg_rec;
11010         struct device_extent_record *dext_rec;
11011         int err;
11012         int ret = 0;
11013
11014         chunk_item = first_cache_extent(chunk_cache);
11015         while (chunk_item) {
11016                 chunk_rec = container_of(chunk_item, struct chunk_record,
11017                                          cache);
11018                 err = check_chunk_refs(chunk_rec, block_group_cache,
11019                                        dev_extent_cache, silent);
11020                 if (err < 0)
11021                         ret = err;
11022                 if (err == 0 && good)
11023                         list_add_tail(&chunk_rec->list, good);
11024                 if (err > 0 && rebuild)
11025                         list_add_tail(&chunk_rec->list, rebuild);
11026                 if (err < 0 && bad)
11027                         list_add_tail(&chunk_rec->list, bad);
11028                 chunk_item = next_cache_extent(chunk_item);
11029         }
11030
11031         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11032                 if (!silent)
11033                         fprintf(stderr,
11034                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11035                                 bg_rec->objectid,
11036                                 bg_rec->offset,
11037                                 bg_rec->flags);
11038                 if (!ret)
11039                         ret = 1;
11040         }
11041
11042         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11043                             chunk_list) {
11044                 if (!silent)
11045                         fprintf(stderr,
11046                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11047                                 dext_rec->objectid,
11048                                 dext_rec->offset,
11049                                 dext_rec->length);
11050                 if (!ret)
11051                         ret = 1;
11052         }
11053         return ret;
11054 }
11055
11056
11057 static int check_device_used(struct device_record *dev_rec,
11058                              struct device_extent_tree *dext_cache)
11059 {
11060         struct cache_extent *cache;
11061         struct device_extent_record *dev_extent_rec;
11062         u64 total_byte = 0;
11063
11064         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11065         while (cache) {
11066                 dev_extent_rec = container_of(cache,
11067                                               struct device_extent_record,
11068                                               cache);
11069                 if (dev_extent_rec->objectid != dev_rec->devid)
11070                         break;
11071
11072                 list_del_init(&dev_extent_rec->device_list);
11073                 total_byte += dev_extent_rec->length;
11074                 cache = next_cache_extent(cache);
11075         }
11076
11077         if (total_byte != dev_rec->byte_used) {
11078                 fprintf(stderr,
11079                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11080                         total_byte, dev_rec->byte_used, dev_rec->objectid,
11081                         dev_rec->type, dev_rec->offset);
11082                 return -1;
11083         } else {
11084                 return 0;
11085         }
11086 }
11087
11088 /* check btrfs_dev_item -> btrfs_dev_extent */
11089 static int check_devices(struct rb_root *dev_cache,
11090                          struct device_extent_tree *dev_extent_cache)
11091 {
11092         struct rb_node *dev_node;
11093         struct device_record *dev_rec;
11094         struct device_extent_record *dext_rec;
11095         int err;
11096         int ret = 0;
11097
11098         dev_node = rb_first(dev_cache);
11099         while (dev_node) {
11100                 dev_rec = container_of(dev_node, struct device_record, node);
11101                 err = check_device_used(dev_rec, dev_extent_cache);
11102                 if (err)
11103                         ret = err;
11104
11105                 dev_node = rb_next(dev_node);
11106         }
11107         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11108                             device_list) {
11109                 fprintf(stderr,
11110                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11111                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
11112                 if (!ret)
11113                         ret = 1;
11114         }
11115         return ret;
11116 }
11117
11118 static int add_root_item_to_list(struct list_head *head,
11119                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11120                                   u8 level, u8 drop_level,
11121                                   struct btrfs_key *drop_key)
11122 {
11123
11124         struct root_item_record *ri_rec;
11125         ri_rec = malloc(sizeof(*ri_rec));
11126         if (!ri_rec)
11127                 return -ENOMEM;
11128         ri_rec->bytenr = bytenr;
11129         ri_rec->objectid = objectid;
11130         ri_rec->level = level;
11131         ri_rec->drop_level = drop_level;
11132         ri_rec->last_snapshot = last_snapshot;
11133         if (drop_key)
11134                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11135         list_add_tail(&ri_rec->list, head);
11136
11137         return 0;
11138 }
11139
11140 static void free_root_item_list(struct list_head *list)
11141 {
11142         struct root_item_record *ri_rec;
11143
11144         while (!list_empty(list)) {
11145                 ri_rec = list_first_entry(list, struct root_item_record,
11146                                           list);
11147                 list_del_init(&ri_rec->list);
11148                 free(ri_rec);
11149         }
11150 }
11151
11152 static int deal_root_from_list(struct list_head *list,
11153                                struct btrfs_root *root,
11154                                struct block_info *bits,
11155                                int bits_nr,
11156                                struct cache_tree *pending,
11157                                struct cache_tree *seen,
11158                                struct cache_tree *reada,
11159                                struct cache_tree *nodes,
11160                                struct cache_tree *extent_cache,
11161                                struct cache_tree *chunk_cache,
11162                                struct rb_root *dev_cache,
11163                                struct block_group_tree *block_group_cache,
11164                                struct device_extent_tree *dev_extent_cache)
11165 {
11166         int ret = 0;
11167         u64 last;
11168
11169         while (!list_empty(list)) {
11170                 struct root_item_record *rec;
11171                 struct extent_buffer *buf;
11172                 rec = list_entry(list->next,
11173                                  struct root_item_record, list);
11174                 last = 0;
11175                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11176                 if (!extent_buffer_uptodate(buf)) {
11177                         free_extent_buffer(buf);
11178                         ret = -EIO;
11179                         break;
11180                 }
11181                 ret = add_root_to_pending(buf, extent_cache, pending,
11182                                     seen, nodes, rec->objectid);
11183                 if (ret < 0)
11184                         break;
11185                 /*
11186                  * To rebuild extent tree, we need deal with snapshot
11187                  * one by one, otherwise we deal with node firstly which
11188                  * can maximize readahead.
11189                  */
11190                 while (1) {
11191                         ret = run_next_block(root, bits, bits_nr, &last,
11192                                              pending, seen, reada, nodes,
11193                                              extent_cache, chunk_cache,
11194                                              dev_cache, block_group_cache,
11195                                              dev_extent_cache, rec);
11196                         if (ret != 0)
11197                                 break;
11198                 }
11199                 free_extent_buffer(buf);
11200                 list_del(&rec->list);
11201                 free(rec);
11202                 if (ret < 0)
11203                         break;
11204         }
11205         while (ret >= 0) {
11206                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11207                                      reada, nodes, extent_cache, chunk_cache,
11208                                      dev_cache, block_group_cache,
11209                                      dev_extent_cache, NULL);
11210                 if (ret != 0) {
11211                         if (ret > 0)
11212                                 ret = 0;
11213                         break;
11214                 }
11215         }
11216         return ret;
11217 }
11218
11219 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11220 {
11221         struct rb_root dev_cache;
11222         struct cache_tree chunk_cache;
11223         struct block_group_tree block_group_cache;
11224         struct device_extent_tree dev_extent_cache;
11225         struct cache_tree extent_cache;
11226         struct cache_tree seen;
11227         struct cache_tree pending;
11228         struct cache_tree reada;
11229         struct cache_tree nodes;
11230         struct extent_io_tree excluded_extents;
11231         struct cache_tree corrupt_blocks;
11232         struct btrfs_path path;
11233         struct btrfs_key key;
11234         struct btrfs_key found_key;
11235         int ret, err = 0;
11236         struct block_info *bits;
11237         int bits_nr;
11238         struct extent_buffer *leaf;
11239         int slot;
11240         struct btrfs_root_item ri;
11241         struct list_head dropping_trees;
11242         struct list_head normal_trees;
11243         struct btrfs_root *root1;
11244         struct btrfs_root *root;
11245         u64 objectid;
11246         u8 level;
11247
11248         root = fs_info->fs_root;
11249         dev_cache = RB_ROOT;
11250         cache_tree_init(&chunk_cache);
11251         block_group_tree_init(&block_group_cache);
11252         device_extent_tree_init(&dev_extent_cache);
11253
11254         cache_tree_init(&extent_cache);
11255         cache_tree_init(&seen);
11256         cache_tree_init(&pending);
11257         cache_tree_init(&nodes);
11258         cache_tree_init(&reada);
11259         cache_tree_init(&corrupt_blocks);
11260         extent_io_tree_init(&excluded_extents);
11261         INIT_LIST_HEAD(&dropping_trees);
11262         INIT_LIST_HEAD(&normal_trees);
11263
11264         if (repair) {
11265                 fs_info->excluded_extents = &excluded_extents;
11266                 fs_info->fsck_extent_cache = &extent_cache;
11267                 fs_info->free_extent_hook = free_extent_hook;
11268                 fs_info->corrupt_blocks = &corrupt_blocks;
11269         }
11270
11271         bits_nr = 1024;
11272         bits = malloc(bits_nr * sizeof(struct block_info));
11273         if (!bits) {
11274                 perror("malloc");
11275                 exit(1);
11276         }
11277
11278         if (ctx.progress_enabled) {
11279                 ctx.tp = TASK_EXTENTS;
11280                 task_start(ctx.info);
11281         }
11282
11283 again:
11284         root1 = fs_info->tree_root;
11285         level = btrfs_header_level(root1->node);
11286         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11287                                     root1->node->start, 0, level, 0, NULL);
11288         if (ret < 0)
11289                 goto out;
11290         root1 = fs_info->chunk_root;
11291         level = btrfs_header_level(root1->node);
11292         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11293                                     root1->node->start, 0, level, 0, NULL);
11294         if (ret < 0)
11295                 goto out;
11296         btrfs_init_path(&path);
11297         key.offset = 0;
11298         key.objectid = 0;
11299         key.type = BTRFS_ROOT_ITEM_KEY;
11300         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11301         if (ret < 0)
11302                 goto out;
11303         while(1) {
11304                 leaf = path.nodes[0];
11305                 slot = path.slots[0];
11306                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11307                         ret = btrfs_next_leaf(root, &path);
11308                         if (ret != 0)
11309                                 break;
11310                         leaf = path.nodes[0];
11311                         slot = path.slots[0];
11312                 }
11313                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11314                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11315                         unsigned long offset;
11316                         u64 last_snapshot;
11317
11318                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11319                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11320                         last_snapshot = btrfs_root_last_snapshot(&ri);
11321                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11322                                 level = btrfs_root_level(&ri);
11323                                 ret = add_root_item_to_list(&normal_trees,
11324                                                 found_key.objectid,
11325                                                 btrfs_root_bytenr(&ri),
11326                                                 last_snapshot, level,
11327                                                 0, NULL);
11328                                 if (ret < 0)
11329                                         goto out;
11330                         } else {
11331                                 level = btrfs_root_level(&ri);
11332                                 objectid = found_key.objectid;
11333                                 btrfs_disk_key_to_cpu(&found_key,
11334                                                       &ri.drop_progress);
11335                                 ret = add_root_item_to_list(&dropping_trees,
11336                                                 objectid,
11337                                                 btrfs_root_bytenr(&ri),
11338                                                 last_snapshot, level,
11339                                                 ri.drop_level, &found_key);
11340                                 if (ret < 0)
11341                                         goto out;
11342                         }
11343                 }
11344                 path.slots[0]++;
11345         }
11346         btrfs_release_path(&path);
11347
11348         /*
11349          * check_block can return -EAGAIN if it fixes something, please keep
11350          * this in mind when dealing with return values from these functions, if
11351          * we get -EAGAIN we want to fall through and restart the loop.
11352          */
11353         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11354                                   &seen, &reada, &nodes, &extent_cache,
11355                                   &chunk_cache, &dev_cache, &block_group_cache,
11356                                   &dev_extent_cache);
11357         if (ret < 0) {
11358                 if (ret == -EAGAIN)
11359                         goto loop;
11360                 goto out;
11361         }
11362         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11363                                   &pending, &seen, &reada, &nodes,
11364                                   &extent_cache, &chunk_cache, &dev_cache,
11365                                   &block_group_cache, &dev_extent_cache);
11366         if (ret < 0) {
11367                 if (ret == -EAGAIN)
11368                         goto loop;
11369                 goto out;
11370         }
11371
11372         ret = check_chunks(&chunk_cache, &block_group_cache,
11373                            &dev_extent_cache, NULL, NULL, NULL, 0);
11374         if (ret) {
11375                 if (ret == -EAGAIN)
11376                         goto loop;
11377                 err = ret;
11378         }
11379
11380         ret = check_extent_refs(root, &extent_cache);
11381         if (ret < 0) {
11382                 if (ret == -EAGAIN)
11383                         goto loop;
11384                 goto out;
11385         }
11386
11387         ret = check_devices(&dev_cache, &dev_extent_cache);
11388         if (ret && err)
11389                 ret = err;
11390
11391 out:
11392         task_stop(ctx.info);
11393         if (repair) {
11394                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11395                 extent_io_tree_cleanup(&excluded_extents);
11396                 fs_info->fsck_extent_cache = NULL;
11397                 fs_info->free_extent_hook = NULL;
11398                 fs_info->corrupt_blocks = NULL;
11399                 fs_info->excluded_extents = NULL;
11400         }
11401         free(bits);
11402         free_chunk_cache_tree(&chunk_cache);
11403         free_device_cache_tree(&dev_cache);
11404         free_block_group_tree(&block_group_cache);
11405         free_device_extent_tree(&dev_extent_cache);
11406         free_extent_cache_tree(&seen);
11407         free_extent_cache_tree(&pending);
11408         free_extent_cache_tree(&reada);
11409         free_extent_cache_tree(&nodes);
11410         free_root_item_list(&normal_trees);
11411         free_root_item_list(&dropping_trees);
11412         return ret;
11413 loop:
11414         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11415         free_extent_cache_tree(&seen);
11416         free_extent_cache_tree(&pending);
11417         free_extent_cache_tree(&reada);
11418         free_extent_cache_tree(&nodes);
11419         free_chunk_cache_tree(&chunk_cache);
11420         free_block_group_tree(&block_group_cache);
11421         free_device_cache_tree(&dev_cache);
11422         free_device_extent_tree(&dev_extent_cache);
11423         free_extent_record_cache(&extent_cache);
11424         free_root_item_list(&normal_trees);
11425         free_root_item_list(&dropping_trees);
11426         extent_io_tree_cleanup(&excluded_extents);
11427         goto again;
11428 }
11429
11430 /*
11431  * Check backrefs of a tree block given by @bytenr or @eb.
11432  *
11433  * @root:       the root containing the @bytenr or @eb
11434  * @eb:         tree block extent buffer, can be NULL
11435  * @bytenr:     bytenr of the tree block to search
11436  * @level:      tree level of the tree block
11437  * @owner:      owner of the tree block
11438  *
11439  * Return >0 for any error found and output error message
11440  * Return 0 for no error found
11441  */
11442 static int check_tree_block_ref(struct btrfs_root *root,
11443                                 struct extent_buffer *eb, u64 bytenr,
11444                                 int level, u64 owner, struct node_refs *nrefs)
11445 {
11446         struct btrfs_key key;
11447         struct btrfs_root *extent_root = root->fs_info->extent_root;
11448         struct btrfs_path path;
11449         struct btrfs_extent_item *ei;
11450         struct btrfs_extent_inline_ref *iref;
11451         struct extent_buffer *leaf;
11452         unsigned long end;
11453         unsigned long ptr;
11454         int slot;
11455         int skinny_level;
11456         int root_level = btrfs_header_level(root->node);
11457         int type;
11458         u32 nodesize = root->fs_info->nodesize;
11459         u32 item_size;
11460         u64 offset;
11461         int tree_reloc_root = 0;
11462         int found_ref = 0;
11463         int err = 0;
11464         int ret;
11465         int strict = 1;
11466         int parent = 0;
11467
11468         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11469             btrfs_header_bytenr(root->node) == bytenr)
11470                 tree_reloc_root = 1;
11471         btrfs_init_path(&path);
11472         key.objectid = bytenr;
11473         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11474                 key.type = BTRFS_METADATA_ITEM_KEY;
11475         else
11476                 key.type = BTRFS_EXTENT_ITEM_KEY;
11477         key.offset = (u64)-1;
11478
11479         /* Search for the backref in extent tree */
11480         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11481         if (ret < 0) {
11482                 err |= BACKREF_MISSING;
11483                 goto out;
11484         }
11485         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11486         if (ret) {
11487                 err |= BACKREF_MISSING;
11488                 goto out;
11489         }
11490
11491         leaf = path.nodes[0];
11492         slot = path.slots[0];
11493         btrfs_item_key_to_cpu(leaf, &key, slot);
11494
11495         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11496
11497         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11498                 skinny_level = (int)key.offset;
11499                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11500         } else {
11501                 struct btrfs_tree_block_info *info;
11502
11503                 info = (struct btrfs_tree_block_info *)(ei + 1);
11504                 skinny_level = btrfs_tree_block_level(leaf, info);
11505                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11506         }
11507
11508
11509         if (eb) {
11510                 u64 header_gen;
11511                 u64 extent_gen;
11512
11513                 /*
11514                  * Due to the feature of shared tree blocks, if the upper node
11515                  * is a fs root or shared node, the extent of checked node may
11516                  * not be updated until the next CoW.
11517                  */
11518                 if (nrefs)
11519                         strict = should_check_extent_strictly(root, nrefs,
11520                                         level);
11521                 if (!(btrfs_extent_flags(leaf, ei) &
11522                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11523                         error(
11524                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11525                                 key.objectid, nodesize,
11526                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11527                         err = BACKREF_MISMATCH;
11528                 }
11529                 header_gen = btrfs_header_generation(eb);
11530                 extent_gen = btrfs_extent_generation(leaf, ei);
11531                 if (header_gen != extent_gen) {
11532                         error(
11533         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11534                                 key.objectid, nodesize, header_gen,
11535                                 extent_gen);
11536                         err = BACKREF_MISMATCH;
11537                 }
11538                 if (level != skinny_level) {
11539                         error(
11540                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11541                                 key.objectid, nodesize, level, skinny_level);
11542                         err = BACKREF_MISMATCH;
11543                 }
11544                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11545                         error(
11546                         "extent[%llu %u] is referred by other roots than %llu",
11547                                 key.objectid, nodesize, root->objectid);
11548                         err = BACKREF_MISMATCH;
11549                 }
11550         }
11551
11552         /*
11553          * Iterate the extent/metadata item to find the exact backref
11554          */
11555         item_size = btrfs_item_size_nr(leaf, slot);
11556         ptr = (unsigned long)iref;
11557         end = (unsigned long)ei + item_size;
11558
11559         while (ptr < end) {
11560                 iref = (struct btrfs_extent_inline_ref *)ptr;
11561                 type = btrfs_extent_inline_ref_type(leaf, iref);
11562                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11563
11564                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11565                         if (offset == root->objectid)
11566                                 found_ref = 1;
11567                         if (!strict && owner == offset)
11568                                 found_ref = 1;
11569                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11570                         /*
11571                          * Backref of tree reloc root points to itself, no need
11572                          * to check backref any more.
11573                          */
11574                         if (tree_reloc_root) {
11575                                 found_ref = 1;
11576                         } else {
11577                                 /*
11578                                  * Check if the backref points to valid
11579                                  * referencer
11580                                  */
11581                                 found_ref = !check_tree_block_ref( root, NULL,
11582                                                 offset, level + 1, owner,
11583                                                 NULL);
11584                         }
11585                 }
11586
11587                 if (found_ref)
11588                         break;
11589                 ptr += btrfs_extent_inline_ref_size(type);
11590         }
11591
11592         /*
11593          * Inlined extent item doesn't have what we need, check
11594          * TREE_BLOCK_REF_KEY
11595          */
11596         if (!found_ref) {
11597                 btrfs_release_path(&path);
11598                 key.objectid = bytenr;
11599                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11600                 key.offset = root->objectid;
11601
11602                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11603                 if (!ret)
11604                         found_ref = 1;
11605         }
11606         if (!found_ref)
11607                 err |= BACKREF_MISSING;
11608 out:
11609         btrfs_release_path(&path);
11610         if (nrefs && strict &&
11611             level < root_level && nrefs->full_backref[level + 1])
11612                 parent = nrefs->bytenr[level + 1];
11613         if (eb && (err & BACKREF_MISSING))
11614                 error(
11615         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11616                       bytenr, nodesize, owner, level,
11617                       parent ? "parent" : "root",
11618                       parent ? parent : root->objectid);
11619         return err;
11620 }
11621
11622 /*
11623  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11624  *
11625  * Return >0 any error found and output error message
11626  * Return 0 for no error found
11627  */
11628 static int check_extent_data_item(struct btrfs_root *root,
11629                                   struct btrfs_path *pathp,
11630                                   struct node_refs *nrefs,  int account_bytes)
11631 {
11632         struct btrfs_file_extent_item *fi;
11633         struct extent_buffer *eb = pathp->nodes[0];
11634         struct btrfs_path path;
11635         struct btrfs_root *extent_root = root->fs_info->extent_root;
11636         struct btrfs_key fi_key;
11637         struct btrfs_key dbref_key;
11638         struct extent_buffer *leaf;
11639         struct btrfs_extent_item *ei;
11640         struct btrfs_extent_inline_ref *iref;
11641         struct btrfs_extent_data_ref *dref;
11642         u64 owner;
11643         u64 disk_bytenr;
11644         u64 disk_num_bytes;
11645         u64 extent_num_bytes;
11646         u64 extent_flags;
11647         u32 item_size;
11648         unsigned long end;
11649         unsigned long ptr;
11650         int type;
11651         u64 ref_root;
11652         int found_dbackref = 0;
11653         int slot = pathp->slots[0];
11654         int err = 0;
11655         int ret;
11656         int strict;
11657
11658         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11659         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11660
11661         /* Nothing to check for hole and inline data extents */
11662         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11663             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11664                 return 0;
11665
11666         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11667         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11668         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11669
11670         /* Check unaligned disk_num_bytes and num_bytes */
11671         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11672                 error(
11673 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11674                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11675                         root->fs_info->sectorsize);
11676                 err |= BYTES_UNALIGNED;
11677         } else if (account_bytes) {
11678                 data_bytes_allocated += disk_num_bytes;
11679         }
11680         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11681                 error(
11682 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11683                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11684                         root->fs_info->sectorsize);
11685                 err |= BYTES_UNALIGNED;
11686         } else if (account_bytes) {
11687                 data_bytes_referenced += extent_num_bytes;
11688         }
11689         owner = btrfs_header_owner(eb);
11690
11691         /* Check the extent item of the file extent in extent tree */
11692         btrfs_init_path(&path);
11693         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11694         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11695         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11696
11697         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11698         if (ret)
11699                 goto out;
11700
11701         leaf = path.nodes[0];
11702         slot = path.slots[0];
11703         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11704
11705         extent_flags = btrfs_extent_flags(leaf, ei);
11706
11707         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11708                 error(
11709                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11710                     disk_bytenr, disk_num_bytes,
11711                     BTRFS_EXTENT_FLAG_DATA);
11712                 err |= BACKREF_MISMATCH;
11713         }
11714
11715         /* Check data backref inside that extent item */
11716         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11717         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11718         ptr = (unsigned long)iref;
11719         end = (unsigned long)ei + item_size;
11720         strict = should_check_extent_strictly(root, nrefs, -1);
11721
11722         while (ptr < end) {
11723                 iref = (struct btrfs_extent_inline_ref *)ptr;
11724                 type = btrfs_extent_inline_ref_type(leaf, iref);
11725                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11726
11727                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11728                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11729                         if (ref_root == root->objectid)
11730                                 found_dbackref = 1;
11731                         else if (!strict && owner == ref_root)
11732                                 found_dbackref = 1;
11733                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11734                         found_dbackref = !check_tree_block_ref(root, NULL,
11735                                 btrfs_extent_inline_ref_offset(leaf, iref),
11736                                 0, owner, NULL);
11737                 }
11738
11739                 if (found_dbackref)
11740                         break;
11741                 ptr += btrfs_extent_inline_ref_size(type);
11742         }
11743
11744         if (!found_dbackref) {
11745                 btrfs_release_path(&path);
11746
11747                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11748                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11749                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11750                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11751                                 fi_key.objectid, fi_key.offset);
11752
11753                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11754                                         &dbref_key, &path, 0, 0);
11755                 if (!ret) {
11756                         found_dbackref = 1;
11757                         goto out;
11758                 }
11759
11760                 btrfs_release_path(&path);
11761
11762                 /*
11763                  * Neither inlined nor EXTENT_DATA_REF found, try
11764                  * SHARED_DATA_REF as last chance.
11765                  */
11766                 dbref_key.objectid = disk_bytenr;
11767                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11768                 dbref_key.offset = eb->start;
11769
11770                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11771                                         &dbref_key, &path, 0, 0);
11772                 if (!ret) {
11773                         found_dbackref = 1;
11774                         goto out;
11775                 }
11776         }
11777
11778 out:
11779         if (!found_dbackref)
11780                 err |= BACKREF_MISSING;
11781         btrfs_release_path(&path);
11782         if (err & BACKREF_MISSING) {
11783                 error("data extent[%llu %llu] backref lost",
11784                       disk_bytenr, disk_num_bytes);
11785         }
11786         return err;
11787 }
11788
11789 /*
11790  * Get real tree block level for the case like shared block
11791  * Return >= 0 as tree level
11792  * Return <0 for error
11793  */
11794 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11795 {
11796         struct extent_buffer *eb;
11797         struct btrfs_path path;
11798         struct btrfs_key key;
11799         struct btrfs_extent_item *ei;
11800         u64 flags;
11801         u64 transid;
11802         u8 backref_level;
11803         u8 header_level;
11804         int ret;
11805
11806         /* Search extent tree for extent generation and level */
11807         key.objectid = bytenr;
11808         key.type = BTRFS_METADATA_ITEM_KEY;
11809         key.offset = (u64)-1;
11810
11811         btrfs_init_path(&path);
11812         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11813         if (ret < 0)
11814                 goto release_out;
11815         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11816         if (ret < 0)
11817                 goto release_out;
11818         if (ret > 0) {
11819                 ret = -ENOENT;
11820                 goto release_out;
11821         }
11822
11823         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11824         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11825                             struct btrfs_extent_item);
11826         flags = btrfs_extent_flags(path.nodes[0], ei);
11827         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11828                 ret = -ENOENT;
11829                 goto release_out;
11830         }
11831
11832         /* Get transid for later read_tree_block() check */
11833         transid = btrfs_extent_generation(path.nodes[0], ei);
11834
11835         /* Get backref level as one source */
11836         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11837                 backref_level = key.offset;
11838         } else {
11839                 struct btrfs_tree_block_info *info;
11840
11841                 info = (struct btrfs_tree_block_info *)(ei + 1);
11842                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11843         }
11844         btrfs_release_path(&path);
11845
11846         /* Get level from tree block as an alternative source */
11847         eb = read_tree_block(fs_info, bytenr, transid);
11848         if (!extent_buffer_uptodate(eb)) {
11849                 free_extent_buffer(eb);
11850                 return -EIO;
11851         }
11852         header_level = btrfs_header_level(eb);
11853         free_extent_buffer(eb);
11854
11855         if (header_level != backref_level)
11856                 return -EIO;
11857         return header_level;
11858
11859 release_out:
11860         btrfs_release_path(&path);
11861         return ret;
11862 }
11863
11864 /*
11865  * Check if a tree block backref is valid (points to a valid tree block)
11866  * if level == -1, level will be resolved
11867  * Return >0 for any error found and print error message
11868  */
11869 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11870                                     u64 bytenr, int level)
11871 {
11872         struct btrfs_root *root;
11873         struct btrfs_key key;
11874         struct btrfs_path path;
11875         struct extent_buffer *eb;
11876         struct extent_buffer *node;
11877         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11878         int err = 0;
11879         int ret;
11880
11881         /* Query level for level == -1 special case */
11882         if (level == -1)
11883                 level = query_tree_block_level(fs_info, bytenr);
11884         if (level < 0) {
11885                 err |= REFERENCER_MISSING;
11886                 goto out;
11887         }
11888
11889         key.objectid = root_id;
11890         key.type = BTRFS_ROOT_ITEM_KEY;
11891         key.offset = (u64)-1;
11892
11893         root = btrfs_read_fs_root(fs_info, &key);
11894         if (IS_ERR(root)) {
11895                 err |= REFERENCER_MISSING;
11896                 goto out;
11897         }
11898
11899         /* Read out the tree block to get item/node key */
11900         eb = read_tree_block(fs_info, bytenr, 0);
11901         if (!extent_buffer_uptodate(eb)) {
11902                 err |= REFERENCER_MISSING;
11903                 free_extent_buffer(eb);
11904                 goto out;
11905         }
11906
11907         /* Empty tree, no need to check key */
11908         if (!btrfs_header_nritems(eb) && !level) {
11909                 free_extent_buffer(eb);
11910                 goto out;
11911         }
11912
11913         if (level)
11914                 btrfs_node_key_to_cpu(eb, &key, 0);
11915         else
11916                 btrfs_item_key_to_cpu(eb, &key, 0);
11917
11918         free_extent_buffer(eb);
11919
11920         btrfs_init_path(&path);
11921         path.lowest_level = level;
11922         /* Search with the first key, to ensure we can reach it */
11923         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11924         if (ret < 0) {
11925                 err |= REFERENCER_MISSING;
11926                 goto release_out;
11927         }
11928
11929         node = path.nodes[level];
11930         if (btrfs_header_bytenr(node) != bytenr) {
11931                 error(
11932         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11933                         bytenr, nodesize, bytenr,
11934                         btrfs_header_bytenr(node));
11935                 err |= REFERENCER_MISMATCH;
11936         }
11937         if (btrfs_header_level(node) != level) {
11938                 error(
11939         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11940                         bytenr, nodesize, level,
11941                         btrfs_header_level(node));
11942                 err |= REFERENCER_MISMATCH;
11943         }
11944
11945 release_out:
11946         btrfs_release_path(&path);
11947 out:
11948         if (err & REFERENCER_MISSING) {
11949                 if (level < 0)
11950                         error("extent [%llu %d] lost referencer (owner: %llu)",
11951                                 bytenr, nodesize, root_id);
11952                 else
11953                         error(
11954                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11955                                 bytenr, nodesize, root_id, level);
11956         }
11957
11958         return err;
11959 }
11960
11961 /*
11962  * Check if tree block @eb is tree reloc root.
11963  * Return 0 if it's not or any problem happens
11964  * Return 1 if it's a tree reloc root
11965  */
11966 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11967                                  struct extent_buffer *eb)
11968 {
11969         struct btrfs_root *tree_reloc_root;
11970         struct btrfs_key key;
11971         u64 bytenr = btrfs_header_bytenr(eb);
11972         u64 owner = btrfs_header_owner(eb);
11973         int ret = 0;
11974
11975         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11976         key.offset = owner;
11977         key.type = BTRFS_ROOT_ITEM_KEY;
11978
11979         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11980         if (IS_ERR(tree_reloc_root))
11981                 return 0;
11982
11983         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11984                 ret = 1;
11985         btrfs_free_fs_root(tree_reloc_root);
11986         return ret;
11987 }
11988
11989 /*
11990  * Check referencer for shared block backref
11991  * If level == -1, this function will resolve the level.
11992  */
11993 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11994                                      u64 parent, u64 bytenr, int level)
11995 {
11996         struct extent_buffer *eb;
11997         u32 nr;
11998         int found_parent = 0;
11999         int i;
12000
12001         eb = read_tree_block(fs_info, parent, 0);
12002         if (!extent_buffer_uptodate(eb))
12003                 goto out;
12004
12005         if (level == -1)
12006                 level = query_tree_block_level(fs_info, bytenr);
12007         if (level < 0)
12008                 goto out;
12009
12010         /* It's possible it's a tree reloc root */
12011         if (parent == bytenr) {
12012                 if (is_tree_reloc_root(fs_info, eb))
12013                         found_parent = 1;
12014                 goto out;
12015         }
12016
12017         if (level + 1 != btrfs_header_level(eb))
12018                 goto out;
12019
12020         nr = btrfs_header_nritems(eb);
12021         for (i = 0; i < nr; i++) {
12022                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12023                         found_parent = 1;
12024                         break;
12025                 }
12026         }
12027 out:
12028         free_extent_buffer(eb);
12029         if (!found_parent) {
12030                 error(
12031         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12032                         bytenr, fs_info->nodesize, parent, level);
12033                 return REFERENCER_MISSING;
12034         }
12035         return 0;
12036 }
12037
12038 /*
12039  * Check referencer for normal (inlined) data ref
12040  * If len == 0, it will be resolved by searching in extent tree
12041  */
12042 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12043                                      u64 root_id, u64 objectid, u64 offset,
12044                                      u64 bytenr, u64 len, u32 count)
12045 {
12046         struct btrfs_root *root;
12047         struct btrfs_root *extent_root = fs_info->extent_root;
12048         struct btrfs_key key;
12049         struct btrfs_path path;
12050         struct extent_buffer *leaf;
12051         struct btrfs_file_extent_item *fi;
12052         u32 found_count = 0;
12053         int slot;
12054         int ret = 0;
12055
12056         if (!len) {
12057                 key.objectid = bytenr;
12058                 key.type = BTRFS_EXTENT_ITEM_KEY;
12059                 key.offset = (u64)-1;
12060
12061                 btrfs_init_path(&path);
12062                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12063                 if (ret < 0)
12064                         goto out;
12065                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12066                 if (ret)
12067                         goto out;
12068                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12069                 if (key.objectid != bytenr ||
12070                     key.type != BTRFS_EXTENT_ITEM_KEY)
12071                         goto out;
12072                 len = key.offset;
12073                 btrfs_release_path(&path);
12074         }
12075         key.objectid = root_id;
12076         key.type = BTRFS_ROOT_ITEM_KEY;
12077         key.offset = (u64)-1;
12078         btrfs_init_path(&path);
12079
12080         root = btrfs_read_fs_root(fs_info, &key);
12081         if (IS_ERR(root))
12082                 goto out;
12083
12084         key.objectid = objectid;
12085         key.type = BTRFS_EXTENT_DATA_KEY;
12086         /*
12087          * It can be nasty as data backref offset is
12088          * file offset - file extent offset, which is smaller or
12089          * equal to original backref offset.  The only special case is
12090          * overflow.  So we need to special check and do further search.
12091          */
12092         key.offset = offset & (1ULL << 63) ? 0 : offset;
12093
12094         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12095         if (ret < 0)
12096                 goto out;
12097
12098         /*
12099          * Search afterwards to get correct one
12100          * NOTE: As we must do a comprehensive check on the data backref to
12101          * make sure the dref count also matches, we must iterate all file
12102          * extents for that inode.
12103          */
12104         while (1) {
12105                 leaf = path.nodes[0];
12106                 slot = path.slots[0];
12107
12108                 if (slot >= btrfs_header_nritems(leaf))
12109                         goto next;
12110                 btrfs_item_key_to_cpu(leaf, &key, slot);
12111                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12112                         break;
12113                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12114                 /*
12115                  * Except normal disk bytenr and disk num bytes, we still
12116                  * need to do extra check on dbackref offset as
12117                  * dbackref offset = file_offset - file_extent_offset
12118                  */
12119                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12120                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12121                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12122                     offset)
12123                         found_count++;
12124
12125 next:
12126                 ret = btrfs_next_item(root, &path);
12127                 if (ret)
12128                         break;
12129         }
12130 out:
12131         btrfs_release_path(&path);
12132         if (found_count != count) {
12133                 error(
12134 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12135                         bytenr, len, root_id, objectid, offset, count, found_count);
12136                 return REFERENCER_MISSING;
12137         }
12138         return 0;
12139 }
12140
12141 /*
12142  * Check if the referencer of a shared data backref exists
12143  */
12144 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12145                                      u64 parent, u64 bytenr)
12146 {
12147         struct extent_buffer *eb;
12148         struct btrfs_key key;
12149         struct btrfs_file_extent_item *fi;
12150         u32 nr;
12151         int found_parent = 0;
12152         int i;
12153
12154         eb = read_tree_block(fs_info, parent, 0);
12155         if (!extent_buffer_uptodate(eb))
12156                 goto out;
12157
12158         nr = btrfs_header_nritems(eb);
12159         for (i = 0; i < nr; i++) {
12160                 btrfs_item_key_to_cpu(eb, &key, i);
12161                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12162                         continue;
12163
12164                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12165                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12166                         continue;
12167
12168                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12169                         found_parent = 1;
12170                         break;
12171                 }
12172         }
12173
12174 out:
12175         free_extent_buffer(eb);
12176         if (!found_parent) {
12177                 error("shared extent %llu referencer lost (parent: %llu)",
12178                         bytenr, parent);
12179                 return REFERENCER_MISSING;
12180         }
12181         return 0;
12182 }
12183
12184 /*
12185  * This function will check a given extent item, including its backref and
12186  * itself (like crossing stripe boundary and type)
12187  *
12188  * Since we don't use extent_record anymore, introduce new error bit
12189  */
12190 static int check_extent_item(struct btrfs_fs_info *fs_info,
12191                              struct extent_buffer *eb, int slot)
12192 {
12193         struct btrfs_extent_item *ei;
12194         struct btrfs_extent_inline_ref *iref;
12195         struct btrfs_extent_data_ref *dref;
12196         unsigned long end;
12197         unsigned long ptr;
12198         int type;
12199         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12200         u32 item_size = btrfs_item_size_nr(eb, slot);
12201         u64 flags;
12202         u64 offset;
12203         int metadata = 0;
12204         int level;
12205         struct btrfs_key key;
12206         int ret;
12207         int err = 0;
12208
12209         btrfs_item_key_to_cpu(eb, &key, slot);
12210         if (key.type == BTRFS_EXTENT_ITEM_KEY)
12211                 bytes_used += key.offset;
12212         else
12213                 bytes_used += nodesize;
12214
12215         if (item_size < sizeof(*ei)) {
12216                 /*
12217                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12218                  * old thing when on disk format is still un-determined.
12219                  * No need to care about it anymore
12220                  */
12221                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12222                 return -ENOTTY;
12223         }
12224
12225         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12226         flags = btrfs_extent_flags(eb, ei);
12227
12228         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12229                 metadata = 1;
12230         if (metadata && check_crossing_stripes(global_info, key.objectid,
12231                                                eb->len)) {
12232                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12233                       key.objectid, key.objectid + nodesize);
12234                 err |= CROSSING_STRIPE_BOUNDARY;
12235         }
12236
12237         ptr = (unsigned long)(ei + 1);
12238
12239         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12240                 /* Old EXTENT_ITEM metadata */
12241                 struct btrfs_tree_block_info *info;
12242
12243                 info = (struct btrfs_tree_block_info *)ptr;
12244                 level = btrfs_tree_block_level(eb, info);
12245                 ptr += sizeof(struct btrfs_tree_block_info);
12246         } else {
12247                 /* New METADATA_ITEM */
12248                 level = key.offset;
12249         }
12250         end = (unsigned long)ei + item_size;
12251
12252 next:
12253         /* Reached extent item end normally */
12254         if (ptr == end)
12255                 goto out;
12256
12257         /* Beyond extent item end, wrong item size */
12258         if (ptr > end) {
12259                 err |= ITEM_SIZE_MISMATCH;
12260                 error("extent item at bytenr %llu slot %d has wrong size",
12261                         eb->start, slot);
12262                 goto out;
12263         }
12264
12265         /* Now check every backref in this extent item */
12266         iref = (struct btrfs_extent_inline_ref *)ptr;
12267         type = btrfs_extent_inline_ref_type(eb, iref);
12268         offset = btrfs_extent_inline_ref_offset(eb, iref);
12269         switch (type) {
12270         case BTRFS_TREE_BLOCK_REF_KEY:
12271                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12272                                                level);
12273                 err |= ret;
12274                 break;
12275         case BTRFS_SHARED_BLOCK_REF_KEY:
12276                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12277                                                  level);
12278                 err |= ret;
12279                 break;
12280         case BTRFS_EXTENT_DATA_REF_KEY:
12281                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12282                 ret = check_extent_data_backref(fs_info,
12283                                 btrfs_extent_data_ref_root(eb, dref),
12284                                 btrfs_extent_data_ref_objectid(eb, dref),
12285                                 btrfs_extent_data_ref_offset(eb, dref),
12286                                 key.objectid, key.offset,
12287                                 btrfs_extent_data_ref_count(eb, dref));
12288                 err |= ret;
12289                 break;
12290         case BTRFS_SHARED_DATA_REF_KEY:
12291                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12292                 err |= ret;
12293                 break;
12294         default:
12295                 error("extent[%llu %d %llu] has unknown ref type: %d",
12296                         key.objectid, key.type, key.offset, type);
12297                 err |= UNKNOWN_TYPE;
12298                 goto out;
12299         }
12300
12301         ptr += btrfs_extent_inline_ref_size(type);
12302         goto next;
12303
12304 out:
12305         return err;
12306 }
12307
12308 /*
12309  * Check if a dev extent item is referred correctly by its chunk
12310  */
12311 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12312                                  struct extent_buffer *eb, int slot)
12313 {
12314         struct btrfs_root *chunk_root = fs_info->chunk_root;
12315         struct btrfs_dev_extent *ptr;
12316         struct btrfs_path path;
12317         struct btrfs_key chunk_key;
12318         struct btrfs_key devext_key;
12319         struct btrfs_chunk *chunk;
12320         struct extent_buffer *l;
12321         int num_stripes;
12322         u64 length;
12323         int i;
12324         int found_chunk = 0;
12325         int ret;
12326
12327         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12328         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12329         length = btrfs_dev_extent_length(eb, ptr);
12330
12331         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12332         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12333         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12334
12335         btrfs_init_path(&path);
12336         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12337         if (ret)
12338                 goto out;
12339
12340         l = path.nodes[0];
12341         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12342         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12343                                       chunk_key.offset);
12344         if (ret < 0)
12345                 goto out;
12346
12347         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12348                 goto out;
12349
12350         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12351         for (i = 0; i < num_stripes; i++) {
12352                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12353                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12354
12355                 if (devid == devext_key.objectid &&
12356                     offset == devext_key.offset) {
12357                         found_chunk = 1;
12358                         break;
12359                 }
12360         }
12361 out:
12362         btrfs_release_path(&path);
12363         if (!found_chunk) {
12364                 error(
12365                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12366                         devext_key.objectid, devext_key.offset, length);
12367                 return REFERENCER_MISSING;
12368         }
12369         return 0;
12370 }
12371
12372 /*
12373  * Check if the used space is correct with the dev item
12374  */
12375 static int check_dev_item(struct btrfs_fs_info *fs_info,
12376                           struct extent_buffer *eb, int slot)
12377 {
12378         struct btrfs_root *dev_root = fs_info->dev_root;
12379         struct btrfs_dev_item *dev_item;
12380         struct btrfs_path path;
12381         struct btrfs_key key;
12382         struct btrfs_dev_extent *ptr;
12383         u64 dev_id;
12384         u64 used;
12385         u64 total = 0;
12386         int ret;
12387
12388         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12389         dev_id = btrfs_device_id(eb, dev_item);
12390         used = btrfs_device_bytes_used(eb, dev_item);
12391
12392         key.objectid = dev_id;
12393         key.type = BTRFS_DEV_EXTENT_KEY;
12394         key.offset = 0;
12395
12396         btrfs_init_path(&path);
12397         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12398         if (ret < 0) {
12399                 btrfs_item_key_to_cpu(eb, &key, slot);
12400                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12401                         key.objectid, key.type, key.offset);
12402                 btrfs_release_path(&path);
12403                 return REFERENCER_MISSING;
12404         }
12405
12406         /* Iterate dev_extents to calculate the used space of a device */
12407         while (1) {
12408                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12409                         goto next;
12410
12411                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12412                 if (key.objectid > dev_id)
12413                         break;
12414                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12415                         goto next;
12416
12417                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12418                                      struct btrfs_dev_extent);
12419                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12420 next:
12421                 ret = btrfs_next_item(dev_root, &path);
12422                 if (ret)
12423                         break;
12424         }
12425         btrfs_release_path(&path);
12426
12427         if (used != total) {
12428                 btrfs_item_key_to_cpu(eb, &key, slot);
12429                 error(
12430 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12431                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12432                         BTRFS_DEV_EXTENT_KEY, dev_id);
12433                 return ACCOUNTING_MISMATCH;
12434         }
12435         return 0;
12436 }
12437
12438 /*
12439  * Check a block group item with its referener (chunk) and its used space
12440  * with extent/metadata item
12441  */
12442 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12443                                   struct extent_buffer *eb, int slot)
12444 {
12445         struct btrfs_root *extent_root = fs_info->extent_root;
12446         struct btrfs_root *chunk_root = fs_info->chunk_root;
12447         struct btrfs_block_group_item *bi;
12448         struct btrfs_block_group_item bg_item;
12449         struct btrfs_path path;
12450         struct btrfs_key bg_key;
12451         struct btrfs_key chunk_key;
12452         struct btrfs_key extent_key;
12453         struct btrfs_chunk *chunk;
12454         struct extent_buffer *leaf;
12455         struct btrfs_extent_item *ei;
12456         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12457         u64 flags;
12458         u64 bg_flags;
12459         u64 used;
12460         u64 total = 0;
12461         int ret;
12462         int err = 0;
12463
12464         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12465         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12466         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12467         used = btrfs_block_group_used(&bg_item);
12468         bg_flags = btrfs_block_group_flags(&bg_item);
12469
12470         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12471         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12472         chunk_key.offset = bg_key.objectid;
12473
12474         btrfs_init_path(&path);
12475         /* Search for the referencer chunk */
12476         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12477         if (ret) {
12478                 error(
12479                 "block group[%llu %llu] did not find the related chunk item",
12480                         bg_key.objectid, bg_key.offset);
12481                 err |= REFERENCER_MISSING;
12482         } else {
12483                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12484                                         struct btrfs_chunk);
12485                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12486                                                 bg_key.offset) {
12487                         error(
12488         "block group[%llu %llu] related chunk item length does not match",
12489                                 bg_key.objectid, bg_key.offset);
12490                         err |= REFERENCER_MISMATCH;
12491                 }
12492         }
12493         btrfs_release_path(&path);
12494
12495         /* Search from the block group bytenr */
12496         extent_key.objectid = bg_key.objectid;
12497         extent_key.type = 0;
12498         extent_key.offset = 0;
12499
12500         btrfs_init_path(&path);
12501         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12502         if (ret < 0)
12503                 goto out;
12504
12505         /* Iterate extent tree to account used space */
12506         while (1) {
12507                 leaf = path.nodes[0];
12508
12509                 /* Search slot can point to the last item beyond leaf nritems */
12510                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12511                         goto next;
12512
12513                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12514                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12515                         break;
12516
12517                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12518                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12519                         goto next;
12520                 if (extent_key.objectid < bg_key.objectid)
12521                         goto next;
12522
12523                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12524                         total += nodesize;
12525                 else
12526                         total += extent_key.offset;
12527
12528                 ei = btrfs_item_ptr(leaf, path.slots[0],
12529                                     struct btrfs_extent_item);
12530                 flags = btrfs_extent_flags(leaf, ei);
12531                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12532                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12533                                 error(
12534                         "bad extent[%llu, %llu) type mismatch with chunk",
12535                                         extent_key.objectid,
12536                                         extent_key.objectid + extent_key.offset);
12537                                 err |= CHUNK_TYPE_MISMATCH;
12538                         }
12539                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12540                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12541                                     BTRFS_BLOCK_GROUP_METADATA))) {
12542                                 error(
12543                         "bad extent[%llu, %llu) type mismatch with chunk",
12544                                         extent_key.objectid,
12545                                         extent_key.objectid + nodesize);
12546                                 err |= CHUNK_TYPE_MISMATCH;
12547                         }
12548                 }
12549 next:
12550                 ret = btrfs_next_item(extent_root, &path);
12551                 if (ret)
12552                         break;
12553         }
12554
12555 out:
12556         btrfs_release_path(&path);
12557
12558         if (total != used) {
12559                 error(
12560                 "block group[%llu %llu] used %llu but extent items used %llu",
12561                         bg_key.objectid, bg_key.offset, used, total);
12562                 err |= ACCOUNTING_MISMATCH;
12563         }
12564         return err;
12565 }
12566
12567 /*
12568  * Check a chunk item.
12569  * Including checking all referred dev_extents and block group
12570  */
12571 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12572                             struct extent_buffer *eb, int slot)
12573 {
12574         struct btrfs_root *extent_root = fs_info->extent_root;
12575         struct btrfs_root *dev_root = fs_info->dev_root;
12576         struct btrfs_path path;
12577         struct btrfs_key chunk_key;
12578         struct btrfs_key bg_key;
12579         struct btrfs_key devext_key;
12580         struct btrfs_chunk *chunk;
12581         struct extent_buffer *leaf;
12582         struct btrfs_block_group_item *bi;
12583         struct btrfs_block_group_item bg_item;
12584         struct btrfs_dev_extent *ptr;
12585         u64 length;
12586         u64 chunk_end;
12587         u64 stripe_len;
12588         u64 type;
12589         int num_stripes;
12590         u64 offset;
12591         u64 objectid;
12592         int i;
12593         int ret;
12594         int err = 0;
12595
12596         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12597         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12598         length = btrfs_chunk_length(eb, chunk);
12599         chunk_end = chunk_key.offset + length;
12600         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12601                                       chunk_key.offset);
12602         if (ret < 0) {
12603                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12604                         chunk_end);
12605                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12606                 goto out;
12607         }
12608         type = btrfs_chunk_type(eb, chunk);
12609
12610         bg_key.objectid = chunk_key.offset;
12611         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12612         bg_key.offset = length;
12613
12614         btrfs_init_path(&path);
12615         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12616         if (ret) {
12617                 error(
12618                 "chunk[%llu %llu) did not find the related block group item",
12619                         chunk_key.offset, chunk_end);
12620                 err |= REFERENCER_MISSING;
12621         } else{
12622                 leaf = path.nodes[0];
12623                 bi = btrfs_item_ptr(leaf, path.slots[0],
12624                                     struct btrfs_block_group_item);
12625                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12626                                    sizeof(bg_item));
12627                 if (btrfs_block_group_flags(&bg_item) != type) {
12628                         error(
12629 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12630                                 chunk_key.offset, chunk_end, type,
12631                                 btrfs_block_group_flags(&bg_item));
12632                         err |= REFERENCER_MISSING;
12633                 }
12634         }
12635
12636         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12637         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12638         for (i = 0; i < num_stripes; i++) {
12639                 btrfs_release_path(&path);
12640                 btrfs_init_path(&path);
12641                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12642                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12643                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12644
12645                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12646                                         0, 0);
12647                 if (ret)
12648                         goto not_match_dev;
12649
12650                 leaf = path.nodes[0];
12651                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12652                                      struct btrfs_dev_extent);
12653                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12654                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12655                 if (objectid != chunk_key.objectid ||
12656                     offset != chunk_key.offset ||
12657                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12658                         goto not_match_dev;
12659                 continue;
12660 not_match_dev:
12661                 err |= BACKREF_MISSING;
12662                 error(
12663                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12664                         chunk_key.objectid, chunk_end, i);
12665                 continue;
12666         }
12667         btrfs_release_path(&path);
12668 out:
12669         return err;
12670 }
12671
12672 /*
12673  * Main entry function to check known items and update related accounting info
12674  */
12675 static int check_leaf_items(struct btrfs_trans_handle *trans,
12676                             struct btrfs_root *root, struct btrfs_path *path,
12677                             struct node_refs *nrefs, int account_bytes)
12678 {
12679         struct btrfs_fs_info *fs_info = root->fs_info;
12680         struct btrfs_key key;
12681         struct extent_buffer *eb;
12682         int slot;
12683         int type;
12684         struct btrfs_extent_data_ref *dref;
12685         int ret = 0;
12686         int err = 0;
12687
12688 again:
12689         eb = path->nodes[0];
12690         slot = path->slots[0];
12691         if (slot >= btrfs_header_nritems(eb)) {
12692                 if (slot == 0) {
12693                         error("empty leaf [%llu %u] root %llu", eb->start,
12694                                 root->fs_info->nodesize, root->objectid);
12695                         err |= EIO;
12696                 }
12697                 goto out;
12698         }
12699
12700         btrfs_item_key_to_cpu(eb, &key, slot);
12701         type = key.type;
12702
12703         switch (type) {
12704         case BTRFS_EXTENT_DATA_KEY:
12705                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12706                 err |= ret;
12707                 break;
12708         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12709                 ret = check_block_group_item(fs_info, eb, slot);
12710                 err |= ret;
12711                 break;
12712         case BTRFS_DEV_ITEM_KEY:
12713                 ret = check_dev_item(fs_info, eb, slot);
12714                 err |= ret;
12715                 break;
12716         case BTRFS_CHUNK_ITEM_KEY:
12717                 ret = check_chunk_item(fs_info, eb, slot);
12718                 err |= ret;
12719                 break;
12720         case BTRFS_DEV_EXTENT_KEY:
12721                 ret = check_dev_extent_item(fs_info, eb, slot);
12722                 err |= ret;
12723                 break;
12724         case BTRFS_EXTENT_ITEM_KEY:
12725         case BTRFS_METADATA_ITEM_KEY:
12726                 ret = check_extent_item(fs_info, eb, slot);
12727                 err |= ret;
12728                 break;
12729         case BTRFS_EXTENT_CSUM_KEY:
12730                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12731                 err |= ret;
12732                 break;
12733         case BTRFS_TREE_BLOCK_REF_KEY:
12734                 ret = check_tree_block_backref(fs_info, key.offset,
12735                                                key.objectid, -1);
12736                 err |= ret;
12737                 break;
12738         case BTRFS_EXTENT_DATA_REF_KEY:
12739                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12740                 ret = check_extent_data_backref(fs_info,
12741                                 btrfs_extent_data_ref_root(eb, dref),
12742                                 btrfs_extent_data_ref_objectid(eb, dref),
12743                                 btrfs_extent_data_ref_offset(eb, dref),
12744                                 key.objectid, 0,
12745                                 btrfs_extent_data_ref_count(eb, dref));
12746                 err |= ret;
12747                 break;
12748         case BTRFS_SHARED_BLOCK_REF_KEY:
12749                 ret = check_shared_block_backref(fs_info, key.offset,
12750                                                  key.objectid, -1);
12751                 err |= ret;
12752                 break;
12753         case BTRFS_SHARED_DATA_REF_KEY:
12754                 ret = check_shared_data_backref(fs_info, key.offset,
12755                                                 key.objectid);
12756                 err |= ret;
12757                 break;
12758         default:
12759                 break;
12760         }
12761
12762         ++path->slots[0];
12763         goto again;
12764 out:
12765         return err;
12766 }
12767
12768 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
12769
12770 /*
12771  * Low memory usage version check_chunks_and_extents.
12772  */
12773 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12774 {
12775         struct btrfs_trans_handle *trans = NULL;
12776         struct btrfs_path path;
12777         struct btrfs_key old_key;
12778         struct btrfs_key key;
12779         struct btrfs_root *root1;
12780         struct btrfs_root *root;
12781         struct btrfs_root *cur_root;
12782         int err = 0;
12783         int ret;
12784
12785         root = fs_info->fs_root;
12786
12787         if (repair) {
12788                 /* pin every tree block to avoid extent overwrite */
12789                 ret = pin_metadata_blocks(fs_info);
12790                 if (ret) {
12791                         error("failed to pin metadata blocks");
12792                         return ret;
12793                 }
12794                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12795                 if (IS_ERR(trans)) {
12796                         error("failed to start transaction before check");
12797                         return PTR_ERR(trans);
12798                 }
12799         }
12800
12801         root1 = root->fs_info->chunk_root;
12802         ret = check_btrfs_root(trans, root1, 0, 1);
12803         err |= ret;
12804
12805         root1 = root->fs_info->tree_root;
12806         ret = check_btrfs_root(trans, root1, 0, 1);
12807         err |= ret;
12808
12809         btrfs_init_path(&path);
12810         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12811         key.offset = 0;
12812         key.type = BTRFS_ROOT_ITEM_KEY;
12813
12814         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12815         if (ret) {
12816                 error("cannot find extent tree in tree_root");
12817                 goto out;
12818         }
12819
12820         while (1) {
12821                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12822                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12823                         goto next;
12824                 old_key = key;
12825                 key.offset = (u64)-1;
12826
12827                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12828                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12829                                         &key);
12830                 else
12831                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12832                 if (IS_ERR(cur_root) || !cur_root) {
12833                         error("failed to read tree: %lld", key.objectid);
12834                         goto next;
12835                 }
12836
12837                 ret = check_btrfs_root(trans, cur_root, 0, 1);
12838                 err |= ret;
12839
12840                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12841                         btrfs_free_fs_root(cur_root);
12842
12843                 btrfs_release_path(&path);
12844                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
12845                                         &old_key, &path, 0, 0);
12846                 if (ret)
12847                         goto out;
12848 next:
12849                 ret = btrfs_next_item(root1, &path);
12850                 if (ret)
12851                         goto out;
12852         }
12853 out:
12854
12855         /* if repair, update block accounting */
12856         if (repair) {
12857                 ret = btrfs_fix_block_accounting(trans, root);
12858                 if (ret)
12859                         err |= ret;
12860         }
12861
12862         if (trans)
12863                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
12864
12865         btrfs_release_path(&path);
12866
12867         return err;
12868 }
12869
12870 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12871 {
12872         int ret;
12873
12874         if (!ctx.progress_enabled)
12875                 fprintf(stderr, "checking extents\n");
12876         if (check_mode == CHECK_MODE_LOWMEM)
12877                 ret = check_chunks_and_extents_v2(fs_info);
12878         else
12879                 ret = check_chunks_and_extents(fs_info);
12880
12881         return ret;
12882 }
12883
12884 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12885                            struct btrfs_root *root, int overwrite)
12886 {
12887         struct extent_buffer *c;
12888         struct extent_buffer *old = root->node;
12889         int level;
12890         int ret;
12891         struct btrfs_disk_key disk_key = {0,0,0};
12892
12893         level = 0;
12894
12895         if (overwrite) {
12896                 c = old;
12897                 extent_buffer_get(c);
12898                 goto init;
12899         }
12900         c = btrfs_alloc_free_block(trans, root,
12901                                    root->fs_info->nodesize,
12902                                    root->root_key.objectid,
12903                                    &disk_key, level, 0, 0);
12904         if (IS_ERR(c)) {
12905                 c = old;
12906                 extent_buffer_get(c);
12907                 overwrite = 1;
12908         }
12909 init:
12910         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12911         btrfs_set_header_level(c, level);
12912         btrfs_set_header_bytenr(c, c->start);
12913         btrfs_set_header_generation(c, trans->transid);
12914         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12915         btrfs_set_header_owner(c, root->root_key.objectid);
12916
12917         write_extent_buffer(c, root->fs_info->fsid,
12918                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12919
12920         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12921                             btrfs_header_chunk_tree_uuid(c),
12922                             BTRFS_UUID_SIZE);
12923
12924         btrfs_mark_buffer_dirty(c);
12925         /*
12926          * this case can happen in the following case:
12927          *
12928          * 1.overwrite previous root.
12929          *
12930          * 2.reinit reloc data root, this is because we skip pin
12931          * down reloc data tree before which means we can allocate
12932          * same block bytenr here.
12933          */
12934         if (old->start == c->start) {
12935                 btrfs_set_root_generation(&root->root_item,
12936                                           trans->transid);
12937                 root->root_item.level = btrfs_header_level(root->node);
12938                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12939                                         &root->root_key, &root->root_item);
12940                 if (ret) {
12941                         free_extent_buffer(c);
12942                         return ret;
12943                 }
12944         }
12945         free_extent_buffer(old);
12946         root->node = c;
12947         add_root_to_dirty_list(root);
12948         return 0;
12949 }
12950
12951 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12952                                 struct extent_buffer *eb, int tree_root)
12953 {
12954         struct extent_buffer *tmp;
12955         struct btrfs_root_item *ri;
12956         struct btrfs_key key;
12957         u64 bytenr;
12958         int level = btrfs_header_level(eb);
12959         int nritems;
12960         int ret;
12961         int i;
12962
12963         /*
12964          * If we have pinned this block before, don't pin it again.
12965          * This can not only avoid forever loop with broken filesystem
12966          * but also give us some speedups.
12967          */
12968         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12969                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12970                 return 0;
12971
12972         btrfs_pin_extent(fs_info, eb->start, eb->len);
12973
12974         nritems = btrfs_header_nritems(eb);
12975         for (i = 0; i < nritems; i++) {
12976                 if (level == 0) {
12977                         btrfs_item_key_to_cpu(eb, &key, i);
12978                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12979                                 continue;
12980                         /* Skip the extent root and reloc roots */
12981                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12982                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12983                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12984                                 continue;
12985                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12986                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12987
12988                         /*
12989                          * If at any point we start needing the real root we
12990                          * will have to build a stump root for the root we are
12991                          * in, but for now this doesn't actually use the root so
12992                          * just pass in extent_root.
12993                          */
12994                         tmp = read_tree_block(fs_info, bytenr, 0);
12995                         if (!extent_buffer_uptodate(tmp)) {
12996                                 fprintf(stderr, "Error reading root block\n");
12997                                 return -EIO;
12998                         }
12999                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13000                         free_extent_buffer(tmp);
13001                         if (ret)
13002                                 return ret;
13003                 } else {
13004                         bytenr = btrfs_node_blockptr(eb, i);
13005
13006                         /* If we aren't the tree root don't read the block */
13007                         if (level == 1 && !tree_root) {
13008                                 btrfs_pin_extent(fs_info, bytenr,
13009                                                 fs_info->nodesize);
13010                                 continue;
13011                         }
13012
13013                         tmp = read_tree_block(fs_info, bytenr, 0);
13014                         if (!extent_buffer_uptodate(tmp)) {
13015                                 fprintf(stderr, "Error reading tree block\n");
13016                                 return -EIO;
13017                         }
13018                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13019                         free_extent_buffer(tmp);
13020                         if (ret)
13021                                 return ret;
13022                 }
13023         }
13024
13025         return 0;
13026 }
13027
13028 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13029 {
13030         int ret;
13031
13032         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13033         if (ret)
13034                 return ret;
13035
13036         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13037 }
13038
13039 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13040 {
13041         struct btrfs_block_group_cache *cache;
13042         struct btrfs_path path;
13043         struct extent_buffer *leaf;
13044         struct btrfs_chunk *chunk;
13045         struct btrfs_key key;
13046         int ret;
13047         u64 start;
13048
13049         btrfs_init_path(&path);
13050         key.objectid = 0;
13051         key.type = BTRFS_CHUNK_ITEM_KEY;
13052         key.offset = 0;
13053         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13054         if (ret < 0) {
13055                 btrfs_release_path(&path);
13056                 return ret;
13057         }
13058
13059         /*
13060          * We do this in case the block groups were screwed up and had alloc
13061          * bits that aren't actually set on the chunks.  This happens with
13062          * restored images every time and could happen in real life I guess.
13063          */
13064         fs_info->avail_data_alloc_bits = 0;
13065         fs_info->avail_metadata_alloc_bits = 0;
13066         fs_info->avail_system_alloc_bits = 0;
13067
13068         /* First we need to create the in-memory block groups */
13069         while (1) {
13070                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13071                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13072                         if (ret < 0) {
13073                                 btrfs_release_path(&path);
13074                                 return ret;
13075                         }
13076                         if (ret) {
13077                                 ret = 0;
13078                                 break;
13079                         }
13080                 }
13081                 leaf = path.nodes[0];
13082                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13083                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13084                         path.slots[0]++;
13085                         continue;
13086                 }
13087
13088                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13089                 btrfs_add_block_group(fs_info, 0,
13090                                       btrfs_chunk_type(leaf, chunk),
13091                                       key.objectid, key.offset,
13092                                       btrfs_chunk_length(leaf, chunk));
13093                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13094                                  key.offset + btrfs_chunk_length(leaf, chunk));
13095                 path.slots[0]++;
13096         }
13097         start = 0;
13098         while (1) {
13099                 cache = btrfs_lookup_first_block_group(fs_info, start);
13100                 if (!cache)
13101                         break;
13102                 cache->cached = 1;
13103                 start = cache->key.objectid + cache->key.offset;
13104         }
13105
13106         btrfs_release_path(&path);
13107         return 0;
13108 }
13109
13110 static int reset_balance(struct btrfs_trans_handle *trans,
13111                          struct btrfs_fs_info *fs_info)
13112 {
13113         struct btrfs_root *root = fs_info->tree_root;
13114         struct btrfs_path path;
13115         struct extent_buffer *leaf;
13116         struct btrfs_key key;
13117         int del_slot, del_nr = 0;
13118         int ret;
13119         int found = 0;
13120
13121         btrfs_init_path(&path);
13122         key.objectid = BTRFS_BALANCE_OBJECTID;
13123         key.type = BTRFS_BALANCE_ITEM_KEY;
13124         key.offset = 0;
13125         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13126         if (ret) {
13127                 if (ret > 0)
13128                         ret = 0;
13129                 if (!ret)
13130                         goto reinit_data_reloc;
13131                 else
13132                         goto out;
13133         }
13134
13135         ret = btrfs_del_item(trans, root, &path);
13136         if (ret)
13137                 goto out;
13138         btrfs_release_path(&path);
13139
13140         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13141         key.type = BTRFS_ROOT_ITEM_KEY;
13142         key.offset = 0;
13143         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13144         if (ret < 0)
13145                 goto out;
13146         while (1) {
13147                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13148                         if (!found)
13149                                 break;
13150
13151                         if (del_nr) {
13152                                 ret = btrfs_del_items(trans, root, &path,
13153                                                       del_slot, del_nr);
13154                                 del_nr = 0;
13155                                 if (ret)
13156                                         goto out;
13157                         }
13158                         key.offset++;
13159                         btrfs_release_path(&path);
13160
13161                         found = 0;
13162                         ret = btrfs_search_slot(trans, root, &key, &path,
13163                                                 -1, 1);
13164                         if (ret < 0)
13165                                 goto out;
13166                         continue;
13167                 }
13168                 found = 1;
13169                 leaf = path.nodes[0];
13170                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13171                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13172                         break;
13173                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13174                         path.slots[0]++;
13175                         continue;
13176                 }
13177                 if (!del_nr) {
13178                         del_slot = path.slots[0];
13179                         del_nr = 1;
13180                 } else {
13181                         del_nr++;
13182                 }
13183                 path.slots[0]++;
13184         }
13185
13186         if (del_nr) {
13187                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13188                 if (ret)
13189                         goto out;
13190         }
13191         btrfs_release_path(&path);
13192
13193 reinit_data_reloc:
13194         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13195         key.type = BTRFS_ROOT_ITEM_KEY;
13196         key.offset = (u64)-1;
13197         root = btrfs_read_fs_root(fs_info, &key);
13198         if (IS_ERR(root)) {
13199                 fprintf(stderr, "Error reading data reloc tree\n");
13200                 ret = PTR_ERR(root);
13201                 goto out;
13202         }
13203         record_root_in_trans(trans, root);
13204         ret = btrfs_fsck_reinit_root(trans, root, 0);
13205         if (ret)
13206                 goto out;
13207         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13208 out:
13209         btrfs_release_path(&path);
13210         return ret;
13211 }
13212
13213 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13214                               struct btrfs_fs_info *fs_info)
13215 {
13216         u64 start = 0;
13217         int ret;
13218
13219         /*
13220          * The only reason we don't do this is because right now we're just
13221          * walking the trees we find and pinning down their bytes, we don't look
13222          * at any of the leaves.  In order to do mixed groups we'd have to check
13223          * the leaves of any fs roots and pin down the bytes for any file
13224          * extents we find.  Not hard but why do it if we don't have to?
13225          */
13226         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13227                 fprintf(stderr, "We don't support re-initing the extent tree "
13228                         "for mixed block groups yet, please notify a btrfs "
13229                         "developer you want to do this so they can add this "
13230                         "functionality.\n");
13231                 return -EINVAL;
13232         }
13233
13234         /*
13235          * first we need to walk all of the trees except the extent tree and pin
13236          * down the bytes that are in use so we don't overwrite any existing
13237          * metadata.
13238          */
13239         ret = pin_metadata_blocks(fs_info);
13240         if (ret) {
13241                 fprintf(stderr, "error pinning down used bytes\n");
13242                 return ret;
13243         }
13244
13245         /*
13246          * Need to drop all the block groups since we're going to recreate all
13247          * of them again.
13248          */
13249         btrfs_free_block_groups(fs_info);
13250         ret = reset_block_groups(fs_info);
13251         if (ret) {
13252                 fprintf(stderr, "error resetting the block groups\n");
13253                 return ret;
13254         }
13255
13256         /* Ok we can allocate now, reinit the extent root */
13257         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13258         if (ret) {
13259                 fprintf(stderr, "extent root initialization failed\n");
13260                 /*
13261                  * When the transaction code is updated we should end the
13262                  * transaction, but for now progs only knows about commit so
13263                  * just return an error.
13264                  */
13265                 return ret;
13266         }
13267
13268         /*
13269          * Now we have all the in-memory block groups setup so we can make
13270          * allocations properly, and the metadata we care about is safe since we
13271          * pinned all of it above.
13272          */
13273         while (1) {
13274                 struct btrfs_block_group_cache *cache;
13275
13276                 cache = btrfs_lookup_first_block_group(fs_info, start);
13277                 if (!cache)
13278                         break;
13279                 start = cache->key.objectid + cache->key.offset;
13280                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13281                                         &cache->key, &cache->item,
13282                                         sizeof(cache->item));
13283                 if (ret) {
13284                         fprintf(stderr, "Error adding block group\n");
13285                         return ret;
13286                 }
13287                 btrfs_extent_post_op(trans, fs_info->extent_root);
13288         }
13289
13290         ret = reset_balance(trans, fs_info);
13291         if (ret)
13292                 fprintf(stderr, "error resetting the pending balance\n");
13293
13294         return ret;
13295 }
13296
13297 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13298 {
13299         struct btrfs_path path;
13300         struct btrfs_trans_handle *trans;
13301         struct btrfs_key key;
13302         int ret;
13303
13304         printf("Recowing metadata block %llu\n", eb->start);
13305         key.objectid = btrfs_header_owner(eb);
13306         key.type = BTRFS_ROOT_ITEM_KEY;
13307         key.offset = (u64)-1;
13308
13309         root = btrfs_read_fs_root(root->fs_info, &key);
13310         if (IS_ERR(root)) {
13311                 fprintf(stderr, "Couldn't find owner root %llu\n",
13312                         key.objectid);
13313                 return PTR_ERR(root);
13314         }
13315
13316         trans = btrfs_start_transaction(root, 1);
13317         if (IS_ERR(trans))
13318                 return PTR_ERR(trans);
13319
13320         btrfs_init_path(&path);
13321         path.lowest_level = btrfs_header_level(eb);
13322         if (path.lowest_level)
13323                 btrfs_node_key_to_cpu(eb, &key, 0);
13324         else
13325                 btrfs_item_key_to_cpu(eb, &key, 0);
13326
13327         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13328         btrfs_commit_transaction(trans, root);
13329         btrfs_release_path(&path);
13330         return ret;
13331 }
13332
13333 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13334 {
13335         struct btrfs_path path;
13336         struct btrfs_trans_handle *trans;
13337         struct btrfs_key key;
13338         int ret;
13339
13340         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13341                bad->key.type, bad->key.offset);
13342         key.objectid = bad->root_id;
13343         key.type = BTRFS_ROOT_ITEM_KEY;
13344         key.offset = (u64)-1;
13345
13346         root = btrfs_read_fs_root(root->fs_info, &key);
13347         if (IS_ERR(root)) {
13348                 fprintf(stderr, "Couldn't find owner root %llu\n",
13349                         key.objectid);
13350                 return PTR_ERR(root);
13351         }
13352
13353         trans = btrfs_start_transaction(root, 1);
13354         if (IS_ERR(trans))
13355                 return PTR_ERR(trans);
13356
13357         btrfs_init_path(&path);
13358         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13359         if (ret) {
13360                 if (ret > 0)
13361                         ret = 0;
13362                 goto out;
13363         }
13364         ret = btrfs_del_item(trans, root, &path);
13365 out:
13366         btrfs_commit_transaction(trans, root);
13367         btrfs_release_path(&path);
13368         return ret;
13369 }
13370
13371 static int zero_log_tree(struct btrfs_root *root)
13372 {
13373         struct btrfs_trans_handle *trans;
13374         int ret;
13375
13376         trans = btrfs_start_transaction(root, 1);
13377         if (IS_ERR(trans)) {
13378                 ret = PTR_ERR(trans);
13379                 return ret;
13380         }
13381         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13382         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13383         ret = btrfs_commit_transaction(trans, root);
13384         return ret;
13385 }
13386
13387 static int populate_csum(struct btrfs_trans_handle *trans,
13388                          struct btrfs_root *csum_root, char *buf, u64 start,
13389                          u64 len)
13390 {
13391         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13392         u64 offset = 0;
13393         u64 sectorsize;
13394         int ret = 0;
13395
13396         while (offset < len) {
13397                 sectorsize = fs_info->sectorsize;
13398                 ret = read_extent_data(fs_info, buf, start + offset,
13399                                        &sectorsize, 0);
13400                 if (ret)
13401                         break;
13402                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13403                                             start + offset, buf, sectorsize);
13404                 if (ret)
13405                         break;
13406                 offset += sectorsize;
13407         }
13408         return ret;
13409 }
13410
13411 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13412                                       struct btrfs_root *csum_root,
13413                                       struct btrfs_root *cur_root)
13414 {
13415         struct btrfs_path path;
13416         struct btrfs_key key;
13417         struct extent_buffer *node;
13418         struct btrfs_file_extent_item *fi;
13419         char *buf = NULL;
13420         u64 start = 0;
13421         u64 len = 0;
13422         int slot = 0;
13423         int ret = 0;
13424
13425         buf = malloc(cur_root->fs_info->sectorsize);
13426         if (!buf)
13427                 return -ENOMEM;
13428
13429         btrfs_init_path(&path);
13430         key.objectid = 0;
13431         key.offset = 0;
13432         key.type = 0;
13433         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13434         if (ret < 0)
13435                 goto out;
13436         /* Iterate all regular file extents and fill its csum */
13437         while (1) {
13438                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13439
13440                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13441                         goto next;
13442                 node = path.nodes[0];
13443                 slot = path.slots[0];
13444                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13445                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13446                         goto next;
13447                 start = btrfs_file_extent_disk_bytenr(node, fi);
13448                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13449
13450                 ret = populate_csum(trans, csum_root, buf, start, len);
13451                 if (ret == -EEXIST)
13452                         ret = 0;
13453                 if (ret < 0)
13454                         goto out;
13455 next:
13456                 /*
13457                  * TODO: if next leaf is corrupted, jump to nearest next valid
13458                  * leaf.
13459                  */
13460                 ret = btrfs_next_item(cur_root, &path);
13461                 if (ret < 0)
13462                         goto out;
13463                 if (ret > 0) {
13464                         ret = 0;
13465                         goto out;
13466                 }
13467         }
13468
13469 out:
13470         btrfs_release_path(&path);
13471         free(buf);
13472         return ret;
13473 }
13474
13475 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13476                                   struct btrfs_root *csum_root)
13477 {
13478         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13479         struct btrfs_path path;
13480         struct btrfs_root *tree_root = fs_info->tree_root;
13481         struct btrfs_root *cur_root;
13482         struct extent_buffer *node;
13483         struct btrfs_key key;
13484         int slot = 0;
13485         int ret = 0;
13486
13487         btrfs_init_path(&path);
13488         key.objectid = BTRFS_FS_TREE_OBJECTID;
13489         key.offset = 0;
13490         key.type = BTRFS_ROOT_ITEM_KEY;
13491         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13492         if (ret < 0)
13493                 goto out;
13494         if (ret > 0) {
13495                 ret = -ENOENT;
13496                 goto out;
13497         }
13498
13499         while (1) {
13500                 node = path.nodes[0];
13501                 slot = path.slots[0];
13502                 btrfs_item_key_to_cpu(node, &key, slot);
13503                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13504                         goto out;
13505                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13506                         goto next;
13507                 if (!is_fstree(key.objectid))
13508                         goto next;
13509                 key.offset = (u64)-1;
13510
13511                 cur_root = btrfs_read_fs_root(fs_info, &key);
13512                 if (IS_ERR(cur_root) || !cur_root) {
13513                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13514                                 key.objectid);
13515                         goto out;
13516                 }
13517                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13518                                 cur_root);
13519                 if (ret < 0)
13520                         goto out;
13521 next:
13522                 ret = btrfs_next_item(tree_root, &path);
13523                 if (ret > 0) {
13524                         ret = 0;
13525                         goto out;
13526                 }
13527                 if (ret < 0)
13528                         goto out;
13529         }
13530
13531 out:
13532         btrfs_release_path(&path);
13533         return ret;
13534 }
13535
13536 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13537                                       struct btrfs_root *csum_root)
13538 {
13539         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13540         struct btrfs_path path;
13541         struct btrfs_extent_item *ei;
13542         struct extent_buffer *leaf;
13543         char *buf;
13544         struct btrfs_key key;
13545         int ret;
13546
13547         btrfs_init_path(&path);
13548         key.objectid = 0;
13549         key.type = BTRFS_EXTENT_ITEM_KEY;
13550         key.offset = 0;
13551         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13552         if (ret < 0) {
13553                 btrfs_release_path(&path);
13554                 return ret;
13555         }
13556
13557         buf = malloc(csum_root->fs_info->sectorsize);
13558         if (!buf) {
13559                 btrfs_release_path(&path);
13560                 return -ENOMEM;
13561         }
13562
13563         while (1) {
13564                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13565                         ret = btrfs_next_leaf(extent_root, &path);
13566                         if (ret < 0)
13567                                 break;
13568                         if (ret) {
13569                                 ret = 0;
13570                                 break;
13571                         }
13572                 }
13573                 leaf = path.nodes[0];
13574
13575                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13576                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13577                         path.slots[0]++;
13578                         continue;
13579                 }
13580
13581                 ei = btrfs_item_ptr(leaf, path.slots[0],
13582                                     struct btrfs_extent_item);
13583                 if (!(btrfs_extent_flags(leaf, ei) &
13584                       BTRFS_EXTENT_FLAG_DATA)) {
13585                         path.slots[0]++;
13586                         continue;
13587                 }
13588
13589                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13590                                     key.offset);
13591                 if (ret)
13592                         break;
13593                 path.slots[0]++;
13594         }
13595
13596         btrfs_release_path(&path);
13597         free(buf);
13598         return ret;
13599 }
13600
/*
 * Recalculate all data checksums and store them in the csum tree.
 *
 * An extent tree init wipes out all extent info, so in that case the extent
 * tree cannot serve as the source.  When @search_fs_tree is set, the
 * fs/subvolume trees are walked instead to find the data to checksum.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
13617
13618 static void free_roots_info_cache(void)
13619 {
13620         if (!roots_info_cache)
13621                 return;
13622
13623         while (!cache_tree_empty(roots_info_cache)) {
13624                 struct cache_extent *entry;
13625                 struct root_item_info *rii;
13626
13627                 entry = first_cache_extent(roots_info_cache);
13628                 if (!entry)
13629                         break;
13630                 remove_cache_extent(roots_info_cache, entry);
13631                 rii = container_of(entry, struct root_item_info, cache_extent);
13632                 free(rii);
13633         }
13634
13635         free(roots_info_cache);
13636         roots_info_cache = NULL;
13637 }
13638
13639 static int build_roots_info_cache(struct btrfs_fs_info *info)
13640 {
13641         int ret = 0;
13642         struct btrfs_key key;
13643         struct extent_buffer *leaf;
13644         struct btrfs_path path;
13645
13646         if (!roots_info_cache) {
13647                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13648                 if (!roots_info_cache)
13649                         return -ENOMEM;
13650                 cache_tree_init(roots_info_cache);
13651         }
13652
13653         btrfs_init_path(&path);
13654         key.objectid = 0;
13655         key.type = BTRFS_EXTENT_ITEM_KEY;
13656         key.offset = 0;
13657         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13658         if (ret < 0)
13659                 goto out;
13660         leaf = path.nodes[0];
13661
13662         while (1) {
13663                 struct btrfs_key found_key;
13664                 struct btrfs_extent_item *ei;
13665                 struct btrfs_extent_inline_ref *iref;
13666                 int slot = path.slots[0];
13667                 int type;
13668                 u64 flags;
13669                 u64 root_id;
13670                 u8 level;
13671                 struct cache_extent *entry;
13672                 struct root_item_info *rii;
13673
13674                 if (slot >= btrfs_header_nritems(leaf)) {
13675                         ret = btrfs_next_leaf(info->extent_root, &path);
13676                         if (ret < 0) {
13677                                 break;
13678                         } else if (ret) {
13679                                 ret = 0;
13680                                 break;
13681                         }
13682                         leaf = path.nodes[0];
13683                         slot = path.slots[0];
13684                 }
13685
13686                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13687
13688                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13689                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13690                         goto next;
13691
13692                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13693                 flags = btrfs_extent_flags(leaf, ei);
13694
13695                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13696                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13697                         goto next;
13698
13699                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13700                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13701                         level = found_key.offset;
13702                 } else {
13703                         struct btrfs_tree_block_info *binfo;
13704
13705                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13706                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13707                         level = btrfs_tree_block_level(leaf, binfo);
13708                 }
13709
13710                 /*
13711                  * For a root extent, it must be of the following type and the
13712                  * first (and only one) iref in the item.
13713                  */
13714                 type = btrfs_extent_inline_ref_type(leaf, iref);
13715                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13716                         goto next;
13717
13718                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13719                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13720                 if (!entry) {
13721                         rii = malloc(sizeof(struct root_item_info));
13722                         if (!rii) {
13723                                 ret = -ENOMEM;
13724                                 goto out;
13725                         }
13726                         rii->cache_extent.start = root_id;
13727                         rii->cache_extent.size = 1;
13728                         rii->level = (u8)-1;
13729                         entry = &rii->cache_extent;
13730                         ret = insert_cache_extent(roots_info_cache, entry);
13731                         ASSERT(ret == 0);
13732                 } else {
13733                         rii = container_of(entry, struct root_item_info,
13734                                            cache_extent);
13735                 }
13736
13737                 ASSERT(rii->cache_extent.start == root_id);
13738                 ASSERT(rii->cache_extent.size == 1);
13739
13740                 if (level > rii->level || rii->level == (u8)-1) {
13741                         rii->level = level;
13742                         rii->bytenr = found_key.objectid;
13743                         rii->gen = btrfs_extent_generation(leaf, ei);
13744                         rii->node_count = 1;
13745                 } else if (level == rii->level) {
13746                         rii->node_count++;
13747                 }
13748 next:
13749                 path.slots[0]++;
13750         }
13751
13752 out:
13753         btrfs_release_path(&path);
13754
13755         return ret;
13756 }
13757
13758 static int maybe_repair_root_item(struct btrfs_path *path,
13759                                   const struct btrfs_key *root_key,
13760                                   const int read_only_mode)
13761 {
13762         const u64 root_id = root_key->objectid;
13763         struct cache_extent *entry;
13764         struct root_item_info *rii;
13765         struct btrfs_root_item ri;
13766         unsigned long offset;
13767
13768         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13769         if (!entry) {
13770                 fprintf(stderr,
13771                         "Error: could not find extent items for root %llu\n",
13772                         root_key->objectid);
13773                 return -ENOENT;
13774         }
13775
13776         rii = container_of(entry, struct root_item_info, cache_extent);
13777         ASSERT(rii->cache_extent.start == root_id);
13778         ASSERT(rii->cache_extent.size == 1);
13779
13780         if (rii->node_count != 1) {
13781                 fprintf(stderr,
13782                         "Error: could not find btree root extent for root %llu\n",
13783                         root_id);
13784                 return -ENOENT;
13785         }
13786
13787         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13788         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13789
13790         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13791             btrfs_root_level(&ri) != rii->level ||
13792             btrfs_root_generation(&ri) != rii->gen) {
13793
13794                 /*
13795                  * If we're in repair mode but our caller told us to not update
13796                  * the root item, i.e. just check if it needs to be updated, don't
13797                  * print this message, since the caller will call us again shortly
13798                  * for the same root item without read only mode (the caller will
13799                  * open a transaction first).
13800                  */
13801                 if (!(read_only_mode && repair))
13802                         fprintf(stderr,
13803                                 "%sroot item for root %llu,"
13804                                 " current bytenr %llu, current gen %llu, current level %u,"
13805                                 " new bytenr %llu, new gen %llu, new level %u\n",
13806                                 (read_only_mode ? "" : "fixing "),
13807                                 root_id,
13808                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13809                                 btrfs_root_level(&ri),
13810                                 rii->bytenr, rii->gen, rii->level);
13811
13812                 if (btrfs_root_generation(&ri) > rii->gen) {
13813                         fprintf(stderr,
13814                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13815                                 root_id, btrfs_root_generation(&ri), rii->gen);
13816                         return -EINVAL;
13817                 }
13818
13819                 if (!read_only_mode) {
13820                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13821                         btrfs_set_root_level(&ri, rii->level);
13822                         btrfs_set_root_generation(&ri, rii->gen);
13823                         write_extent_buffer(path->nodes[0], &ri,
13824                                             offset, sizeof(ri));
13825                 }
13826
13827                 return 1;
13828         }
13829
13830         return 0;
13831 }
13832
13833 /*
13834  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13835  * caused read-only snapshots to be corrupted if they were created at a moment
13836  * when the source subvolume/snapshot had orphan items. The issue was that the
13837  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13838  * node instead of the post orphan cleanup root node.
13839  * So this function, and its callees, just detects and fixes those cases. Even
13840  * though the regression was for read-only snapshots, this function applies to
13841  * any snapshot/subvolume root.
13842  * This must be run before any other repair code - not doing it so, makes other
13843  * repair code delete or modify backrefs in the extent tree for example, which
13844  * will result in an inconsistent fs after repairing the root items.
13845  */
13846 static int repair_root_items(struct btrfs_fs_info *info)
13847 {
13848         struct btrfs_path path;
13849         struct btrfs_key key;
13850         struct extent_buffer *leaf;
13851         struct btrfs_trans_handle *trans = NULL;
13852         int ret = 0;
13853         int bad_roots = 0;
13854         int need_trans = 0;
13855
13856         btrfs_init_path(&path);
13857
13858         ret = build_roots_info_cache(info);
13859         if (ret)
13860                 goto out;
13861
13862         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13863         key.type = BTRFS_ROOT_ITEM_KEY;
13864         key.offset = 0;
13865
13866 again:
13867         /*
13868          * Avoid opening and committing transactions if a leaf doesn't have
13869          * any root items that need to be fixed, so that we avoid rotating
13870          * backup roots unnecessarily.
13871          */
13872         if (need_trans) {
13873                 trans = btrfs_start_transaction(info->tree_root, 1);
13874                 if (IS_ERR(trans)) {
13875                         ret = PTR_ERR(trans);
13876                         goto out;
13877                 }
13878         }
13879
13880         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13881                                 0, trans ? 1 : 0);
13882         if (ret < 0)
13883                 goto out;
13884         leaf = path.nodes[0];
13885
13886         while (1) {
13887                 struct btrfs_key found_key;
13888
13889                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13890                         int no_more_keys = find_next_key(&path, &key);
13891
13892                         btrfs_release_path(&path);
13893                         if (trans) {
13894                                 ret = btrfs_commit_transaction(trans,
13895                                                                info->tree_root);
13896                                 trans = NULL;
13897                                 if (ret < 0)
13898                                         goto out;
13899                         }
13900                         need_trans = 0;
13901                         if (no_more_keys)
13902                                 break;
13903                         goto again;
13904                 }
13905
13906                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13907
13908                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13909                         goto next;
13910                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13911                         goto next;
13912
13913                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13914                 if (ret < 0)
13915                         goto out;
13916                 if (ret) {
13917                         if (!trans && repair) {
13918                                 need_trans = 1;
13919                                 key = found_key;
13920                                 btrfs_release_path(&path);
13921                                 goto again;
13922                         }
13923                         bad_roots++;
13924                 }
13925 next:
13926                 path.slots[0]++;
13927         }
13928         ret = 0;
13929 out:
13930         free_roots_info_cache();
13931         btrfs_release_path(&path);
13932         if (trans)
13933                 btrfs_commit_transaction(trans, info->tree_root);
13934         if (ret < 0)
13935                 return ret;
13936
13937         return bad_roots;
13938 }
13939
13940 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13941 {
13942         struct btrfs_trans_handle *trans;
13943         struct btrfs_block_group_cache *bg_cache;
13944         u64 current = 0;
13945         int ret = 0;
13946
13947         /* Clear all free space cache inodes and its extent data */
13948         while (1) {
13949                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13950                 if (!bg_cache)
13951                         break;
13952                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13953                 if (ret < 0)
13954                         return ret;
13955                 current = bg_cache->key.objectid + bg_cache->key.offset;
13956         }
13957
13958         /* Don't forget to set cache_generation to -1 */
13959         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13960         if (IS_ERR(trans)) {
13961                 error("failed to update super block cache generation");
13962                 return PTR_ERR(trans);
13963         }
13964         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13965         btrfs_commit_transaction(trans, fs_info->tree_root);
13966
13967         return ret;
13968 }
13969
13970 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13971                 int clear_version)
13972 {
13973         int ret = 0;
13974
13975         if (clear_version == 1) {
13976                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13977                         error(
13978                 "free space cache v2 detected, use --clear-space-cache v2");
13979                         ret = 1;
13980                         goto close_out;
13981                 }
13982                 printf("Clearing free space cache\n");
13983                 ret = clear_free_space_cache(fs_info);
13984                 if (ret) {
13985                         error("failed to clear free space cache");
13986                         ret = 1;
13987                 } else {
13988                         printf("Free space cache cleared\n");
13989                 }
13990         } else if (clear_version == 2) {
13991                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13992                         printf("no free space cache v2 to clear\n");
13993                         ret = 0;
13994                         goto close_out;
13995                 }
13996                 printf("Clear free space cache v2\n");
13997                 ret = btrfs_clear_free_space_tree(fs_info);
13998                 if (ret) {
13999                         error("failed to clear free space cache v2: %d", ret);
14000                         ret = 1;
14001                 } else {
14002                         printf("free space cache v2 cleared\n");
14003                 }
14004         }
14005 close_out:
14006         return ret;
14007 }
14008
/*
 * Help text for "btrfs check", one array element per output line, terminated
 * by NULL.  The first entry is the synopsis and the second the one-line
 * summary; the entries up to the empty string form the long description, and
 * the entries after it describe the options.
 */
const char * const cmd_check_usage[] = {
	/* synopsis and one-line summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--force                     skip mount checks, repair is not possible",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",

	/* terminator */
	NULL
};
14040
14041 int cmd_check(int argc, char **argv)
14042 {
14043         struct cache_tree root_cache;
14044         struct btrfs_root *root;
14045         struct btrfs_fs_info *info;
14046         u64 bytenr = 0;
14047         u64 subvolid = 0;
14048         u64 tree_root_bytenr = 0;
14049         u64 chunk_root_bytenr = 0;
14050         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14051         int ret = 0;
14052         int err = 0;
14053         u64 num;
14054         int init_csum_tree = 0;
14055         int readonly = 0;
14056         int clear_space_cache = 0;
14057         int qgroup_report = 0;
14058         int qgroups_repaired = 0;
14059         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14060         int force = 0;
14061
14062         while(1) {
14063                 int c;
14064                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14065                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14066                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14067                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14068                         GETOPT_VAL_FORCE };
14069                 static const struct option long_options[] = {
14070                         { "super", required_argument, NULL, 's' },
14071                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14072                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14073                         { "init-csum-tree", no_argument, NULL,
14074                                 GETOPT_VAL_INIT_CSUM },
14075                         { "init-extent-tree", no_argument, NULL,
14076                                 GETOPT_VAL_INIT_EXTENT },
14077                         { "check-data-csum", no_argument, NULL,
14078                                 GETOPT_VAL_CHECK_CSUM },
14079                         { "backup", no_argument, NULL, 'b' },
14080                         { "subvol-extents", required_argument, NULL, 'E' },
14081                         { "qgroup-report", no_argument, NULL, 'Q' },
14082                         { "tree-root", required_argument, NULL, 'r' },
14083                         { "chunk-root", required_argument, NULL,
14084                                 GETOPT_VAL_CHUNK_TREE },
14085                         { "progress", no_argument, NULL, 'p' },
14086                         { "mode", required_argument, NULL,
14087                                 GETOPT_VAL_MODE },
14088                         { "clear-space-cache", required_argument, NULL,
14089                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14090                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14091                         { NULL, 0, NULL, 0}
14092                 };
14093
14094                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14095                 if (c < 0)
14096                         break;
14097                 switch(c) {
14098                         case 'a': /* ignored */ break;
14099                         case 'b':
14100                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14101                                 break;
14102                         case 's':
14103                                 num = arg_strtou64(optarg);
14104                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14105                                         error(
14106                                         "super mirror should be less than %d",
14107                                                 BTRFS_SUPER_MIRROR_MAX);
14108                                         exit(1);
14109                                 }
14110                                 bytenr = btrfs_sb_offset(((int)num));
14111                                 printf("using SB copy %llu, bytenr %llu\n", num,
14112                                        (unsigned long long)bytenr);
14113                                 break;
14114                         case 'Q':
14115                                 qgroup_report = 1;
14116                                 break;
14117                         case 'E':
14118                                 subvolid = arg_strtou64(optarg);
14119                                 break;
14120                         case 'r':
14121                                 tree_root_bytenr = arg_strtou64(optarg);
14122                                 break;
14123                         case GETOPT_VAL_CHUNK_TREE:
14124                                 chunk_root_bytenr = arg_strtou64(optarg);
14125                                 break;
14126                         case 'p':
14127                                 ctx.progress_enabled = true;
14128                                 break;
14129                         case '?':
14130                         case 'h':
14131                                 usage(cmd_check_usage);
14132                         case GETOPT_VAL_REPAIR:
14133                                 printf("enabling repair mode\n");
14134                                 repair = 1;
14135                                 ctree_flags |= OPEN_CTREE_WRITES;
14136                                 break;
14137                         case GETOPT_VAL_READONLY:
14138                                 readonly = 1;
14139                                 break;
14140                         case GETOPT_VAL_INIT_CSUM:
14141                                 printf("Creating a new CRC tree\n");
14142                                 init_csum_tree = 1;
14143                                 repair = 1;
14144                                 ctree_flags |= OPEN_CTREE_WRITES;
14145                                 break;
14146                         case GETOPT_VAL_INIT_EXTENT:
14147                                 init_extent_tree = 1;
14148                                 ctree_flags |= (OPEN_CTREE_WRITES |
14149                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14150                                 repair = 1;
14151                                 break;
14152                         case GETOPT_VAL_CHECK_CSUM:
14153                                 check_data_csum = 1;
14154                                 break;
14155                         case GETOPT_VAL_MODE:
14156                                 check_mode = parse_check_mode(optarg);
14157                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14158                                         error("unknown mode: %s", optarg);
14159                                         exit(1);
14160                                 }
14161                                 break;
14162                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14163                                 if (strcmp(optarg, "v1") == 0) {
14164                                         clear_space_cache = 1;
14165                                 } else if (strcmp(optarg, "v2") == 0) {
14166                                         clear_space_cache = 2;
14167                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14168                                 } else {
14169                                         error(
14170                 "invalid argument to --clear-space-cache, must be v1 or v2");
14171                                         exit(1);
14172                                 }
14173                                 ctree_flags |= OPEN_CTREE_WRITES;
14174                                 break;
14175                         case GETOPT_VAL_FORCE:
14176                                 force = 1;
14177                                 break;
14178                 }
14179         }
14180
14181         if (check_argc_exact(argc - optind, 1))
14182                 usage(cmd_check_usage);
14183
14184         if (ctx.progress_enabled) {
14185                 ctx.tp = TASK_NOTHING;
14186                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14187         }
14188
14189         /* This check is the only reason for --readonly to exist */
14190         if (readonly && repair) {
14191                 error("repair options are not compatible with --readonly");
14192                 exit(1);
14193         }
14194
14195         /*
14196          * experimental and dangerous
14197          */
14198         if (repair && check_mode == CHECK_MODE_LOWMEM)
14199                 warning("low-memory mode repair support is only partial");
14200
14201         radix_tree_init();
14202         cache_tree_init(&root_cache);
14203
14204         ret = check_mounted(argv[optind]);
14205         if (!force) {
14206                 if (ret < 0) {
14207                         error("could not check mount status: %s",
14208                                         strerror(-ret));
14209                         err |= !!ret;
14210                         goto err_out;
14211                 } else if (ret) {
14212                         error(
14213 "%s is currently mounted, use --force if you really intend to check the filesystem",
14214                                 argv[optind]);
14215                         ret = -EBUSY;
14216                         err |= !!ret;
14217                         goto err_out;
14218                 }
14219         } else {
14220                 if (repair) {
14221                         error("repair and --force is not yet supported");
14222                         ret = 1;
14223                         err |= !!ret;
14224                         goto err_out;
14225                 }
14226                 if (ret < 0) {
14227                         warning(
14228 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14229                                 argv[optind]);
14230                 } else if (ret) {
14231                         warning(
14232                         "filesystem mounted, continuing because of --force");
14233                 }
14234                 /* A block device is mounted in exclusive mode by kernel */
14235                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14236         }
14237
14238         /* only allow partial opening under repair mode */
14239         if (repair)
14240                 ctree_flags |= OPEN_CTREE_PARTIAL;
14241
14242         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14243                                   chunk_root_bytenr, ctree_flags);
14244         if (!info) {
14245                 error("cannot open file system");
14246                 ret = -EIO;
14247                 err |= !!ret;
14248                 goto err_out;
14249         }
14250
14251         global_info = info;
14252         root = info->fs_root;
14253         uuid_unparse(info->super_copy->fsid, uuidbuf);
14254
14255         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14256
14257         /*
14258          * Check the bare minimum before starting anything else that could rely
14259          * on it, namely the tree roots, any local consistency checks
14260          */
14261         if (!extent_buffer_uptodate(info->tree_root->node) ||
14262             !extent_buffer_uptodate(info->dev_root->node) ||
14263             !extent_buffer_uptodate(info->chunk_root->node)) {
14264                 error("critical roots corrupted, unable to check the filesystem");
14265                 err |= !!ret;
14266                 ret = -EIO;
14267                 goto close_out;
14268         }
14269
14270         if (clear_space_cache) {
14271                 ret = do_clear_free_space_cache(info, clear_space_cache);
14272                 err |= !!ret;
14273                 goto close_out;
14274         }
14275
14276         /*
14277          * repair mode will force us to commit transaction which
14278          * will make us fail to load log tree when mounting.
14279          */
14280         if (repair && btrfs_super_log_root(info->super_copy)) {
14281                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14282                 if (!ret) {
14283                         ret = 1;
14284                         err |= !!ret;
14285                         goto close_out;
14286                 }
14287                 ret = zero_log_tree(root);
14288                 err |= !!ret;
14289                 if (ret) {
14290                         error("failed to zero log tree: %d", ret);
14291                         goto close_out;
14292                 }
14293         }
14294
14295         if (qgroup_report) {
14296                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14297                        uuidbuf);
14298                 ret = qgroup_verify_all(info);
14299                 err |= !!ret;
14300                 if (ret == 0)
14301                         report_qgroups(1);
14302                 goto close_out;
14303         }
14304         if (subvolid) {
14305                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14306                        subvolid, argv[optind], uuidbuf);
14307                 ret = print_extent_state(info, subvolid);
14308                 err |= !!ret;
14309                 goto close_out;
14310         }
14311
14312         if (init_extent_tree || init_csum_tree) {
14313                 struct btrfs_trans_handle *trans;
14314
14315                 trans = btrfs_start_transaction(info->extent_root, 0);
14316                 if (IS_ERR(trans)) {
14317                         error("error starting transaction");
14318                         ret = PTR_ERR(trans);
14319                         err |= !!ret;
14320                         goto close_out;
14321                 }
14322
14323                 if (init_extent_tree) {
14324                         printf("Creating a new extent tree\n");
14325                         ret = reinit_extent_tree(trans, info);
14326                         err |= !!ret;
14327                         if (ret)
14328                                 goto close_out;
14329                 }
14330
14331                 if (init_csum_tree) {
14332                         printf("Reinitialize checksum tree\n");
14333                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14334                         if (ret) {
14335                                 error("checksum tree initialization failed: %d",
14336                                                 ret);
14337                                 ret = -EIO;
14338                                 err |= !!ret;
14339                                 goto close_out;
14340                         }
14341
14342                         ret = fill_csum_tree(trans, info->csum_root,
14343                                              init_extent_tree);
14344                         err |= !!ret;
14345                         if (ret) {
14346                                 error("checksum tree refilling failed: %d", ret);
14347                                 return -EIO;
14348                         }
14349                 }
14350                 /*
14351                  * Ok now we commit and run the normal fsck, which will add
14352                  * extent entries for all of the items it finds.
14353                  */
14354                 ret = btrfs_commit_transaction(trans, info->extent_root);
14355                 err |= !!ret;
14356                 if (ret)
14357                         goto close_out;
14358         }
14359         if (!extent_buffer_uptodate(info->extent_root->node)) {
14360                 error("critical: extent_root, unable to check the filesystem");
14361                 ret = -EIO;
14362                 err |= !!ret;
14363                 goto close_out;
14364         }
14365         if (!extent_buffer_uptodate(info->csum_root->node)) {
14366                 error("critical: csum_root, unable to check the filesystem");
14367                 ret = -EIO;
14368                 err |= !!ret;
14369                 goto close_out;
14370         }
14371
14372         ret = do_check_chunks_and_extents(info);
14373         err |= !!ret;
14374         if (ret)
14375                 error(
14376                 "errors found in extent allocation tree or chunk allocation");
14377
14378         ret = repair_root_items(info);
14379         err |= !!ret;
14380         if (ret < 0) {
14381                 error("failed to repair root items: %s", strerror(-ret));
14382                 goto close_out;
14383         }
14384         if (repair) {
14385                 fprintf(stderr, "Fixed %d roots.\n", ret);
14386                 ret = 0;
14387         } else if (ret > 0) {
14388                 fprintf(stderr,
14389                        "Found %d roots with an outdated root item.\n",
14390                        ret);
14391                 fprintf(stderr,
14392                         "Please run a filesystem check with the option --repair to fix them.\n");
14393                 ret = 1;
14394                 err |= !!ret;
14395                 goto close_out;
14396         }
14397
14398         if (!ctx.progress_enabled) {
14399                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14400                         fprintf(stderr, "checking free space tree\n");
14401                 else
14402                         fprintf(stderr, "checking free space cache\n");
14403         }
14404         ret = check_space_cache(root);
14405         err |= !!ret;
14406         if (ret) {
14407                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14408                         error("errors found in free space tree");
14409                 else
14410                         error("errors found in free space cache");
14411                 goto out;
14412         }
14413
14414         /*
14415          * We used to have to have these hole extents in between our real
14416          * extents so if we don't have this flag set we need to make sure there
14417          * are no gaps in the file extents for inodes, otherwise we can just
14418          * ignore it when this happens.
14419          */
14420         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14421         ret = do_check_fs_roots(info, &root_cache);
14422         err |= !!ret;
14423         if (ret) {
14424                 error("errors found in fs roots");
14425                 goto out;
14426         }
14427
14428         fprintf(stderr, "checking csums\n");
14429         ret = check_csums(root);
14430         err |= !!ret;
14431         if (ret) {
14432                 error("errors found in csum tree");
14433                 goto out;
14434         }
14435
14436         fprintf(stderr, "checking root refs\n");
14437         /* For low memory mode, check_fs_roots_v2 handles root refs */
14438         if (check_mode != CHECK_MODE_LOWMEM) {
14439                 ret = check_root_refs(root, &root_cache);
14440                 err |= !!ret;
14441                 if (ret) {
14442                         error("errors found in root refs");
14443                         goto out;
14444                 }
14445         }
14446
14447         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14448                 struct extent_buffer *eb;
14449
14450                 eb = list_first_entry(&root->fs_info->recow_ebs,
14451                                       struct extent_buffer, recow);
14452                 list_del_init(&eb->recow);
14453                 ret = recow_extent_buffer(root, eb);
14454                 err |= !!ret;
14455                 if (ret) {
14456                         error("fails to fix transid errors");
14457                         break;
14458                 }
14459         }
14460
14461         while (!list_empty(&delete_items)) {
14462                 struct bad_item *bad;
14463
14464                 bad = list_first_entry(&delete_items, struct bad_item, list);
14465                 list_del_init(&bad->list);
14466                 if (repair) {
14467                         ret = delete_bad_item(root, bad);
14468                         err |= !!ret;
14469                 }
14470                 free(bad);
14471         }
14472
14473         if (info->quota_enabled) {
14474                 fprintf(stderr, "checking quota groups\n");
14475                 ret = qgroup_verify_all(info);
14476                 err |= !!ret;
14477                 if (ret) {
14478                         error("failed to check quota groups");
14479                         goto out;
14480                 }
14481                 report_qgroups(0);
14482                 ret = repair_qgroups(info, &qgroups_repaired);
14483                 err |= !!ret;
14484                 if (err) {
14485                         error("failed to repair quota groups");
14486                         goto out;
14487                 }
14488                 ret = 0;
14489         }
14490
14491         if (!list_empty(&root->fs_info->recow_ebs)) {
14492                 error("transid errors in file system");
14493                 ret = 1;
14494                 err |= !!ret;
14495         }
14496 out:
14497         printf("found %llu bytes used, ",
14498                (unsigned long long)bytes_used);
14499         if (err)
14500                 printf("error(s) found\n");
14501         else
14502                 printf("no error found\n");
14503         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14504         printf("total tree bytes: %llu\n",
14505                (unsigned long long)total_btree_bytes);
14506         printf("total fs tree bytes: %llu\n",
14507                (unsigned long long)total_fs_tree_bytes);
14508         printf("total extent tree bytes: %llu\n",
14509                (unsigned long long)total_extent_tree_bytes);
14510         printf("btree space waste bytes: %llu\n",
14511                (unsigned long long)btree_space_waste);
14512         printf("file data blocks allocated: %llu\n referenced %llu\n",
14513                 (unsigned long long)data_bytes_allocated,
14514                 (unsigned long long)data_bytes_referenced);
14515
14516         free_qgroup_counts();
14517         free_root_recs_tree(&root_cache);
14518 close_out:
14519         close_ctree(root);
14520 err_out:
14521         if (ctx.progress_enabled)
14522                 task_deinit(ctx.info);
14523
14524         return err;
14525 }