437a084211a2c166a8e03a31be2a5379c97789a0
[platform/upstream/btrfs-progs.git] / cmds-check.c
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase shown by the progress task.  The values index
 * task_position_string[] in print_status_check(), so their order matters.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE = 1,
	TASK_FS_ROOTS = 2,
	TASK_NOTHING = 3,	/* must stay the last element */
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/* Check implementations selectable by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM = 1,
	CHECK_MODE_UNKNOWN = 2,		/* returned for an unrecognized name */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct rb_node node;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/* Error bits, one bit per condition.  Bit 0 is not assigned. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM has no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but does not match */
#define DIR_INDEX_MISSING	(1 << 18)	/* DIR_INDEX not found */
#define DIR_INDEX_MISMATCH	(1 << 19)	/* DIR_INDEX found but does not match */
#define DIR_COUNT_AGAIN		(1 << 20)	/* DIR isize should be recalculated */
136
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
138 {
139         return container_of(back, struct data_backref, node);
140 }
141
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
143 {
144         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146         struct data_backref *back1 = to_data_backref(ext1);
147         struct data_backref *back2 = to_data_backref(ext2);
148
149         WARN_ON(!ext1->is_data);
150         WARN_ON(!ext2->is_data);
151
152         /* parent and root are a union, so this covers both */
153         if (back1->parent > back2->parent)
154                 return 1;
155         if (back1->parent < back2->parent)
156                 return -1;
157
158         /* This is a full backref and the parents match. */
159         if (back1->node.full_backref)
160                 return 0;
161
162         if (back1->owner > back2->owner)
163                 return 1;
164         if (back1->owner < back2->owner)
165                 return -1;
166
167         if (back1->offset > back2->offset)
168                 return 1;
169         if (back1->offset < back2->offset)
170                 return -1;
171
172         if (back1->found_ref && back2->found_ref) {
173                 if (back1->disk_bytenr > back2->disk_bytenr)
174                         return 1;
175                 if (back1->disk_bytenr < back2->disk_bytenr)
176                         return -1;
177
178                 if (back1->bytes > back2->bytes)
179                         return 1;
180                 if (back1->bytes < back2->bytes)
181                         return -1;
182         }
183
184         return 0;
185 }
186
187 /*
188  * Much like data_backref, just removed the undetermined members
189  * and change it to use list_head.
190  * During extent scan, it is stored in root->orphan_data_extent.
191  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
192  */
193 struct orphan_data_extent {
194         struct list_head list;
195         u64 root;
196         u64 objectid;
197         u64 offset;
198         u64 disk_bytenr;
199         u64 disk_len;
200 };
201
202 struct tree_backref {
203         struct extent_backref node;
204         union {
205                 u64 parent;
206                 u64 root;
207         };
208 };
209
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
211 {
212         return container_of(back, struct tree_backref, node);
213 }
214
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
216 {
217         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219         struct tree_backref *back1 = to_tree_backref(ext1);
220         struct tree_backref *back2 = to_tree_backref(ext2);
221
222         WARN_ON(ext1->is_data);
223         WARN_ON(ext2->is_data);
224
225         /* parent and root are a union, so this covers both */
226         if (back1->parent > back2->parent)
227                 return 1;
228         if (back1->parent < back2->parent)
229                 return -1;
230
231         return 0;
232 }
233
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
235 {
236         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
238
239         if (ext1->is_data > ext2->is_data)
240                 return 1;
241
242         if (ext1->is_data < ext2->is_data)
243                 return -1;
244
245         if (ext1->full_backref > ext2->full_backref)
246                 return 1;
247         if (ext1->full_backref < ext2->full_backref)
248                 return -1;
249
250         if (ext1->is_data)
251                 return compare_data_backref(node1, node2);
252         else
253                 return compare_tree_backref(node1, node2);
254 }
255
/*
 * Explicit "not set yet" value for extent_record::flag_block_full_backref,
 * which is two bits wide so that it can hold this value.
 */
enum {
	FLAG_UNSET = 2
};
258
259 struct extent_record {
260         struct list_head backrefs;
261         struct list_head dups;
262         struct rb_root backref_tree;
263         struct list_head list;
264         struct cache_extent cache;
265         struct btrfs_disk_key parent_key;
266         u64 start;
267         u64 max_size;
268         u64 nr;
269         u64 refs;
270         u64 extent_item_refs;
271         u64 generation;
272         u64 parent_generation;
273         u64 info_objectid;
274         u32 num_duplicates;
275         u8 info_level;
276         unsigned int flag_block_full_backref:2;
277         unsigned int found_rec:1;
278         unsigned int content_checked:1;
279         unsigned int owner_ref_checked:1;
280         unsigned int is_root:1;
281         unsigned int metadata:1;
282         unsigned int bad_full_backref:1;
283         unsigned int crossing_stripes:1;
284         unsigned int wrong_chunk_type:1;
285 };
286
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
288 {
289         return container_of(entry, struct extent_record, list);
290 }
291
292 struct inode_backref {
293         struct list_head list;
294         unsigned int found_dir_item:1;
295         unsigned int found_dir_index:1;
296         unsigned int found_inode_ref:1;
297         u8 filetype;
298         u8 ref_type;
299         int errors;
300         u64 dir;
301         u64 index;
302         u16 namelen;
303         char name[0];
304 };
305
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
307 {
308         return list_entry(entry, struct inode_backref, list);
309 }
310
311 struct root_item_record {
312         struct list_head list;
313         u64 objectid;
314         u64 bytenr;
315         u64 last_snapshot;
316         u8 level;
317         u8 drop_level;
318         struct btrfs_key drop_key;
319 };
320
/* Error bits kept in inode_backref::errors, printed by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
334
335 struct file_extent_hole {
336         struct rb_node node;
337         u64 start;
338         u64 len;
339 };
340
341 struct inode_record {
342         struct list_head backrefs;
343         unsigned int checked:1;
344         unsigned int merging:1;
345         unsigned int found_inode_item:1;
346         unsigned int found_dir_item:1;
347         unsigned int found_file_extent:1;
348         unsigned int found_csum_item:1;
349         unsigned int some_csum_missing:1;
350         unsigned int nodatasum:1;
351         int errors;
352
353         u64 ino;
354         u32 nlink;
355         u32 imode;
356         u64 isize;
357         u64 nbytes;
358
359         u32 found_link;
360         u64 found_size;
361         u64 extent_start;
362         u64 extent_end;
363         struct rb_root holes;
364         struct list_head orphan_extents;
365
366         u32 refs;
367 };
368
/* Error bits kept in inode_record::errors, decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
384
385 struct root_backref {
386         struct list_head list;
387         unsigned int found_dir_item:1;
388         unsigned int found_dir_index:1;
389         unsigned int found_back_ref:1;
390         unsigned int found_forward_ref:1;
391         unsigned int reachable:1;
392         int errors;
393         u64 ref_root;
394         u64 dir;
395         u64 index;
396         u16 namelen;
397         char name[0];
398 };
399
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
401 {
402         return list_entry(entry, struct root_backref, list);
403 }
404
405 struct root_record {
406         struct list_head backrefs;
407         struct cache_extent cache;
408         unsigned int found_root_item:1;
409         u64 objectid;
410         u32 found_ref;
411 };
412
413 struct ptr_node {
414         struct cache_extent cache;
415         void *data;
416 };
417
418 struct shared_node {
419         struct cache_extent cache;
420         struct cache_tree root_cache;
421         struct cache_tree inode_cache;
422         struct inode_record *current;
423         u32 refs;
424 };
425
426 struct block_info {
427         u64 start;
428         u32 size;
429 };
430
431 struct walk_control {
432         struct cache_tree shared;
433         struct shared_node *nodes[BTRFS_MAX_LEVEL];
434         int active_node;
435         int root_level;
436 };
437
438 struct bad_item {
439         struct btrfs_key key;
440         u64 root_id;
441         struct list_head list;
442 };
443
444 struct extent_entry {
445         u64 bytenr;
446         u64 bytes;
447         int count;
448         int broken;
449         struct list_head list;
450 };
451
452 struct root_item_info {
453         /* level of the root */
454         u8 level;
455         /* number of nodes at this level, must be 1 for a root */
456         int node_count;
457         u64 bytenr;
458         u64 gen;
459         struct cache_extent cache_extent;
460 };
461
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about them yet.  Just internal use for error
 * classification.
 *
 * Every flag owns its own bit so that flags can be OR-ed together and
 * tested independently.  CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH, which made the two conditions indistinguishable; it
 * now uses the first free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
478
479 static void *print_status_check(void *p)
480 {
481         struct task_ctx *priv = p;
482         const char work_indicator[] = { '.', 'o', 'O', 'o' };
483         uint32_t count = 0;
484         static char *task_position_string[] = {
485                 "checking extents",
486                 "checking free space cache",
487                 "checking fs roots",
488         };
489
490         task_period_start(priv->info, 1000 /* 1s */);
491
492         if (priv->tp == TASK_NOTHING)
493                 return NULL;
494
495         while (1) {
496                 printf("%s [%c]\r", task_position_string[priv->tp],
497                                 work_indicator[count % 4]);
498                 count++;
499                 fflush(stdout);
500                 task_period_wait(priv->info);
501         }
502         return NULL;
503 }
504
/*
 * Emit a newline on stdout and flush it.
 *
 * @p: unused.
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
512
513 static enum btrfs_check_mode parse_check_mode(const char *str)
514 {
515         if (strcmp(str, "lowmem") == 0)
516                 return CHECK_MODE_LOWMEM;
517         if (strcmp(str, "orig") == 0)
518                 return CHECK_MODE_ORIGINAL;
519         if (strcmp(str, "original") == 0)
520                 return CHECK_MODE_ORIGINAL;
521
522         return CHECK_MODE_UNKNOWN;
523 }
524
525 /* Compatible function to allow reuse of old codes */
526 static u64 first_extent_gap(struct rb_root *holes)
527 {
528         struct file_extent_hole *hole;
529
530         if (RB_EMPTY_ROOT(holes))
531                 return (u64)-1;
532
533         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
534         return hole->start;
535 }
536
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
538 {
539         struct file_extent_hole *hole1;
540         struct file_extent_hole *hole2;
541
542         hole1 = rb_entry(node1, struct file_extent_hole, node);
543         hole2 = rb_entry(node2, struct file_extent_hole, node);
544
545         if (hole1->start > hole2->start)
546                 return -1;
547         if (hole1->start < hole2->start)
548                 return 1;
549         /* Now hole1->start == hole2->start */
550         if (hole1->len >= hole2->len)
551                 /*
552                  * Hole 1 will be merge center
553                  * Same hole will be merged later
554                  */
555                 return -1;
556         /* Hole 2 will be merge center */
557         return 1;
558 }
559
560 /*
561  * Add a hole to the record
562  *
563  * This will do hole merge for copy_file_extent_holes(),
564  * which will ensure there won't be continuous holes.
565  */
566 static int add_file_extent_hole(struct rb_root *holes,
567                                 u64 start, u64 len)
568 {
569         struct file_extent_hole *hole;
570         struct file_extent_hole *prev = NULL;
571         struct file_extent_hole *next = NULL;
572
573         hole = malloc(sizeof(*hole));
574         if (!hole)
575                 return -ENOMEM;
576         hole->start = start;
577         hole->len = len;
578         /* Since compare will not return 0, no -EEXIST will happen */
579         rb_insert(holes, &hole->node, compare_hole);
580
581         /* simple merge with previous hole */
582         if (rb_prev(&hole->node))
583                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
584                                 node);
585         if (prev && prev->start + prev->len >= hole->start) {
586                 hole->len = hole->start + hole->len - prev->start;
587                 hole->start = prev->start;
588                 rb_erase(&prev->node, holes);
589                 free(prev);
590                 prev = NULL;
591         }
592
593         /* iterate merge with next holes */
594         while (1) {
595                 if (!rb_next(&hole->node))
596                         break;
597                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
598                                         node);
599                 if (hole->start + hole->len >= next->start) {
600                         if (hole->start + hole->len <= next->start + next->len)
601                                 hole->len = next->start + next->len -
602                                             hole->start;
603                         rb_erase(&next->node, holes);
604                         free(next);
605                         next = NULL;
606                 } else
607                         break;
608         }
609         return 0;
610 }
611
612 static int compare_hole_range(struct rb_node *node, void *data)
613 {
614         struct file_extent_hole *hole;
615         u64 start;
616
617         hole = (struct file_extent_hole *)data;
618         start = hole->start;
619
620         hole = rb_entry(node, struct file_extent_hole, node);
621         if (start < hole->start)
622                 return -1;
623         if (start >= hole->start && start < hole->start + hole->len)
624                 return 0;
625         return 1;
626 }
627
628 /*
629  * Delete a hole in the record
630  *
631  * This will do the hole split and is much restrict than add.
632  */
633 static int del_file_extent_hole(struct rb_root *holes,
634                                 u64 start, u64 len)
635 {
636         struct file_extent_hole *hole;
637         struct file_extent_hole tmp;
638         u64 prev_start = 0;
639         u64 prev_len = 0;
640         u64 next_start = 0;
641         u64 next_len = 0;
642         struct rb_node *node;
643         int have_prev = 0;
644         int have_next = 0;
645         int ret = 0;
646
647         tmp.start = start;
648         tmp.len = len;
649         node = rb_search(holes, &tmp, compare_hole_range, NULL);
650         if (!node)
651                 return -EEXIST;
652         hole = rb_entry(node, struct file_extent_hole, node);
653         if (start + len > hole->start + hole->len)
654                 return -EEXIST;
655
656         /*
657          * Now there will be no overlap, delete the hole and re-add the
658          * split(s) if they exists.
659          */
660         if (start > hole->start) {
661                 prev_start = hole->start;
662                 prev_len = start - hole->start;
663                 have_prev = 1;
664         }
665         if (hole->start + hole->len > start + len) {
666                 next_start = start + len;
667                 next_len = hole->start + hole->len - start - len;
668                 have_next = 1;
669         }
670         rb_erase(node, holes);
671         free(hole);
672         if (have_prev) {
673                 ret = add_file_extent_hole(holes, prev_start, prev_len);
674                 if (ret < 0)
675                         return ret;
676         }
677         if (have_next) {
678                 ret = add_file_extent_hole(holes, next_start, next_len);
679                 if (ret < 0)
680                         return ret;
681         }
682         return 0;
683 }
684
685 static int copy_file_extent_holes(struct rb_root *dst,
686                                   struct rb_root *src)
687 {
688         struct file_extent_hole *hole;
689         struct rb_node *node;
690         int ret = 0;
691
692         node = rb_first(src);
693         while (node) {
694                 hole = rb_entry(node, struct file_extent_hole, node);
695                 ret = add_file_extent_hole(dst, hole->start, hole->len);
696                 if (ret)
697                         break;
698                 node = rb_next(node);
699         }
700         return ret;
701 }
702
703 static void free_file_extent_holes(struct rb_root *holes)
704 {
705         struct rb_node *node;
706         struct file_extent_hole *hole;
707
708         node = rb_first(holes);
709         while (node) {
710                 hole = rb_entry(node, struct file_extent_hole, node);
711                 rb_erase(node, holes);
712                 free(hole);
713                 node = rb_first(holes);
714         }
715 }
716
717 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
718
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720                                  struct btrfs_root *root)
721 {
722         if (root->last_trans != trans->transid) {
723                 root->track_dirty = 1;
724                 root->last_trans = trans->transid;
725                 root->commit_root = root->node;
726                 extent_buffer_get(root->node);
727         }
728 }
729
730 static u8 imode_to_type(u32 imode)
731 {
732 #define S_SHIFT 12
733         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
735                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
736                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
737                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
738                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
739                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
740                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
741         };
742
743         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
744 #undef S_SHIFT
745 }
746
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
748 {
749         struct device_record *rec1;
750         struct device_record *rec2;
751
752         rec1 = rb_entry(node1, struct device_record, node);
753         rec2 = rb_entry(node2, struct device_record, node);
754         if (rec1->devid > rec2->devid)
755                 return -1;
756         else if (rec1->devid < rec2->devid)
757                 return 1;
758         else
759                 return 0;
760 }
761
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
763 {
764         struct inode_record *rec;
765         struct inode_backref *backref;
766         struct inode_backref *orig;
767         struct inode_backref *tmp;
768         struct orphan_data_extent *src_orphan;
769         struct orphan_data_extent *dst_orphan;
770         struct rb_node *rb;
771         size_t size;
772         int ret;
773
774         rec = malloc(sizeof(*rec));
775         if (!rec)
776                 return ERR_PTR(-ENOMEM);
777         memcpy(rec, orig_rec, sizeof(*rec));
778         rec->refs = 1;
779         INIT_LIST_HEAD(&rec->backrefs);
780         INIT_LIST_HEAD(&rec->orphan_extents);
781         rec->holes = RB_ROOT;
782
783         list_for_each_entry(orig, &orig_rec->backrefs, list) {
784                 size = sizeof(*orig) + orig->namelen + 1;
785                 backref = malloc(size);
786                 if (!backref) {
787                         ret = -ENOMEM;
788                         goto cleanup;
789                 }
790                 memcpy(backref, orig, size);
791                 list_add_tail(&backref->list, &rec->backrefs);
792         }
793         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794                 dst_orphan = malloc(sizeof(*dst_orphan));
795                 if (!dst_orphan) {
796                         ret = -ENOMEM;
797                         goto cleanup;
798                 }
799                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
801         }
802         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
803         if (ret < 0)
804                 goto cleanup_rb;
805
806         return rec;
807
808 cleanup_rb:
809         rb = rb_first(&rec->holes);
810         while (rb) {
811                 struct file_extent_hole *hole;
812
813                 hole = rb_entry(rb, struct file_extent_hole, node);
814                 rb = rb_next(rb);
815                 free(hole);
816         }
817
818 cleanup:
819         if (!list_empty(&rec->backrefs))
820                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821                         list_del(&orig->list);
822                         free(orig);
823                 }
824
825         if (!list_empty(&rec->orphan_extents))
826                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827                         list_del(&orig->list);
828                         free(orig);
829                 }
830
831         free(rec);
832
833         return ERR_PTR(ret);
834 }
835
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
837                                       u64 objectid)
838 {
839         struct orphan_data_extent *orphan;
840
841         if (list_empty(orphan_extents))
842                 return;
843         printf("The following data extent is lost in tree %llu:\n",
844                objectid);
845         list_for_each_entry(orphan, orphan_extents, list) {
846                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
848                        orphan->disk_len);
849         }
850 }
851
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
853 {
854         u64 root_objectid = root->root_key.objectid;
855         int errors = rec->errors;
856
857         if (!errors)
858                 return;
859         /* reloc root errors, we print its corresponding fs root objectid*/
860         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861                 root_objectid = root->root_key.offset;
862                 fprintf(stderr, "reloc");
863         }
864         fprintf(stderr, "root %llu inode %llu errors %x",
865                 (unsigned long long) root_objectid,
866                 (unsigned long long) rec->ino, rec->errors);
867
868         if (errors & I_ERR_NO_INODE_ITEM)
869                 fprintf(stderr, ", no inode item");
870         if (errors & I_ERR_NO_ORPHAN_ITEM)
871                 fprintf(stderr, ", no orphan item");
872         if (errors & I_ERR_DUP_INODE_ITEM)
873                 fprintf(stderr, ", dup inode item");
874         if (errors & I_ERR_DUP_DIR_INDEX)
875                 fprintf(stderr, ", dup dir index");
876         if (errors & I_ERR_ODD_DIR_ITEM)
877                 fprintf(stderr, ", odd dir item");
878         if (errors & I_ERR_ODD_FILE_EXTENT)
879                 fprintf(stderr, ", odd file extent");
880         if (errors & I_ERR_BAD_FILE_EXTENT)
881                 fprintf(stderr, ", bad file extent");
882         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883                 fprintf(stderr, ", file extent overlap");
884         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885                 fprintf(stderr, ", file extent discount");
886         if (errors & I_ERR_DIR_ISIZE_WRONG)
887                 fprintf(stderr, ", dir isize wrong");
888         if (errors & I_ERR_FILE_NBYTES_WRONG)
889                 fprintf(stderr, ", nbytes wrong");
890         if (errors & I_ERR_ODD_CSUM_ITEM)
891                 fprintf(stderr, ", odd csum item");
892         if (errors & I_ERR_SOME_CSUM_MISSING)
893                 fprintf(stderr, ", some csum missing");
894         if (errors & I_ERR_LINK_COUNT_WRONG)
895                 fprintf(stderr, ", link count wrong");
896         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897                 fprintf(stderr, ", orphan file extent");
898         fprintf(stderr, "\n");
899         /* Print the orphan extents if needed */
900         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
901                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
902
903         /* Print the holes if needed */
904         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905                 struct file_extent_hole *hole;
906                 struct rb_node *node;
907                 int found = 0;
908
909                 node = rb_first(&rec->holes);
910                 fprintf(stderr, "Found file extent holes:\n");
911                 while (node) {
912                         found = 1;
913                         hole = rb_entry(node, struct file_extent_hole, node);
914                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
915                                 hole->start, hole->len);
916                         node = rb_next(node);
917                 }
918                 if (!found)
919                         fprintf(stderr, "\tstart: 0, len: %llu\n",
920                                 round_up(rec->isize,
921                                          root->fs_info->sectorsize));
922         }
923 }
924
925 static void print_ref_error(int errors)
926 {
927         if (errors & REF_ERR_NO_DIR_ITEM)
928                 fprintf(stderr, ", no dir item");
929         if (errors & REF_ERR_NO_DIR_INDEX)
930                 fprintf(stderr, ", no dir index");
931         if (errors & REF_ERR_NO_INODE_REF)
932                 fprintf(stderr, ", no inode ref");
933         if (errors & REF_ERR_DUP_DIR_ITEM)
934                 fprintf(stderr, ", dup dir item");
935         if (errors & REF_ERR_DUP_DIR_INDEX)
936                 fprintf(stderr, ", dup dir index");
937         if (errors & REF_ERR_DUP_INODE_REF)
938                 fprintf(stderr, ", dup inode ref");
939         if (errors & REF_ERR_INDEX_UNMATCH)
940                 fprintf(stderr, ", index mismatch");
941         if (errors & REF_ERR_FILETYPE_UNMATCH)
942                 fprintf(stderr, ", filetype mismatch");
943         if (errors & REF_ERR_NAME_TOO_LONG)
944                 fprintf(stderr, ", name too long");
945         if (errors & REF_ERR_NO_ROOT_REF)
946                 fprintf(stderr, ", no root ref");
947         if (errors & REF_ERR_NO_ROOT_BACKREF)
948                 fprintf(stderr, ", no root backref");
949         if (errors & REF_ERR_DUP_ROOT_REF)
950                 fprintf(stderr, ", dup root ref");
951         if (errors & REF_ERR_DUP_ROOT_BACKREF)
952                 fprintf(stderr, ", dup root backref");
953         fprintf(stderr, "\n");
954 }
955
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
957                                           u64 ino, int mod)
958 {
959         struct ptr_node *node;
960         struct cache_extent *cache;
961         struct inode_record *rec = NULL;
962         int ret;
963
964         cache = lookup_cache_extent(inode_cache, ino, 1);
965         if (cache) {
966                 node = container_of(cache, struct ptr_node, cache);
967                 rec = node->data;
968                 if (mod && rec->refs > 1) {
969                         node->data = clone_inode_rec(rec);
970                         if (IS_ERR(node->data))
971                                 return node->data;
972                         rec->refs--;
973                         rec = node->data;
974                 }
975         } else if (mod) {
976                 rec = calloc(1, sizeof(*rec));
977                 if (!rec)
978                         return ERR_PTR(-ENOMEM);
979                 rec->ino = ino;
980                 rec->extent_start = (u64)-1;
981                 rec->refs = 1;
982                 INIT_LIST_HEAD(&rec->backrefs);
983                 INIT_LIST_HEAD(&rec->orphan_extents);
984                 rec->holes = RB_ROOT;
985
986                 node = malloc(sizeof(*node));
987                 if (!node) {
988                         free(rec);
989                         return ERR_PTR(-ENOMEM);
990                 }
991                 node->cache.start = ino;
992                 node->cache.size = 1;
993                 node->data = rec;
994
995                 if (ino == BTRFS_FREE_INO_OBJECTID)
996                         rec->found_link = 1;
997
998                 ret = insert_cache_extent(inode_cache, &node->cache);
999                 if (ret)
1000                         return ERR_PTR(-EEXIST);
1001         }
1002         return rec;
1003 }
1004
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1006 {
1007         struct orphan_data_extent *orphan;
1008
1009         while (!list_empty(orphan_extents)) {
1010                 orphan = list_entry(orphan_extents->next,
1011                                     struct orphan_data_extent, list);
1012                 list_del(&orphan->list);
1013                 free(orphan);
1014         }
1015 }
1016
1017 static void free_inode_rec(struct inode_record *rec)
1018 {
1019         struct inode_backref *backref;
1020
1021         if (--rec->refs > 0)
1022                 return;
1023
1024         while (!list_empty(&rec->backrefs)) {
1025                 backref = to_inode_backref(rec->backrefs.next);
1026                 list_del(&backref->list);
1027                 free(backref);
1028         }
1029         free_orphan_data_extents(&rec->orphan_extents);
1030         free_file_extent_holes(&rec->holes);
1031         free(rec);
1032 }
1033
1034 static int can_free_inode_rec(struct inode_record *rec)
1035 {
1036         if (!rec->errors && rec->checked && rec->found_inode_item &&
1037             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1038                 return 1;
1039         return 0;
1040 }
1041
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043                                  struct inode_record *rec)
1044 {
1045         struct cache_extent *cache;
1046         struct inode_backref *tmp, *backref;
1047         struct ptr_node *node;
1048         u8 filetype;
1049
1050         if (!rec->found_inode_item)
1051                 return;
1052
1053         filetype = imode_to_type(rec->imode);
1054         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055                 if (backref->found_dir_item && backref->found_dir_index) {
1056                         if (backref->filetype != filetype)
1057                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058                         if (!backref->errors && backref->found_inode_ref &&
1059                             rec->nlink == rec->found_link) {
1060                                 list_del(&backref->list);
1061                                 free(backref);
1062                         }
1063                 }
1064         }
1065
1066         if (!rec->checked || rec->merging)
1067                 return;
1068
1069         if (S_ISDIR(rec->imode)) {
1070                 if (rec->found_size != rec->isize)
1071                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072                 if (rec->found_file_extent)
1073                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075                 if (rec->found_dir_item)
1076                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1077                 if (rec->found_size != rec->nbytes)
1078                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1079                 if (rec->nlink > 0 && !no_holes &&
1080                     (rec->extent_end < rec->isize ||
1081                      first_extent_gap(&rec->holes) < rec->isize))
1082                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1083         }
1084
1085         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086                 if (rec->found_csum_item && rec->nodatasum)
1087                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088                 if (rec->some_csum_missing && !rec->nodatasum)
1089                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1090         }
1091
1092         BUG_ON(rec->refs != 1);
1093         if (can_free_inode_rec(rec)) {
1094                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095                 node = container_of(cache, struct ptr_node, cache);
1096                 BUG_ON(node->data != rec);
1097                 remove_cache_extent(inode_cache, &node->cache);
1098                 free(node);
1099                 free_inode_rec(rec);
1100         }
1101 }
1102
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1104 {
1105         struct btrfs_path path;
1106         struct btrfs_key key;
1107         int ret;
1108
1109         key.objectid = BTRFS_ORPHAN_OBJECTID;
1110         key.type = BTRFS_ORPHAN_ITEM_KEY;
1111         key.offset = ino;
1112
1113         btrfs_init_path(&path);
1114         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115         btrfs_release_path(&path);
1116         if (ret > 0)
1117                 ret = -ENOENT;
1118         return ret;
1119 }
1120
1121 static int process_inode_item(struct extent_buffer *eb,
1122                               int slot, struct btrfs_key *key,
1123                               struct shared_node *active_node)
1124 {
1125         struct inode_record *rec;
1126         struct btrfs_inode_item *item;
1127
1128         rec = active_node->current;
1129         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130         if (rec->found_inode_item) {
1131                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1132                 return 1;
1133         }
1134         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135         rec->nlink = btrfs_inode_nlink(eb, item);
1136         rec->isize = btrfs_inode_size(eb, item);
1137         rec->nbytes = btrfs_inode_nbytes(eb, item);
1138         rec->imode = btrfs_inode_mode(eb, item);
1139         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1140                 rec->nodatasum = 1;
1141         rec->found_inode_item = 1;
1142         if (rec->nlink == 0)
1143                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144         maybe_free_inode_rec(&active_node->inode_cache, rec);
1145         return 0;
1146 }
1147
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1149                                                 const char *name,
1150                                                 int namelen, u64 dir)
1151 {
1152         struct inode_backref *backref;
1153
1154         list_for_each_entry(backref, &rec->backrefs, list) {
1155                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1156                         break;
1157                 if (backref->dir != dir || backref->namelen != namelen)
1158                         continue;
1159                 if (memcmp(name, backref->name, namelen))
1160                         continue;
1161                 return backref;
1162         }
1163
1164         backref = malloc(sizeof(*backref) + namelen + 1);
1165         if (!backref)
1166                 return NULL;
1167         memset(backref, 0, sizeof(*backref));
1168         backref->dir = dir;
1169         backref->namelen = namelen;
1170         memcpy(backref->name, name, namelen);
1171         backref->name[namelen] = '\0';
1172         list_add_tail(&backref->list, &rec->backrefs);
1173         return backref;
1174 }
1175
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177                              u64 ino, u64 dir, u64 index,
1178                              const char *name, int namelen,
1179                              u8 filetype, u8 itemtype, int errors)
1180 {
1181         struct inode_record *rec;
1182         struct inode_backref *backref;
1183
1184         rec = get_inode_rec(inode_cache, ino, 1);
1185         BUG_ON(IS_ERR(rec));
1186         backref = get_inode_backref(rec, name, namelen, dir);
1187         BUG_ON(!backref);
1188         if (errors)
1189                 backref->errors |= errors;
1190         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191                 if (backref->found_dir_index)
1192                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193                 if (backref->found_inode_ref && backref->index != index)
1194                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1195                 if (backref->found_dir_item && backref->filetype != filetype)
1196                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1197
1198                 backref->index = index;
1199                 backref->filetype = filetype;
1200                 backref->found_dir_index = 1;
1201         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1202                 rec->found_link++;
1203                 if (backref->found_dir_item)
1204                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205                 if (backref->found_dir_index && backref->filetype != filetype)
1206                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1207
1208                 backref->filetype = filetype;
1209                 backref->found_dir_item = 1;
1210         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212                 if (backref->found_inode_ref)
1213                         backref->errors |= REF_ERR_DUP_INODE_REF;
1214                 if (backref->found_dir_index && backref->index != index)
1215                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1216                 else
1217                         backref->index = index;
1218
1219                 backref->ref_type = itemtype;
1220                 backref->found_inode_ref = 1;
1221         } else {
1222                 BUG_ON(1);
1223         }
1224
1225         maybe_free_inode_rec(inode_cache, rec);
1226         return 0;
1227 }
1228
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230                             struct cache_tree *dst_cache)
1231 {
1232         struct inode_backref *backref;
1233         u32 dir_count = 0;
1234         int ret = 0;
1235
1236         dst->merging = 1;
1237         list_for_each_entry(backref, &src->backrefs, list) {
1238                 if (backref->found_dir_index) {
1239                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1240                                         backref->index, backref->name,
1241                                         backref->namelen, backref->filetype,
1242                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1243                 }
1244                 if (backref->found_dir_item) {
1245                         dir_count++;
1246                         add_inode_backref(dst_cache, dst->ino,
1247                                         backref->dir, 0, backref->name,
1248                                         backref->namelen, backref->filetype,
1249                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1250                 }
1251                 if (backref->found_inode_ref) {
1252                         add_inode_backref(dst_cache, dst->ino,
1253                                         backref->dir, backref->index,
1254                                         backref->name, backref->namelen, 0,
1255                                         backref->ref_type, backref->errors);
1256                 }
1257         }
1258
1259         if (src->found_dir_item)
1260                 dst->found_dir_item = 1;
1261         if (src->found_file_extent)
1262                 dst->found_file_extent = 1;
1263         if (src->found_csum_item)
1264                 dst->found_csum_item = 1;
1265         if (src->some_csum_missing)
1266                 dst->some_csum_missing = 1;
1267         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1269                 if (ret < 0)
1270                         return ret;
1271         }
1272
1273         BUG_ON(src->found_link < dir_count);
1274         dst->found_link += src->found_link - dir_count;
1275         dst->found_size += src->found_size;
1276         if (src->extent_start != (u64)-1) {
1277                 if (dst->extent_start == (u64)-1) {
1278                         dst->extent_start = src->extent_start;
1279                         dst->extent_end = src->extent_end;
1280                 } else {
1281                         if (dst->extent_end > src->extent_start)
1282                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1283                         else if (dst->extent_end < src->extent_start) {
1284                                 ret = add_file_extent_hole(&dst->holes,
1285                                         dst->extent_end,
1286                                         src->extent_start - dst->extent_end);
1287                         }
1288                         if (dst->extent_end < src->extent_end)
1289                                 dst->extent_end = src->extent_end;
1290                 }
1291         }
1292
1293         dst->errors |= src->errors;
1294         if (src->found_inode_item) {
1295                 if (!dst->found_inode_item) {
1296                         dst->nlink = src->nlink;
1297                         dst->isize = src->isize;
1298                         dst->nbytes = src->nbytes;
1299                         dst->imode = src->imode;
1300                         dst->nodatasum = src->nodatasum;
1301                         dst->found_inode_item = 1;
1302                 } else {
1303                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1304                 }
1305         }
1306         dst->merging = 0;
1307
1308         return 0;
1309 }
1310
1311 static int splice_shared_node(struct shared_node *src_node,
1312                               struct shared_node *dst_node)
1313 {
1314         struct cache_extent *cache;
1315         struct ptr_node *node, *ins;
1316         struct cache_tree *src, *dst;
1317         struct inode_record *rec, *conflict;
1318         u64 current_ino = 0;
1319         int splice = 0;
1320         int ret;
1321
1322         if (--src_node->refs == 0)
1323                 splice = 1;
1324         if (src_node->current)
1325                 current_ino = src_node->current->ino;
1326
1327         src = &src_node->root_cache;
1328         dst = &dst_node->root_cache;
1329 again:
1330         cache = search_cache_extent(src, 0);
1331         while (cache) {
1332                 node = container_of(cache, struct ptr_node, cache);
1333                 rec = node->data;
1334                 cache = next_cache_extent(cache);
1335
1336                 if (splice) {
1337                         remove_cache_extent(src, &node->cache);
1338                         ins = node;
1339                 } else {
1340                         ins = malloc(sizeof(*ins));
1341                         BUG_ON(!ins);
1342                         ins->cache.start = node->cache.start;
1343                         ins->cache.size = node->cache.size;
1344                         ins->data = rec;
1345                         rec->refs++;
1346                 }
1347                 ret = insert_cache_extent(dst, &ins->cache);
1348                 if (ret == -EEXIST) {
1349                         conflict = get_inode_rec(dst, rec->ino, 1);
1350                         BUG_ON(IS_ERR(conflict));
1351                         merge_inode_recs(rec, conflict, dst);
1352                         if (rec->checked) {
1353                                 conflict->checked = 1;
1354                                 if (dst_node->current == conflict)
1355                                         dst_node->current = NULL;
1356                         }
1357                         maybe_free_inode_rec(dst, conflict);
1358                         free_inode_rec(rec);
1359                         free(ins);
1360                 } else {
1361                         BUG_ON(ret);
1362                 }
1363         }
1364
1365         if (src == &src_node->root_cache) {
1366                 src = &src_node->inode_cache;
1367                 dst = &dst_node->inode_cache;
1368                 goto again;
1369         }
1370
1371         if (current_ino > 0 && (!dst_node->current ||
1372             current_ino > dst_node->current->ino)) {
1373                 if (dst_node->current) {
1374                         dst_node->current->checked = 1;
1375                         maybe_free_inode_rec(dst, dst_node->current);
1376                 }
1377                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378                 BUG_ON(IS_ERR(dst_node->current));
1379         }
1380         return 0;
1381 }
1382
1383 static void free_inode_ptr(struct cache_extent *cache)
1384 {
1385         struct ptr_node *node;
1386         struct inode_record *rec;
1387
1388         node = container_of(cache, struct ptr_node, cache);
1389         rec = node->data;
1390         free_inode_rec(rec);
1391         free(node);
1392 }
1393
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1395
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1397                                             u64 bytenr)
1398 {
1399         struct cache_extent *cache;
1400         struct shared_node *node;
1401
1402         cache = lookup_cache_extent(shared, bytenr, 1);
1403         if (cache) {
1404                 node = container_of(cache, struct shared_node, cache);
1405                 return node;
1406         }
1407         return NULL;
1408 }
1409
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1411 {
1412         int ret;
1413         struct shared_node *node;
1414
1415         node = calloc(1, sizeof(*node));
1416         if (!node)
1417                 return -ENOMEM;
1418         node->cache.start = bytenr;
1419         node->cache.size = 1;
1420         cache_tree_init(&node->root_cache);
1421         cache_tree_init(&node->inode_cache);
1422         node->refs = refs;
1423
1424         ret = insert_cache_extent(shared, &node->cache);
1425
1426         return ret;
1427 }
1428
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430                              struct walk_control *wc, int level)
1431 {
1432         struct shared_node *node;
1433         struct shared_node *dest;
1434         int ret;
1435
1436         if (level == wc->active_node)
1437                 return 0;
1438
1439         BUG_ON(wc->active_node <= level);
1440         node = find_shared_node(&wc->shared, bytenr);
1441         if (!node) {
1442                 ret = add_shared_node(&wc->shared, bytenr, refs);
1443                 BUG_ON(ret);
1444                 node = find_shared_node(&wc->shared, bytenr);
1445                 wc->nodes[level] = node;
1446                 wc->active_node = level;
1447                 return 0;
1448         }
1449
1450         if (wc->root_level == wc->active_node &&
1451             btrfs_root_refs(&root->root_item) == 0) {
1452                 if (--node->refs == 0) {
1453                         free_inode_recs_tree(&node->root_cache);
1454                         free_inode_recs_tree(&node->inode_cache);
1455                         remove_cache_extent(&wc->shared, &node->cache);
1456                         free(node);
1457                 }
1458                 return 1;
1459         }
1460
1461         dest = wc->nodes[wc->active_node];
1462         splice_shared_node(node, dest);
1463         if (node->refs == 0) {
1464                 remove_cache_extent(&wc->shared, &node->cache);
1465                 free(node);
1466         }
1467         return 1;
1468 }
1469
1470 static int leave_shared_node(struct btrfs_root *root,
1471                              struct walk_control *wc, int level)
1472 {
1473         struct shared_node *node;
1474         struct shared_node *dest;
1475         int i;
1476
1477         if (level == wc->root_level)
1478                 return 0;
1479
1480         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1481                 if (wc->nodes[i])
1482                         break;
1483         }
1484         BUG_ON(i >= BTRFS_MAX_LEVEL);
1485
1486         node = wc->nodes[wc->active_node];
1487         wc->nodes[wc->active_node] = NULL;
1488         wc->active_node = i;
1489
1490         dest = wc->nodes[wc->active_node];
1491         if (wc->active_node < wc->root_level ||
1492             btrfs_root_refs(&root->root_item) > 0) {
1493                 BUG_ON(node->refs <= 1);
1494                 splice_shared_node(node, dest);
1495         } else {
1496                 BUG_ON(node->refs < 2);
1497                 node->refs--;
1498         }
1499         return 0;
1500 }
1501
1502 /*
1503  * Returns:
1504  * < 0 - on error
1505  * 1   - if the root with id child_root_id is a child of root parent_root_id
1506  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1507  *       has other root(s) as parent(s)
1508  * 2   - if the root child_root_id doesn't have any parent roots
1509  */
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1511                          u64 child_root_id)
1512 {
1513         struct btrfs_path path;
1514         struct btrfs_key key;
1515         struct extent_buffer *leaf;
1516         int has_parent = 0;
1517         int ret;
1518
1519         btrfs_init_path(&path);
1520
1521         key.objectid = parent_root_id;
1522         key.type = BTRFS_ROOT_REF_KEY;
1523         key.offset = child_root_id;
1524         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525                                 0, 0);
1526         if (ret < 0)
1527                 return ret;
1528         btrfs_release_path(&path);
1529         if (!ret)
1530                 return 1;
1531
1532         key.objectid = child_root_id;
1533         key.type = BTRFS_ROOT_BACKREF_KEY;
1534         key.offset = 0;
1535         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1536                                 0, 0);
1537         if (ret < 0)
1538                 goto out;
1539
1540         while (1) {
1541                 leaf = path.nodes[0];
1542                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1544                         if (ret)
1545                                 break;
1546                         leaf = path.nodes[0];
1547                 }
1548
1549                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1550                 if (key.objectid != child_root_id ||
1551                     key.type != BTRFS_ROOT_BACKREF_KEY)
1552                         break;
1553
1554                 has_parent = 1;
1555
1556                 if (key.offset == parent_root_id) {
1557                         btrfs_release_path(&path);
1558                         return 1;
1559                 }
1560
1561                 path.slots[0]++;
1562         }
1563 out:
1564         btrfs_release_path(&path);
1565         if (ret < 0)
1566                 return ret;
1567         return has_parent ? 0 : 2;
1568 }
1569
1570 static int process_dir_item(struct extent_buffer *eb,
1571                             int slot, struct btrfs_key *key,
1572                             struct shared_node *active_node)
1573 {
1574         u32 total;
1575         u32 cur = 0;
1576         u32 len;
1577         u32 name_len;
1578         u32 data_len;
1579         int error;
1580         int nritems = 0;
1581         u8 filetype;
1582         struct btrfs_dir_item *di;
1583         struct inode_record *rec;
1584         struct cache_tree *root_cache;
1585         struct cache_tree *inode_cache;
1586         struct btrfs_key location;
1587         char namebuf[BTRFS_NAME_LEN];
1588
1589         root_cache = &active_node->root_cache;
1590         inode_cache = &active_node->inode_cache;
1591         rec = active_node->current;
1592         rec->found_dir_item = 1;
1593
1594         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595         total = btrfs_item_size_nr(eb, slot);
1596         while (cur < total) {
1597                 nritems++;
1598                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599                 name_len = btrfs_dir_name_len(eb, di);
1600                 data_len = btrfs_dir_data_len(eb, di);
1601                 filetype = btrfs_dir_type(eb, di);
1602
1603                 rec->found_size += name_len;
1604                 if (cur + sizeof(*di) + name_len > total ||
1605                     name_len > BTRFS_NAME_LEN) {
1606                         error = REF_ERR_NAME_TOO_LONG;
1607
1608                         if (cur + sizeof(*di) > total)
1609                                 break;
1610                         len = min_t(u32, total - cur - sizeof(*di),
1611                                     BTRFS_NAME_LEN);
1612                 } else {
1613                         len = name_len;
1614                         error = 0;
1615                 }
1616
1617                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1618
1619                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620                     key->offset != btrfs_name_hash(namebuf, len)) {
1621                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1622                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623                         key->objectid, key->offset, namebuf, len, filetype,
1624                         key->offset, btrfs_name_hash(namebuf, len));
1625                 }
1626
1627                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628                         add_inode_backref(inode_cache, location.objectid,
1629                                           key->objectid, key->offset, namebuf,
1630                                           len, filetype, key->type, error);
1631                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632                         add_inode_backref(root_cache, location.objectid,
1633                                           key->objectid, key->offset,
1634                                           namebuf, len, filetype,
1635                                           key->type, error);
1636                 } else {
1637                         fprintf(stderr, "invalid location in dir item %u\n",
1638                                 location.type);
1639                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640                                           key->objectid, key->offset, namebuf,
1641                                           len, filetype, key->type, error);
1642                 }
1643
1644                 len = sizeof(*di) + name_len + data_len;
1645                 di = (struct btrfs_dir_item *)((char *)di + len);
1646                 cur += len;
1647         }
1648         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1650
1651         return 0;
1652 }
1653
1654 static int process_inode_ref(struct extent_buffer *eb,
1655                              int slot, struct btrfs_key *key,
1656                              struct shared_node *active_node)
1657 {
1658         u32 total;
1659         u32 cur = 0;
1660         u32 len;
1661         u32 name_len;
1662         u64 index;
1663         int error;
1664         struct cache_tree *inode_cache;
1665         struct btrfs_inode_ref *ref;
1666         char namebuf[BTRFS_NAME_LEN];
1667
1668         inode_cache = &active_node->inode_cache;
1669
1670         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671         total = btrfs_item_size_nr(eb, slot);
1672         while (cur < total) {
1673                 name_len = btrfs_inode_ref_name_len(eb, ref);
1674                 index = btrfs_inode_ref_index(eb, ref);
1675
1676                 /* inode_ref + namelen should not cross item boundary */
1677                 if (cur + sizeof(*ref) + name_len > total ||
1678                     name_len > BTRFS_NAME_LEN) {
1679                         if (total < cur + sizeof(*ref))
1680                                 break;
1681
1682                         /* Still try to read out the remaining part */
1683                         len = min_t(u32, total - cur - sizeof(*ref),
1684                                     BTRFS_NAME_LEN);
1685                         error = REF_ERR_NAME_TOO_LONG;
1686                 } else {
1687                         len = name_len;
1688                         error = 0;
1689                 }
1690
1691                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692                 add_inode_backref(inode_cache, key->objectid, key->offset,
1693                                   index, namebuf, len, 0, key->type, error);
1694
1695                 len = sizeof(*ref) + name_len;
1696                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1697                 cur += len;
1698         }
1699         return 0;
1700 }
1701
1702 static int process_inode_extref(struct extent_buffer *eb,
1703                                 int slot, struct btrfs_key *key,
1704                                 struct shared_node *active_node)
1705 {
1706         u32 total;
1707         u32 cur = 0;
1708         u32 len;
1709         u32 name_len;
1710         u64 index;
1711         u64 parent;
1712         int error;
1713         struct cache_tree *inode_cache;
1714         struct btrfs_inode_extref *extref;
1715         char namebuf[BTRFS_NAME_LEN];
1716
1717         inode_cache = &active_node->inode_cache;
1718
1719         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720         total = btrfs_item_size_nr(eb, slot);
1721         while (cur < total) {
1722                 name_len = btrfs_inode_extref_name_len(eb, extref);
1723                 index = btrfs_inode_extref_index(eb, extref);
1724                 parent = btrfs_inode_extref_parent(eb, extref);
1725                 if (name_len <= BTRFS_NAME_LEN) {
1726                         len = name_len;
1727                         error = 0;
1728                 } else {
1729                         len = BTRFS_NAME_LEN;
1730                         error = REF_ERR_NAME_TOO_LONG;
1731                 }
1732                 read_extent_buffer(eb, namebuf,
1733                                    (unsigned long)(extref + 1), len);
1734                 add_inode_backref(inode_cache, key->objectid, parent,
1735                                   index, namebuf, len, 0, key->type, error);
1736
1737                 len = sizeof(*extref) + name_len;
1738                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1739                 cur += len;
1740         }
1741         return 0;
1742
1743 }
1744
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746                             u64 len, u64 *found)
1747 {
1748         struct btrfs_key key;
1749         struct btrfs_path path;
1750         struct extent_buffer *leaf;
1751         int ret;
1752         size_t size;
1753         *found = 0;
1754         u64 csum_end;
1755         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1756
1757         btrfs_init_path(&path);
1758
1759         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1760         key.offset = start;
1761         key.type = BTRFS_EXTENT_CSUM_KEY;
1762
1763         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1764                                 &key, &path, 0, 0);
1765         if (ret < 0)
1766                 goto out;
1767         if (ret > 0 && path.slots[0] > 0) {
1768                 leaf = path.nodes[0];
1769                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771                     key.type == BTRFS_EXTENT_CSUM_KEY)
1772                         path.slots[0]--;
1773         }
1774
1775         while (len > 0) {
1776                 leaf = path.nodes[0];
1777                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1779                         if (ret > 0)
1780                                 break;
1781                         else if (ret < 0)
1782                                 goto out;
1783                         leaf = path.nodes[0];
1784                 }
1785
1786                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1787                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788                     key.type != BTRFS_EXTENT_CSUM_KEY)
1789                         break;
1790
1791                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1792                 if (key.offset >= start + len)
1793                         break;
1794
1795                 if (key.offset > start)
1796                         start = key.offset;
1797
1798                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1799                 csum_end = key.offset + (size / csum_size) *
1800                            root->fs_info->sectorsize;
1801                 if (csum_end > start) {
1802                         size = min(csum_end - start, len);
1803                         len -= size;
1804                         start += size;
1805                         *found += size;
1806                 }
1807
1808                 path.slots[0]++;
1809         }
1810 out:
1811         btrfs_release_path(&path);
1812         if (ret < 0)
1813                 return ret;
1814         return 0;
1815 }
1816
1817 static int process_file_extent(struct btrfs_root *root,
1818                                 struct extent_buffer *eb,
1819                                 int slot, struct btrfs_key *key,
1820                                 struct shared_node *active_node)
1821 {
1822         struct inode_record *rec;
1823         struct btrfs_file_extent_item *fi;
1824         u64 num_bytes = 0;
1825         u64 disk_bytenr = 0;
1826         u64 extent_offset = 0;
1827         u64 mask = root->fs_info->sectorsize - 1;
1828         int extent_type;
1829         int ret;
1830
1831         rec = active_node->current;
1832         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833         rec->found_file_extent = 1;
1834
1835         if (rec->extent_start == (u64)-1) {
1836                 rec->extent_start = key->offset;
1837                 rec->extent_end = key->offset;
1838         }
1839
1840         if (rec->extent_end > key->offset)
1841                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842         else if (rec->extent_end < key->offset) {
1843                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844                                            key->offset - rec->extent_end);
1845                 if (ret < 0)
1846                         return ret;
1847         }
1848
1849         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850         extent_type = btrfs_file_extent_type(eb, fi);
1851
1852         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1854                 if (num_bytes == 0)
1855                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856                 rec->found_size += num_bytes;
1857                 num_bytes = (num_bytes + mask) & ~mask;
1858         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862                 extent_offset = btrfs_file_extent_offset(eb, fi);
1863                 if (num_bytes == 0 || (num_bytes & mask))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (num_bytes + extent_offset >
1866                     btrfs_file_extent_ram_bytes(eb, fi))
1867                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1868                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869                     (btrfs_file_extent_compression(eb, fi) ||
1870                      btrfs_file_extent_encryption(eb, fi) ||
1871                      btrfs_file_extent_other_encoding(eb, fi)))
1872                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1873                 if (disk_bytenr > 0)
1874                         rec->found_size += num_bytes;
1875         } else {
1876                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1877         }
1878         rec->extent_end = key->offset + num_bytes;
1879
1880         /*
1881          * The data reloc tree will copy full extents into its inode and then
1882          * copy the corresponding csums.  Because the extent it copied could be
1883          * a preallocated extent that hasn't been written to yet there may be no
1884          * csums to copy, ergo we won't have csums for our file extent.  This is
1885          * ok so just don't bother checking csums if the inode belongs to the
1886          * data reloc tree.
1887          */
1888         if (disk_bytenr > 0 &&
1889             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1890                 u64 found;
1891                 if (btrfs_file_extent_compression(eb, fi))
1892                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1893                 else
1894                         disk_bytenr += extent_offset;
1895
1896                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1897                 if (ret < 0)
1898                         return ret;
1899                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1900                         if (found > 0)
1901                                 rec->found_csum_item = 1;
1902                         if (found < num_bytes)
1903                                 rec->some_csum_missing = 1;
1904                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1905                         if (found > 0)
1906                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1907                 }
1908         }
1909         return 0;
1910 }
1911
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913                             struct walk_control *wc)
1914 {
1915         struct btrfs_key key;
1916         u32 nritems;
1917         int i;
1918         int ret = 0;
1919         struct cache_tree *inode_cache;
1920         struct shared_node *active_node;
1921
1922         if (wc->root_level == wc->active_node &&
1923             btrfs_root_refs(&root->root_item) == 0)
1924                 return 0;
1925
1926         active_node = wc->nodes[wc->active_node];
1927         inode_cache = &active_node->inode_cache;
1928         nritems = btrfs_header_nritems(eb);
1929         for (i = 0; i < nritems; i++) {
1930                 btrfs_item_key_to_cpu(eb, &key, i);
1931
1932                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1933                         continue;
1934                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1935                         continue;
1936
1937                 if (active_node->current == NULL ||
1938                     active_node->current->ino < key.objectid) {
1939                         if (active_node->current) {
1940                                 active_node->current->checked = 1;
1941                                 maybe_free_inode_rec(inode_cache,
1942                                                      active_node->current);
1943                         }
1944                         active_node->current = get_inode_rec(inode_cache,
1945                                                              key.objectid, 1);
1946                         BUG_ON(IS_ERR(active_node->current));
1947                 }
1948                 switch (key.type) {
1949                 case BTRFS_DIR_ITEM_KEY:
1950                 case BTRFS_DIR_INDEX_KEY:
1951                         ret = process_dir_item(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_REF_KEY:
1954                         ret = process_inode_ref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_EXTREF_KEY:
1957                         ret = process_inode_extref(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_INODE_ITEM_KEY:
1960                         ret = process_inode_item(eb, i, &key, active_node);
1961                         break;
1962                 case BTRFS_EXTENT_DATA_KEY:
1963                         ret = process_file_extent(root, eb, i, &key,
1964                                                   active_node);
1965                         break;
1966                 default:
1967                         break;
1968                 };
1969         }
1970         return ret;
1971 }
1972
/*
 * Per-level cache of the most recently looked-up tree block, filled in by
 * update_nodes_refs() and walk_down_tree() so the extent tree does not have
 * to be queried again for the same block.  All arrays are indexed by level.
 */
struct node_refs {
        /* Start bytenr of the block cached for this level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* Reference count reported by btrfs_lookup_extent_info() */
        u64 refs[BTRFS_MAX_LEVEL];
        /* Non-zero if the current root should check the block itself */
        int need_check[BTRFS_MAX_LEVEL];
};
1978
1979 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1980                              struct node_refs *nrefs, u64 level);
1981 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1982                             unsigned int ext_ref);
1983
1984 /*
1985  * Returns >0  Found error, not fatal, should continue
1986  * Returns <0  Fatal error, must exit the whole check
1987  * Returns 0   No errors found
1988  */
1989 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1990                                struct node_refs *nrefs, int *level, int ext_ref)
1991 {
1992         struct extent_buffer *cur = path->nodes[0];
1993         struct btrfs_key key;
1994         u64 cur_bytenr;
1995         u32 nritems;
1996         u64 first_ino = 0;
1997         int root_level = btrfs_header_level(root->node);
1998         int i;
1999         int ret = 0; /* Final return value */
2000         int err = 0; /* Positive error bitmap */
2001
2002         cur_bytenr = cur->start;
2003
2004         /* skip to first inode item or the first inode number change */
2005         nritems = btrfs_header_nritems(cur);
2006         for (i = 0; i < nritems; i++) {
2007                 btrfs_item_key_to_cpu(cur, &key, i);
2008                 if (i == 0)
2009                         first_ino = key.objectid;
2010                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2011                     (first_ino && first_ino != key.objectid))
2012                         break;
2013         }
2014         if (i == nritems) {
2015                 path->slots[0] = nritems;
2016                 return 0;
2017         }
2018         path->slots[0] = i;
2019
2020 again:
2021         err |= check_inode_item(root, path, ext_ref);
2022
2023         /* modify cur since check_inode_item may change path */
2024         cur = path->nodes[0];
2025
2026         if (err & LAST_ITEM)
2027                 goto out;
2028
2029         /* still have inode items in thie leaf */
2030         if (cur->start == cur_bytenr)
2031                 goto again;
2032
2033         /*
2034          * we have switched to another leaf, above nodes may
2035          * have changed, here walk down the path, if a node
2036          * or leaf is shared, check whether we can skip this
2037          * node or leaf.
2038          */
2039         for (i = root_level; i >= 0; i--) {
2040                 if (path->nodes[i]->start == nrefs->bytenr[i])
2041                         continue;
2042
2043                 ret = update_nodes_refs(root,
2044                                 path->nodes[i]->start,
2045                                 nrefs, i);
2046                 if (ret)
2047                         goto out;
2048
2049                 if (!nrefs->need_check[i]) {
2050                         *level += 1;
2051                         break;
2052                 }
2053         }
2054
2055         for (i = 0; i < *level; i++) {
2056                 free_extent_buffer(path->nodes[i]);
2057                 path->nodes[i] = NULL;
2058         }
2059 out:
2060         err &= ~LAST_ITEM;
2061         if (err && !ret)
2062                 ret = err;
2063         return ret;
2064 }
2065
2066 static void reada_walk_down(struct btrfs_root *root,
2067                             struct extent_buffer *node, int slot)
2068 {
2069         struct btrfs_fs_info *fs_info = root->fs_info;
2070         u64 bytenr;
2071         u64 ptr_gen;
2072         u32 nritems;
2073         int i;
2074         int level;
2075
2076         level = btrfs_header_level(node);
2077         if (level != 1)
2078                 return;
2079
2080         nritems = btrfs_header_nritems(node);
2081         for (i = slot; i < nritems; i++) {
2082                 bytenr = btrfs_node_blockptr(node, i);
2083                 ptr_gen = btrfs_node_ptr_generation(node, i);
2084                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2085         }
2086 }
2087
2088 /*
2089  * Check the child node/leaf by the following condition:
2090  * 1. the first item key of the node/leaf should be the same with the one
2091  *    in parent.
2092  * 2. block in parent node should match the child node/leaf.
2093  * 3. generation of parent node and child's header should be consistent.
2094  *
2095  * Or the child node/leaf pointed by the key in parent is not valid.
2096  *
2097  * We hope to check leaf owner too, but since subvol may share leaves,
2098  * which makes leaf owner check not so strong, key check should be
2099  * sufficient enough for that case.
2100  */
2101 static int check_child_node(struct extent_buffer *parent, int slot,
2102                             struct extent_buffer *child)
2103 {
2104         struct btrfs_key parent_key;
2105         struct btrfs_key child_key;
2106         int ret = 0;
2107
2108         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2109         if (btrfs_header_level(child) == 0)
2110                 btrfs_item_key_to_cpu(child, &child_key, 0);
2111         else
2112                 btrfs_node_key_to_cpu(child, &child_key, 0);
2113
2114         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2115                 ret = -EINVAL;
2116                 fprintf(stderr,
2117                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2118                         parent_key.objectid, parent_key.type, parent_key.offset,
2119                         child_key.objectid, child_key.type, child_key.offset);
2120         }
2121         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2122                 ret = -EINVAL;
2123                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2124                         btrfs_node_blockptr(parent, slot),
2125                         btrfs_header_bytenr(child));
2126         }
2127         if (btrfs_node_ptr_generation(parent, slot) !=
2128             btrfs_header_generation(child)) {
2129                 ret = -EINVAL;
2130                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2131                         btrfs_header_generation(child),
2132                         btrfs_node_ptr_generation(parent, slot));
2133         }
2134         return ret;
2135 }
2136
2137 /*
2138  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2139  * in every fs or file tree check. Here we find its all root ids, and only check
2140  * it in the fs or file tree which has the smallest root id.
2141  */
2142 static int need_check(struct btrfs_root *root, struct ulist *roots)
2143 {
2144         struct rb_node *node;
2145         struct ulist_node *u;
2146
2147         if (roots->nnodes == 1)
2148                 return 1;
2149
2150         node = rb_first(&roots->root);
2151         u = rb_entry(node, struct ulist_node, rb_node);
2152         /*
2153          * current root id is not smallest, we skip it and let it be checked
2154          * in the fs or file tree who hash the smallest root id.
2155          */
2156         if (root->objectid != u->val)
2157                 return 0;
2158
2159         return 1;
2160 }
2161
2162 /*
2163  * for a tree node or leaf, we record its reference count, so later if we still
2164  * process this node or leaf, don't need to compute its reference count again.
2165  */
2166 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2167                              struct node_refs *nrefs, u64 level)
2168 {
2169         int check, ret;
2170         u64 refs;
2171         struct ulist *roots;
2172
2173         if (nrefs->bytenr[level] != bytenr) {
2174                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2175                                        level, 1, &refs, NULL);
2176                 if (ret < 0)
2177                         return ret;
2178
2179                 nrefs->bytenr[level] = bytenr;
2180                 nrefs->refs[level] = refs;
2181                 if (refs > 1) {
2182                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2183                                                    0, &roots);
2184                         if (ret)
2185                                 return -EIO;
2186
2187                         check = need_check(root, roots);
2188                         ulist_free(roots);
2189                         nrefs->need_check[level] = check;
2190                 } else {
2191                         nrefs->need_check[level] = 1;
2192                 }
2193         }
2194
2195         return 0;
2196 }
2197
/*
 * Descend from path->nodes[*level] along path->slots[] until a leaf is
 * reached, then run process_one_leaf() on it.
 *
 * @nrefs caches the reference count of the last block looked up at each
 * level.  Blocks with more than one reference are passed to
 * enter_shared_node(); when that returns a positive value the block is not
 * descended into.
 *
 * On return *level is the level the walk stopped at and path->slots[*level]
 * is set to that block's item count, i.e. the block is marked as finished.
 *
 * Returns 0 on success, a negative error (extent lookup failure, -EIO for an
 * unreadable or invalid child, the check_child_node() error, or a negative
 * result of process_one_leaf()), or the positive value returned by
 * enter_shared_node() for the starting block.
 */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
                          struct walk_control *wc, int *level,
                          struct node_refs *nrefs)
{
        enum btrfs_tree_block_status status;
        u64 bytenr;
        u64 ptr_gen;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *next;
        struct extent_buffer *cur;
        int ret, err = 0;
        u64 refs;

        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);

        /* Get the ref count of the starting block, from the cache if possible */
        if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
                refs = nrefs->refs[*level];
                ret = 0;
        } else {
                ret = btrfs_lookup_extent_info(NULL, root,
                                       path->nodes[*level]->start,
                                       *level, 1, &refs, NULL);
                if (ret < 0) {
                        err = ret;
                        goto out;
                }
                nrefs->bytenr[*level] = path->nodes[*level]->start;
                nrefs->refs[*level] = refs;
        }

        /* Starting block is shared: a positive result ends the walk here */
        if (refs > 1) {
                ret = enter_shared_node(root, path->nodes[*level]->start,
                                        refs, wc, *level);
                if (ret > 0) {
                        err = ret;
                        goto out;
                }
        }

        while (*level >= 0) {
                WARN_ON(*level < 0);
                WARN_ON(*level >= BTRFS_MAX_LEVEL);
                cur = path->nodes[*level];

                if (btrfs_header_level(cur) != *level)
                        WARN_ON(1);

                /* All pointers of this block have been consumed */
                if (path->slots[*level] >= btrfs_header_nritems(cur))
                        break;
                /* Reached a leaf: process its items and stop descending */
                if (*level == 0) {
                        ret = process_one_leaf(root, cur, wc);
                        if (ret < 0)
                                err = ret;
                        break;
                }
                bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
                ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);

                /* Ref count of the child, from the cache if possible */
                if (bytenr == nrefs->bytenr[*level - 1]) {
                        refs = nrefs->refs[*level - 1];
                } else {
                        ret = btrfs_lookup_extent_info(NULL, root, bytenr,
                                        *level - 1, 1, &refs, NULL);
                        if (ret < 0) {
                                /* Lookup failed: treat the child as unshared */
                                refs = 0;
                        } else {
                                nrefs->bytenr[*level - 1] = bytenr;
                                nrefs->refs[*level - 1] = refs;
                        }
                }

                /* Shared child: a positive result means move to the next slot */
                if (refs > 1) {
                        ret = enter_shared_node(root, bytenr, refs,
                                                wc, *level - 1);
                        if (ret > 0) {
                                path->slots[*level]++;
                                continue;
                        }
                }

                /* Use the cached buffer if it is up to date, else read it */
                next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
                if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
                        free_extent_buffer(next);
                        reada_walk_down(root, cur, path->slots[*level]);
                        next = read_tree_block(root->fs_info, bytenr, ptr_gen);
                        if (!extent_buffer_uptodate(next)) {
                                struct btrfs_key node_key;

                                /* Record the unreadable child as corrupt */
                                btrfs_node_key_to_cpu(path->nodes[*level],
                                                      &node_key,
                                                      path->slots[*level]);
                                btrfs_add_corrupt_extent_record(root->fs_info,
                                                &node_key,
                                                path->nodes[*level]->start,
                                                root->fs_info->nodesize,
                                                *level);
                                err = -EIO;
                                goto out;
                        }
                }

                /* The child must match what the parent says about it */
                ret = check_child_node(cur, path->slots[*level], next);
                if (ret) {
                        free_extent_buffer(next);
                        err = ret;
                        goto out;
                }

                if (btrfs_is_leaf(next))
                        status = btrfs_check_leaf(root, NULL, next);
                else
                        status = btrfs_check_node(root, NULL, next);
                if (status != BTRFS_TREE_BLOCK_CLEAN) {
                        free_extent_buffer(next);
                        err = -EIO;
                        goto out;
                }

                /* Step down: the child replaces whatever the path held there */
                *level = *level - 1;
                free_extent_buffer(path->nodes[*level]);
                path->nodes[*level] = next;
                path->slots[*level] = 0;
        }
out:
        /* Mark the block we stopped at as fully consumed */
        path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
        return err;
}
2326
2327 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2328                             unsigned int ext_ref);
2329
2330 /*
2331  * Returns >0  Found error, should continue
2332  * Returns <0  Fatal error, must exit the whole check
2333  * Returns 0   No errors found
2334  */
2335 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2336                              int *level, struct node_refs *nrefs, int ext_ref)
2337 {
2338         enum btrfs_tree_block_status status;
2339         u64 bytenr;
2340         u64 ptr_gen;
2341         struct btrfs_fs_info *fs_info = root->fs_info;
2342         struct extent_buffer *next;
2343         struct extent_buffer *cur;
2344         int ret;
2345
2346         WARN_ON(*level < 0);
2347         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2348
2349         ret = update_nodes_refs(root, path->nodes[*level]->start,
2350                                 nrefs, *level);
2351         if (ret < 0)
2352                 return ret;
2353
2354         while (*level >= 0) {
2355                 WARN_ON(*level < 0);
2356                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2357                 cur = path->nodes[*level];
2358
2359                 if (btrfs_header_level(cur) != *level)
2360                         WARN_ON(1);
2361
2362                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2363                         break;
2364                 /* Don't forgot to check leaf/node validation */
2365                 if (*level == 0) {
2366                         ret = btrfs_check_leaf(root, NULL, cur);
2367                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368                                 ret = -EIO;
2369                                 break;
2370                         }
2371                         ret = process_one_leaf_v2(root, path, nrefs,
2372                                                   level, ext_ref);
2373                         cur = path->nodes[*level];
2374                         break;
2375                 } else {
2376                         ret = btrfs_check_node(root, NULL, cur);
2377                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2378                                 ret = -EIO;
2379                                 break;
2380                         }
2381                 }
2382                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2383                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2384
2385                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2386                 if (ret)
2387                         break;
2388                 if (!nrefs->need_check[*level - 1]) {
2389                         path->slots[*level]++;
2390                         continue;
2391                 }
2392
2393                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2394                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2395                         free_extent_buffer(next);
2396                         reada_walk_down(root, cur, path->slots[*level]);
2397                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2398                         if (!extent_buffer_uptodate(next)) {
2399                                 struct btrfs_key node_key;
2400
2401                                 btrfs_node_key_to_cpu(path->nodes[*level],
2402                                                       &node_key,
2403                                                       path->slots[*level]);
2404                                 btrfs_add_corrupt_extent_record(fs_info,
2405                                                 &node_key,
2406                                                 path->nodes[*level]->start,
2407                                                 fs_info->nodesize,
2408                                                 *level);
2409                                 ret = -EIO;
2410                                 break;
2411                         }
2412                 }
2413
2414                 ret = check_child_node(cur, path->slots[*level], next);
2415                 if (ret < 0) 
2416                         break;
2417
2418                 if (btrfs_is_leaf(next))
2419                         status = btrfs_check_leaf(root, NULL, next);
2420                 else
2421                         status = btrfs_check_node(root, NULL, next);
2422                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2423                         free_extent_buffer(next);
2424                         ret = -EIO;
2425                         break;
2426                 }
2427
2428                 *level = *level - 1;
2429                 free_extent_buffer(path->nodes[*level]);
2430                 path->nodes[*level] = next;
2431                 path->slots[*level] = 0;
2432         }
2433         return ret;
2434 }
2435
2436 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2437                         struct walk_control *wc, int *level)
2438 {
2439         int i;
2440         struct extent_buffer *leaf;
2441
2442         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2443                 leaf = path->nodes[i];
2444                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2445                         path->slots[i]++;
2446                         *level = i;
2447                         return 0;
2448                 } else {
2449                         free_extent_buffer(path->nodes[*level]);
2450                         path->nodes[*level] = NULL;
2451                         BUG_ON(*level > wc->active_node);
2452                         if (*level == wc->active_node)
2453                                 leave_shared_node(root, wc, *level);
2454                         *level = i + 1;
2455                 }
2456         }
2457         return 1;
2458 }
2459
2460 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2461                            int *level)
2462 {
2463         int i;
2464         struct extent_buffer *leaf;
2465
2466         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2467                 leaf = path->nodes[i];
2468                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2469                         path->slots[i]++;
2470                         *level = i;
2471                         return 0;
2472                 } else {
2473                         free_extent_buffer(path->nodes[*level]);
2474                         path->nodes[*level] = NULL;
2475                         *level = i + 1;
2476                 }
2477         }
2478         return 1;
2479 }
2480
2481 static int check_root_dir(struct inode_record *rec)
2482 {
2483         struct inode_backref *backref;
2484         int ret = -1;
2485
2486         if (!rec->found_inode_item || rec->errors)
2487                 goto out;
2488         if (rec->nlink != 1 || rec->found_link != 0)
2489                 goto out;
2490         if (list_empty(&rec->backrefs))
2491                 goto out;
2492         backref = to_inode_backref(rec->backrefs.next);
2493         if (!backref->found_inode_ref)
2494                 goto out;
2495         if (backref->index != 0 || backref->namelen != 2 ||
2496             memcmp(backref->name, "..", 2))
2497                 goto out;
2498         if (backref->found_dir_index || backref->found_dir_item)
2499                 goto out;
2500         ret = 0;
2501 out:
2502         return ret;
2503 }
2504
2505 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2506                               struct btrfs_root *root, struct btrfs_path *path,
2507                               struct inode_record *rec)
2508 {
2509         struct btrfs_inode_item *ei;
2510         struct btrfs_key key;
2511         int ret;
2512
2513         key.objectid = rec->ino;
2514         key.type = BTRFS_INODE_ITEM_KEY;
2515         key.offset = (u64)-1;
2516
2517         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2518         if (ret < 0)
2519                 goto out;
2520         if (ret) {
2521                 if (!path->slots[0]) {
2522                         ret = -ENOENT;
2523                         goto out;
2524                 }
2525                 path->slots[0]--;
2526                 ret = 0;
2527         }
2528         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2529         if (key.objectid != rec->ino) {
2530                 ret = -ENOENT;
2531                 goto out;
2532         }
2533
2534         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2535                             struct btrfs_inode_item);
2536         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2537         btrfs_mark_buffer_dirty(path->nodes[0]);
2538         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2539         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2540                root->root_key.objectid);
2541 out:
2542         btrfs_release_path(path);
2543         return ret;
2544 }
2545
2546 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2547                                     struct btrfs_root *root,
2548                                     struct btrfs_path *path,
2549                                     struct inode_record *rec)
2550 {
2551         int ret;
2552
2553         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2554         btrfs_release_path(path);
2555         if (!ret)
2556                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2557         return ret;
2558 }
2559
2560 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2561                                struct btrfs_root *root,
2562                                struct btrfs_path *path,
2563                                struct inode_record *rec)
2564 {
2565         struct btrfs_inode_item *ei;
2566         struct btrfs_key key;
2567         int ret = 0;
2568
2569         key.objectid = rec->ino;
2570         key.type = BTRFS_INODE_ITEM_KEY;
2571         key.offset = 0;
2572
2573         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2574         if (ret) {
2575                 if (ret > 0)
2576                         ret = -ENOENT;
2577                 goto out;
2578         }
2579
2580         /* Since ret == 0, no need to check anything */
2581         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2582                             struct btrfs_inode_item);
2583         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2584         btrfs_mark_buffer_dirty(path->nodes[0]);
2585         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2586         printf("reset nbytes for ino %llu root %llu\n",
2587                rec->ino, root->root_key.objectid);
2588 out:
2589         btrfs_release_path(path);
2590         return ret;
2591 }
2592
2593 static int add_missing_dir_index(struct btrfs_root *root,
2594                                  struct cache_tree *inode_cache,
2595                                  struct inode_record *rec,
2596                                  struct inode_backref *backref)
2597 {
2598         struct btrfs_path path;
2599         struct btrfs_trans_handle *trans;
2600         struct btrfs_dir_item *dir_item;
2601         struct extent_buffer *leaf;
2602         struct btrfs_key key;
2603         struct btrfs_disk_key disk_key;
2604         struct inode_record *dir_rec;
2605         unsigned long name_ptr;
2606         u32 data_size = sizeof(*dir_item) + backref->namelen;
2607         int ret;
2608
2609         trans = btrfs_start_transaction(root, 1);
2610         if (IS_ERR(trans))
2611                 return PTR_ERR(trans);
2612
2613         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2614                 (unsigned long long)rec->ino);
2615
2616         btrfs_init_path(&path);
2617         key.objectid = backref->dir;
2618         key.type = BTRFS_DIR_INDEX_KEY;
2619         key.offset = backref->index;
2620         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2621         BUG_ON(ret);
2622
2623         leaf = path.nodes[0];
2624         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2625
2626         disk_key.objectid = cpu_to_le64(rec->ino);
2627         disk_key.type = BTRFS_INODE_ITEM_KEY;
2628         disk_key.offset = 0;
2629
2630         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2631         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2632         btrfs_set_dir_data_len(leaf, dir_item, 0);
2633         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2634         name_ptr = (unsigned long)(dir_item + 1);
2635         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2636         btrfs_mark_buffer_dirty(leaf);
2637         btrfs_release_path(&path);
2638         btrfs_commit_transaction(trans, root);
2639
2640         backref->found_dir_index = 1;
2641         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2642         BUG_ON(IS_ERR(dir_rec));
2643         if (!dir_rec)
2644                 return 0;
2645         dir_rec->found_size += backref->namelen;
2646         if (dir_rec->found_size == dir_rec->isize &&
2647             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2648                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2649         if (dir_rec->found_size != dir_rec->isize)
2650                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2651
2652         return 0;
2653 }
2654
2655 static int delete_dir_index(struct btrfs_root *root,
2656                             struct inode_backref *backref)
2657 {
2658         struct btrfs_trans_handle *trans;
2659         struct btrfs_dir_item *di;
2660         struct btrfs_path path;
2661         int ret = 0;
2662
2663         trans = btrfs_start_transaction(root, 1);
2664         if (IS_ERR(trans))
2665                 return PTR_ERR(trans);
2666
2667         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2668                 (unsigned long long)backref->dir,
2669                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2670                 (unsigned long long)root->objectid);
2671
2672         btrfs_init_path(&path);
2673         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2674                                     backref->name, backref->namelen,
2675                                     backref->index, -1);
2676         if (IS_ERR(di)) {
2677                 ret = PTR_ERR(di);
2678                 btrfs_release_path(&path);
2679                 btrfs_commit_transaction(trans, root);
2680                 if (ret == -ENOENT)
2681                         return 0;
2682                 return ret;
2683         }
2684
2685         if (!di)
2686                 ret = btrfs_del_item(trans, root, &path);
2687         else
2688                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689         BUG_ON(ret);
2690         btrfs_release_path(&path);
2691         btrfs_commit_transaction(trans, root);
2692         return ret;
2693 }
2694
2695 static int __create_inode_item(struct btrfs_trans_handle *trans,
2696                                struct btrfs_root *root, u64 ino, u64 size,
2697                                u64 nbytes, u64 nlink, u32 mode)
2698 {
2699         struct btrfs_inode_item ii;
2700         time_t now = time(NULL);
2701         int ret;
2702
2703         btrfs_set_stack_inode_size(&ii, size);
2704         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2705         btrfs_set_stack_inode_nlink(&ii, nlink);
2706         btrfs_set_stack_inode_mode(&ii, mode);
2707         btrfs_set_stack_inode_generation(&ii, trans->transid);
2708         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2709         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2710         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2711         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2712         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2713         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2714         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2715
2716         ret = btrfs_insert_inode(trans, root, ino, &ii);
2717         ASSERT(!ret);
2718
2719         warning("root %llu inode %llu recreating inode item, this may "
2720                 "be incomplete, please check permissions and content after "
2721                 "the fsck completes.\n", (unsigned long long)root->objectid,
2722                 (unsigned long long)ino);
2723
2724         return 0;
2725 }
2726
2727 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2728                                     struct btrfs_root *root, u64 ino,
2729                                     u8 filetype)
2730 {
2731         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2732
2733         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2734 }
2735
2736 static int create_inode_item(struct btrfs_root *root,
2737                              struct inode_record *rec, int root_dir)
2738 {
2739         struct btrfs_trans_handle *trans;
2740         u64 nlink = 0;
2741         u32 mode = 0;
2742         u64 size = 0;
2743         int ret;
2744
2745         trans = btrfs_start_transaction(root, 1);
2746         if (IS_ERR(trans)) {
2747                 ret = PTR_ERR(trans);
2748                 return ret;
2749         }
2750
2751         nlink = root_dir ? 1 : rec->found_link;
2752         if (rec->found_dir_item) {
2753                 if (rec->found_file_extent)
2754                         fprintf(stderr, "root %llu inode %llu has both a dir "
2755                                 "item and extents, unsure if it is a dir or a "
2756                                 "regular file so setting it as a directory\n",
2757                                 (unsigned long long)root->objectid,
2758                                 (unsigned long long)rec->ino);
2759                 mode = S_IFDIR | 0755;
2760                 size = rec->found_size;
2761         } else if (!rec->found_dir_item) {
2762                 size = rec->extent_end;
2763                 mode =  S_IFREG | 0755;
2764         }
2765
2766         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2767                                   nlink, mode);
2768         btrfs_commit_transaction(trans, root);
2769         return 0;
2770 }
2771
2772 static int repair_inode_backrefs(struct btrfs_root *root,
2773                                  struct inode_record *rec,
2774                                  struct cache_tree *inode_cache,
2775                                  int delete)
2776 {
2777         struct inode_backref *tmp, *backref;
2778         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2779         int ret = 0;
2780         int repaired = 0;
2781
2782         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2783                 if (!delete && rec->ino == root_dirid) {
2784                         if (!rec->found_inode_item) {
2785                                 ret = create_inode_item(root, rec, 1);
2786                                 if (ret)
2787                                         break;
2788                                 repaired++;
2789                         }
2790                 }
2791
2792                 /* Index 0 for root dir's are special, don't mess with it */
2793                 if (rec->ino == root_dirid && backref->index == 0)
2794                         continue;
2795
2796                 if (delete &&
2797                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2798                      (backref->found_dir_index && backref->found_inode_ref &&
2799                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2800                         ret = delete_dir_index(root, backref);
2801                         if (ret)
2802                                 break;
2803                         repaired++;
2804                         list_del(&backref->list);
2805                         free(backref);
2806                         continue;
2807                 }
2808
2809                 if (!delete && !backref->found_dir_index &&
2810                     backref->found_dir_item && backref->found_inode_ref) {
2811                         ret = add_missing_dir_index(root, inode_cache, rec,
2812                                                     backref);
2813                         if (ret)
2814                                 break;
2815                         repaired++;
2816                         if (backref->found_dir_item &&
2817                             backref->found_dir_index) {
2818                                 if (!backref->errors &&
2819                                     backref->found_inode_ref) {
2820                                         list_del(&backref->list);
2821                                         free(backref);
2822                                         continue;
2823                                 }
2824                         }
2825                 }
2826
2827                 if (!delete && (!backref->found_dir_index &&
2828                                 !backref->found_dir_item &&
2829                                 backref->found_inode_ref)) {
2830                         struct btrfs_trans_handle *trans;
2831                         struct btrfs_key location;
2832
2833                         ret = check_dir_conflict(root, backref->name,
2834                                                  backref->namelen,
2835                                                  backref->dir,
2836                                                  backref->index);
2837                         if (ret) {
2838                                 /*
2839                                  * let nlink fixing routine to handle it,
2840                                  * which can do it better.
2841                                  */
2842                                 ret = 0;
2843                                 break;
2844                         }
2845                         location.objectid = rec->ino;
2846                         location.type = BTRFS_INODE_ITEM_KEY;
2847                         location.offset = 0;
2848
2849                         trans = btrfs_start_transaction(root, 1);
2850                         if (IS_ERR(trans)) {
2851                                 ret = PTR_ERR(trans);
2852                                 break;
2853                         }
2854                         fprintf(stderr, "adding missing dir index/item pair "
2855                                 "for inode %llu\n",
2856                                 (unsigned long long)rec->ino);
2857                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2858                                                     backref->namelen,
2859                                                     backref->dir, &location,
2860                                                     imode_to_type(rec->imode),
2861                                                     backref->index);
2862                         BUG_ON(ret);
2863                         btrfs_commit_transaction(trans, root);
2864                         repaired++;
2865                 }
2866
2867                 if (!delete && (backref->found_inode_ref &&
2868                                 backref->found_dir_index &&
2869                                 backref->found_dir_item &&
2870                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2871                                 !rec->found_inode_item)) {
2872                         ret = create_inode_item(root, rec, 0);
2873                         if (ret)
2874                                 break;
2875                         repaired++;
2876                 }
2877
2878         }
2879         return ret ? ret : repaired;
2880 }
2881
2882 /*
2883  * To determine the file type for nlink/inode_item repair
2884  *
2885  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2886  * Return -ENOENT if file type is not found.
2887  */
2888 static int find_file_type(struct inode_record *rec, u8 *type)
2889 {
2890         struct inode_backref *backref;
2891
2892         /* For inode item recovered case */
2893         if (rec->found_inode_item) {
2894                 *type = imode_to_type(rec->imode);
2895                 return 0;
2896         }
2897
2898         list_for_each_entry(backref, &rec->backrefs, list) {
2899                 if (backref->found_dir_index || backref->found_dir_item) {
2900                         *type = backref->filetype;
2901                         return 0;
2902                 }
2903         }
2904         return -ENOENT;
2905 }
2906
2907 /*
2908  * To determine the file name for nlink repair
2909  *
2910  * Return 0 if file name is found, set name and namelen.
2911  * Return -ENOENT if file name is not found.
2912  */
2913 static int find_file_name(struct inode_record *rec,
2914                           char *name, int *namelen)
2915 {
2916         struct inode_backref *backref;
2917
2918         list_for_each_entry(backref, &rec->backrefs, list) {
2919                 if (backref->found_dir_index || backref->found_dir_item ||
2920                     backref->found_inode_ref) {
2921                         memcpy(name, backref->name, backref->namelen);
2922                         *namelen = backref->namelen;
2923                         return 0;
2924                 }
2925         }
2926         return -ENOENT;
2927 }
2928
2929 /* Reset the nlink of the inode to the correct one */
2930 static int reset_nlink(struct btrfs_trans_handle *trans,
2931                        struct btrfs_root *root,
2932                        struct btrfs_path *path,
2933                        struct inode_record *rec)
2934 {
2935         struct inode_backref *backref;
2936         struct inode_backref *tmp;
2937         struct btrfs_key key;
2938         struct btrfs_inode_item *inode_item;
2939         int ret = 0;
2940
2941         /* We don't believe this either, reset it and iterate backref */
2942         rec->found_link = 0;
2943
2944         /* Remove all backref including the valid ones */
2945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2946                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2947                                    backref->index, backref->name,
2948                                    backref->namelen, 0);
2949                 if (ret < 0)
2950                         goto out;
2951
2952                 /* remove invalid backref, so it won't be added back */
2953                 if (!(backref->found_dir_index &&
2954                       backref->found_dir_item &&
2955                       backref->found_inode_ref)) {
2956                         list_del(&backref->list);
2957                         free(backref);
2958                 } else {
2959                         rec->found_link++;
2960                 }
2961         }
2962
2963         /* Set nlink to 0 */
2964         key.objectid = rec->ino;
2965         key.type = BTRFS_INODE_ITEM_KEY;
2966         key.offset = 0;
2967         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2968         if (ret < 0)
2969                 goto out;
2970         if (ret > 0) {
2971                 ret = -ENOENT;
2972                 goto out;
2973         }
2974         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2975                                     struct btrfs_inode_item);
2976         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2977         btrfs_mark_buffer_dirty(path->nodes[0]);
2978         btrfs_release_path(path);
2979
2980         /*
2981          * Add back valid inode_ref/dir_item/dir_index,
2982          * add_link() will handle the nlink inc, so new nlink must be correct
2983          */
2984         list_for_each_entry(backref, &rec->backrefs, list) {
2985                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2986                                      backref->name, backref->namelen,
2987                                      backref->filetype, &backref->index, 1, 0);
2988                 if (ret < 0)
2989                         goto out;
2990         }
2991 out:
2992         btrfs_release_path(path);
2993         return ret;
2994 }
2995
2996 static int get_highest_inode(struct btrfs_trans_handle *trans,
2997                                 struct btrfs_root *root,
2998                                 struct btrfs_path *path,
2999                                 u64 *highest_ino)
3000 {
3001         struct btrfs_key key, found_key;
3002         int ret;
3003
3004         btrfs_init_path(path);
3005         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3006         key.offset = -1;
3007         key.type = BTRFS_INODE_ITEM_KEY;
3008         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3009         if (ret == 1) {
3010                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3011                                 path->slots[0] - 1);
3012                 *highest_ino = found_key.objectid;
3013                 ret = 0;
3014         }
3015         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3016                 ret = -EOVERFLOW;
3017         btrfs_release_path(path);
3018         return ret;
3019 }
3020
3021 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3022                                struct btrfs_root *root,
3023                                struct btrfs_path *path,
3024                                struct inode_record *rec)
3025 {
3026         char *dir_name = "lost+found";
3027         char namebuf[BTRFS_NAME_LEN] = {0};
3028         u64 lost_found_ino;
3029         u32 mode = 0700;
3030         u8 type = 0;
3031         int namelen = 0;
3032         int name_recovered = 0;
3033         int type_recovered = 0;
3034         int ret = 0;
3035
3036         /*
3037          * Get file name and type first before these invalid inode ref
3038          * are deleted by remove_all_invalid_backref()
3039          */
3040         name_recovered = !find_file_name(rec, namebuf, &namelen);
3041         type_recovered = !find_file_type(rec, &type);
3042
3043         if (!name_recovered) {
3044                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3045                        rec->ino, rec->ino);
3046                 namelen = count_digits(rec->ino);
3047                 sprintf(namebuf, "%llu", rec->ino);
3048                 name_recovered = 1;
3049         }
3050         if (!type_recovered) {
3051                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3052                        rec->ino);
3053                 type = BTRFS_FT_REG_FILE;
3054                 type_recovered = 1;
3055         }
3056
3057         ret = reset_nlink(trans, root, path, rec);
3058         if (ret < 0) {
3059                 fprintf(stderr,
3060                         "Failed to reset nlink for inode %llu: %s\n",
3061                         rec->ino, strerror(-ret));
3062                 goto out;
3063         }
3064
3065         if (rec->found_link == 0) {
3066                 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3067                 if (ret < 0)
3068                         goto out;
3069                 lost_found_ino++;
3070                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3071                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3072                                   mode);
3073                 if (ret < 0) {
3074                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
3075                                 dir_name, strerror(-ret));
3076                         goto out;
3077                 }
3078                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3079                                      namebuf, namelen, type, NULL, 1, 0);
3080                 /*
3081                  * Add ".INO" suffix several times to handle case where
3082                  * "FILENAME.INO" is already taken by another file.
3083                  */
3084                 while (ret == -EEXIST) {
3085                         /*
3086                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
3087                          */
3088                         if (namelen + count_digits(rec->ino) + 1 >
3089                             BTRFS_NAME_LEN) {
3090                                 ret = -EFBIG;
3091                                 goto out;
3092                         }
3093                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3094                                  ".%llu", rec->ino);
3095                         namelen += count_digits(rec->ino) + 1;
3096                         ret = btrfs_add_link(trans, root, rec->ino,
3097                                              lost_found_ino, namebuf,
3098                                              namelen, type, NULL, 1, 0);
3099                 }
3100                 if (ret < 0) {
3101                         fprintf(stderr,
3102                                 "Failed to link the inode %llu to %s dir: %s\n",
3103                                 rec->ino, dir_name, strerror(-ret));
3104                         goto out;
3105                 }
3106                 /*
3107                  * Just increase the found_link, don't actually add the
3108                  * backref. This will make things easier and this inode
3109                  * record will be freed after the repair is done.
3110                  * So fsck will not report problem about this inode.
3111                  */
3112                 rec->found_link++;
3113                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3114                        namelen, namebuf, dir_name);
3115         }
3116         printf("Fixed the nlink of inode %llu\n", rec->ino);
3117 out:
3118         /*
3119          * Clear the flag anyway, or we will loop forever for the same inode
3120          * as it will not be removed from the bad inode list and the dead loop
3121          * happens.
3122          */
3123         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3124         btrfs_release_path(path);
3125         return ret;
3126 }
3127
3128 /*
3129  * Check if there is any normal(reg or prealloc) file extent for given
3130  * ino.
3131  * This is used to determine the file type when neither its dir_index/item or
3132  * inode_item exists.
3133  *
3134  * This will *NOT* report error, if any error happens, just consider it does
3135  * not have any normal file extent.
3136  */
3137 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3138 {
3139         struct btrfs_path path;
3140         struct btrfs_key key;
3141         struct btrfs_key found_key;
3142         struct btrfs_file_extent_item *fi;
3143         u8 type;
3144         int ret = 0;
3145
3146         btrfs_init_path(&path);
3147         key.objectid = ino;
3148         key.type = BTRFS_EXTENT_DATA_KEY;
3149         key.offset = 0;
3150
3151         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3152         if (ret < 0) {
3153                 ret = 0;
3154                 goto out;
3155         }
3156         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3157                 ret = btrfs_next_leaf(root, &path);
3158                 if (ret) {
3159                         ret = 0;
3160                         goto out;
3161                 }
3162         }
3163         while (1) {
3164                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3165                                       path.slots[0]);
3166                 if (found_key.objectid != ino ||
3167                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3168                         break;
3169                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3170                                     struct btrfs_file_extent_item);
3171                 type = btrfs_file_extent_type(path.nodes[0], fi);
3172                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3173                         ret = 1;
3174                         goto out;
3175                 }
3176         }
3177 out:
3178         btrfs_release_path(&path);
3179         return ret;
3180 }
3181
3182 static u32 btrfs_type_to_imode(u8 type)
3183 {
3184         static u32 imode_by_btrfs_type[] = {
3185                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3186                 [BTRFS_FT_DIR]          = S_IFDIR,
3187                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3188                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3189                 [BTRFS_FT_FIFO]         = S_IFIFO,
3190                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3191                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3192         };
3193
3194         return imode_by_btrfs_type[(type)];
3195 }
3196
3197 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3198                                 struct btrfs_root *root,
3199                                 struct btrfs_path *path,
3200                                 struct inode_record *rec)
3201 {
3202         u8 filetype;
3203         u32 mode = 0700;
3204         int type_recovered = 0;
3205         int ret = 0;
3206
3207         printf("Trying to rebuild inode:%llu\n", rec->ino);
3208
3209         type_recovered = !find_file_type(rec, &filetype);
3210
3211         /*
3212          * Try to determine inode type if type not found.
3213          *
3214          * For found regular file extent, it must be FILE.
3215          * For found dir_item/index, it must be DIR.
3216          *
3217          * For undetermined one, use FILE as fallback.
3218          *
3219          * TODO:
3220          * 1. If found backref(inode_index/item is already handled) to it,
3221          *    it must be DIR.
3222          *    Need new inode-inode ref structure to allow search for that.
3223          */
3224         if (!type_recovered) {
3225                 if (rec->found_file_extent &&
3226                     find_normal_file_extent(root, rec->ino)) {
3227                         type_recovered = 1;
3228                         filetype = BTRFS_FT_REG_FILE;
3229                 } else if (rec->found_dir_item) {
3230                         type_recovered = 1;
3231                         filetype = BTRFS_FT_DIR;
3232                 } else if (!list_empty(&rec->orphan_extents)) {
3233                         type_recovered = 1;
3234                         filetype = BTRFS_FT_REG_FILE;
3235                 } else{
3236                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3237                                rec->ino);
3238                         type_recovered = 1;
3239                         filetype = BTRFS_FT_REG_FILE;
3240                 }
3241         }
3242
3243         ret = btrfs_new_inode(trans, root, rec->ino,
3244                               mode | btrfs_type_to_imode(filetype));
3245         if (ret < 0)
3246                 goto out;
3247
3248         /*
3249          * Here inode rebuild is done, we only rebuild the inode item,
3250          * don't repair the nlink(like move to lost+found).
3251          * That is the job of nlink repair.
3252          *
3253          * We just fill the record and return
3254          */
3255         rec->found_dir_item = 1;
3256         rec->imode = mode | btrfs_type_to_imode(filetype);
3257         rec->nlink = 0;
3258         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3259         /* Ensure the inode_nlinks repair function will be called */
3260         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3261 out:
3262         return ret;
3263 }
3264
3265 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3266                                       struct btrfs_root *root,
3267                                       struct btrfs_path *path,
3268                                       struct inode_record *rec)
3269 {
3270         struct orphan_data_extent *orphan;
3271         struct orphan_data_extent *tmp;
3272         int ret = 0;
3273
3274         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3275                 /*
3276                  * Check for conflicting file extents
3277                  *
3278                  * Here we don't know whether the extents is compressed or not,
3279                  * so we can only assume it not compressed nor data offset,
3280                  * and use its disk_len as extent length.
3281                  */
3282                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3283                                        orphan->offset, orphan->disk_len, 0);
3284                 btrfs_release_path(path);
3285                 if (ret < 0)
3286                         goto out;
3287                 if (!ret) {
3288                         fprintf(stderr,
3289                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3290                                 orphan->disk_bytenr, orphan->disk_len);
3291                         ret = btrfs_free_extent(trans,
3292                                         root->fs_info->extent_root,
3293                                         orphan->disk_bytenr, orphan->disk_len,
3294                                         0, root->objectid, orphan->objectid,
3295                                         orphan->offset);
3296                         if (ret < 0)
3297                                 goto out;
3298                 }
3299                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3300                                 orphan->offset, orphan->disk_bytenr,
3301                                 orphan->disk_len, orphan->disk_len);
3302                 if (ret < 0)
3303                         goto out;
3304
3305                 /* Update file size info */
3306                 rec->found_size += orphan->disk_len;
3307                 if (rec->found_size == rec->nbytes)
3308                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3309
3310                 /* Update the file extent hole info too */
3311                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3312                                            orphan->disk_len);
3313                 if (ret < 0)
3314                         goto out;
3315                 if (RB_EMPTY_ROOT(&rec->holes))
3316                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3317
3318                 list_del(&orphan->list);
3319                 free(orphan);
3320         }
3321         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3322 out:
3323         return ret;
3324 }
3325
3326 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3327                                         struct btrfs_root *root,
3328                                         struct btrfs_path *path,
3329                                         struct inode_record *rec)
3330 {
3331         struct rb_node *node;
3332         struct file_extent_hole *hole;
3333         int found = 0;
3334         int ret = 0;
3335
3336         node = rb_first(&rec->holes);
3337
3338         while (node) {
3339                 found = 1;
3340                 hole = rb_entry(node, struct file_extent_hole, node);
3341                 ret = btrfs_punch_hole(trans, root, rec->ino,
3342                                        hole->start, hole->len);
3343                 if (ret < 0)
3344                         goto out;
3345                 ret = del_file_extent_hole(&rec->holes, hole->start,
3346                                            hole->len);
3347                 if (ret < 0)
3348                         goto out;
3349                 if (RB_EMPTY_ROOT(&rec->holes))
3350                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3351                 node = rb_first(&rec->holes);
3352         }
3353         /* special case for a file losing all its file extent */
3354         if (!found) {
3355                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3356                                        round_up(rec->isize,
3357                                                 root->fs_info->sectorsize));
3358                 if (ret < 0)
3359                         goto out;
3360         }
3361         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3362                rec->ino, root->objectid);
3363 out:
3364         return ret;
3365 }
3366
3367 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3368 {
3369         struct btrfs_trans_handle *trans;
3370         struct btrfs_path path;
3371         int ret = 0;
3372
3373         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3374                              I_ERR_NO_ORPHAN_ITEM |
3375                              I_ERR_LINK_COUNT_WRONG |
3376                              I_ERR_NO_INODE_ITEM |
3377                              I_ERR_FILE_EXTENT_ORPHAN |
3378                              I_ERR_FILE_EXTENT_DISCOUNT|
3379                              I_ERR_FILE_NBYTES_WRONG)))
3380                 return rec->errors;
3381
3382         /*
3383          * For nlink repair, it may create a dir and add link, so
3384          * 2 for parent(256)'s dir_index and dir_item
3385          * 2 for lost+found dir's inode_item and inode_ref
3386          * 1 for the new inode_ref of the file
3387          * 2 for lost+found dir's dir_index and dir_item for the file
3388          */
3389         trans = btrfs_start_transaction(root, 7);
3390         if (IS_ERR(trans))
3391                 return PTR_ERR(trans);
3392
3393         btrfs_init_path(&path);
3394         if (rec->errors & I_ERR_NO_INODE_ITEM)
3395                 ret = repair_inode_no_item(trans, root, &path, rec);
3396         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3397                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3398         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3399                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3400         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3401                 ret = repair_inode_isize(trans, root, &path, rec);
3402         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3403                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3404         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3405                 ret = repair_inode_nlinks(trans, root, &path, rec);
3406         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3407                 ret = repair_inode_nbytes(trans, root, &path, rec);
3408         btrfs_commit_transaction(trans, root);
3409         btrfs_release_path(&path);
3410         return ret;
3411 }
3412
3413 static int check_inode_recs(struct btrfs_root *root,
3414                             struct cache_tree *inode_cache)
3415 {
3416         struct cache_extent *cache;
3417         struct ptr_node *node;
3418         struct inode_record *rec;
3419         struct inode_backref *backref;
3420         int stage = 0;
3421         int ret = 0;
3422         int err = 0;
3423         u64 error = 0;
3424         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3425
3426         if (btrfs_root_refs(&root->root_item) == 0) {
3427                 if (!cache_tree_empty(inode_cache))
3428                         fprintf(stderr, "warning line %d\n", __LINE__);
3429                 return 0;
3430         }
3431
3432         /*
3433          * We need to repair backrefs first because we could change some of the
3434          * errors in the inode recs.
3435          *
3436          * We also need to go through and delete invalid backrefs first and then
3437          * add the correct ones second.  We do this because we may get EEXIST
3438          * when adding back the correct index because we hadn't yet deleted the
3439          * invalid index.
3440          *
3441          * For example, if we were missing a dir index then the directories
3442          * isize would be wrong, so if we fixed the isize to what we thought it
3443          * would be and then fixed the backref we'd still have a invalid fs, so
3444          * we need to add back the dir index and then check to see if the isize
3445          * is still wrong.
3446          */
3447         while (stage < 3) {
3448                 stage++;
3449                 if (stage == 3 && !err)
3450                         break;
3451
3452                 cache = search_cache_extent(inode_cache, 0);
3453                 while (repair && cache) {
3454                         node = container_of(cache, struct ptr_node, cache);
3455                         rec = node->data;
3456                         cache = next_cache_extent(cache);
3457
3458                         /* Need to free everything up and rescan */
3459                         if (stage == 3) {
3460                                 remove_cache_extent(inode_cache, &node->cache);
3461                                 free(node);
3462                                 free_inode_rec(rec);
3463                                 continue;
3464                         }
3465
3466                         if (list_empty(&rec->backrefs))
3467                                 continue;
3468
3469                         ret = repair_inode_backrefs(root, rec, inode_cache,
3470                                                     stage == 1);
3471                         if (ret < 0) {
3472                                 err = ret;
3473                                 stage = 2;
3474                                 break;
3475                         } if (ret > 0) {
3476                                 err = -EAGAIN;
3477                         }
3478                 }
3479         }
3480         if (err)
3481                 return err;
3482
3483         rec = get_inode_rec(inode_cache, root_dirid, 0);
3484         BUG_ON(IS_ERR(rec));
3485         if (rec) {
3486                 ret = check_root_dir(rec);
3487                 if (ret) {
3488                         fprintf(stderr, "root %llu root dir %llu error\n",
3489                                 (unsigned long long)root->root_key.objectid,
3490                                 (unsigned long long)root_dirid);
3491                         print_inode_error(root, rec);
3492                         error++;
3493                 }
3494         } else {
3495                 if (repair) {
3496                         struct btrfs_trans_handle *trans;
3497
3498                         trans = btrfs_start_transaction(root, 1);
3499                         if (IS_ERR(trans)) {
3500                                 err = PTR_ERR(trans);
3501                                 return err;
3502                         }
3503
3504                         fprintf(stderr,
3505                                 "root %llu missing its root dir, recreating\n",
3506                                 (unsigned long long)root->objectid);
3507
3508                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3509                         BUG_ON(ret);
3510
3511                         btrfs_commit_transaction(trans, root);
3512                         return -EAGAIN;
3513                 }
3514
3515                 fprintf(stderr, "root %llu root dir %llu not found\n",
3516                         (unsigned long long)root->root_key.objectid,
3517                         (unsigned long long)root_dirid);
3518         }
3519
3520         while (1) {
3521                 cache = search_cache_extent(inode_cache, 0);
3522                 if (!cache)
3523                         break;
3524                 node = container_of(cache, struct ptr_node, cache);
3525                 rec = node->data;
3526                 remove_cache_extent(inode_cache, &node->cache);
3527                 free(node);
3528                 if (rec->ino == root_dirid ||
3529                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3530                         free_inode_rec(rec);
3531                         continue;
3532                 }
3533
3534                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3535                         ret = check_orphan_item(root, rec->ino);
3536                         if (ret == 0)
3537                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3538                         if (can_free_inode_rec(rec)) {
3539                                 free_inode_rec(rec);
3540                                 continue;
3541                         }
3542                 }
3543
3544                 if (!rec->found_inode_item)
3545                         rec->errors |= I_ERR_NO_INODE_ITEM;
3546                 if (rec->found_link != rec->nlink)
3547                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3548                 if (repair) {
3549                         ret = try_repair_inode(root, rec);
3550                         if (ret == 0 && can_free_inode_rec(rec)) {
3551                                 free_inode_rec(rec);
3552                                 continue;
3553                         }
3554                         ret = 0;
3555                 }
3556
3557                 if (!(repair && ret == 0))
3558                         error++;
3559                 print_inode_error(root, rec);
3560                 list_for_each_entry(backref, &rec->backrefs, list) {
3561                         if (!backref->found_dir_item)
3562                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3563                         if (!backref->found_dir_index)
3564                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3565                         if (!backref->found_inode_ref)
3566                                 backref->errors |= REF_ERR_NO_INODE_REF;
3567                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3568                                 " namelen %u name %s filetype %d errors %x",
3569                                 (unsigned long long)backref->dir,
3570                                 (unsigned long long)backref->index,
3571                                 backref->namelen, backref->name,
3572                                 backref->filetype, backref->errors);
3573                         print_ref_error(backref->errors);
3574                 }
3575                 free_inode_rec(rec);
3576         }
3577         return (error > 0) ? -1 : 0;
3578 }
3579
3580 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3581                                         u64 objectid)
3582 {
3583         struct cache_extent *cache;
3584         struct root_record *rec = NULL;
3585         int ret;
3586
3587         cache = lookup_cache_extent(root_cache, objectid, 1);
3588         if (cache) {
3589                 rec = container_of(cache, struct root_record, cache);
3590         } else {
3591                 rec = calloc(1, sizeof(*rec));
3592                 if (!rec)
3593                         return ERR_PTR(-ENOMEM);
3594                 rec->objectid = objectid;
3595                 INIT_LIST_HEAD(&rec->backrefs);
3596                 rec->cache.start = objectid;
3597                 rec->cache.size = 1;
3598
3599                 ret = insert_cache_extent(root_cache, &rec->cache);
3600                 if (ret)
3601                         return ERR_PTR(-EEXIST);
3602         }
3603         return rec;
3604 }
3605
3606 static struct root_backref *get_root_backref(struct root_record *rec,
3607                                              u64 ref_root, u64 dir, u64 index,
3608                                              const char *name, int namelen)
3609 {
3610         struct root_backref *backref;
3611
3612         list_for_each_entry(backref, &rec->backrefs, list) {
3613                 if (backref->ref_root != ref_root || backref->dir != dir ||
3614                     backref->namelen != namelen)
3615                         continue;
3616                 if (memcmp(name, backref->name, namelen))
3617                         continue;
3618                 return backref;
3619         }
3620
3621         backref = calloc(1, sizeof(*backref) + namelen + 1);
3622         if (!backref)
3623                 return NULL;
3624         backref->ref_root = ref_root;
3625         backref->dir = dir;
3626         backref->index = index;
3627         backref->namelen = namelen;
3628         memcpy(backref->name, name, namelen);
3629         backref->name[namelen] = '\0';
3630         list_add_tail(&backref->list, &rec->backrefs);
3631         return backref;
3632 }
3633
3634 static void free_root_record(struct cache_extent *cache)
3635 {
3636         struct root_record *rec;
3637         struct root_backref *backref;
3638
3639         rec = container_of(cache, struct root_record, cache);
3640         while (!list_empty(&rec->backrefs)) {
3641                 backref = to_root_backref(rec->backrefs.next);
3642                 list_del(&backref->list);
3643                 free(backref);
3644         }
3645
3646         free(rec);
3647 }
3648
3649 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3650
3651 static int add_root_backref(struct cache_tree *root_cache,
3652                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3653                             const char *name, int namelen,
3654                             int item_type, int errors)
3655 {
3656         struct root_record *rec;
3657         struct root_backref *backref;
3658
3659         rec = get_root_rec(root_cache, root_id);
3660         BUG_ON(IS_ERR(rec));
3661         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3662         BUG_ON(!backref);
3663
3664         backref->errors |= errors;
3665
3666         if (item_type != BTRFS_DIR_ITEM_KEY) {
3667                 if (backref->found_dir_index || backref->found_back_ref ||
3668                     backref->found_forward_ref) {
3669                         if (backref->index != index)
3670                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3671                 } else {
3672                         backref->index = index;
3673                 }
3674         }
3675
3676         if (item_type == BTRFS_DIR_ITEM_KEY) {
3677                 if (backref->found_forward_ref)
3678                         rec->found_ref++;
3679                 backref->found_dir_item = 1;
3680         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3681                 backref->found_dir_index = 1;
3682         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3683                 if (backref->found_forward_ref)
3684                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3685                 else if (backref->found_dir_item)
3686                         rec->found_ref++;
3687                 backref->found_forward_ref = 1;
3688         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3689                 if (backref->found_back_ref)
3690                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3691                 backref->found_back_ref = 1;
3692         } else {
3693                 BUG_ON(1);
3694         }
3695
3696         if (backref->found_forward_ref && backref->found_dir_item)
3697                 backref->reachable = 1;
3698         return 0;
3699 }
3700
3701 static int merge_root_recs(struct btrfs_root *root,
3702                            struct cache_tree *src_cache,
3703                            struct cache_tree *dst_cache)
3704 {
3705         struct cache_extent *cache;
3706         struct ptr_node *node;
3707         struct inode_record *rec;
3708         struct inode_backref *backref;
3709         int ret = 0;
3710
3711         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3712                 free_inode_recs_tree(src_cache);
3713                 return 0;
3714         }
3715
3716         while (1) {
3717                 cache = search_cache_extent(src_cache, 0);
3718                 if (!cache)
3719                         break;
3720                 node = container_of(cache, struct ptr_node, cache);
3721                 rec = node->data;
3722                 remove_cache_extent(src_cache, &node->cache);
3723                 free(node);
3724
3725                 ret = is_child_root(root, root->objectid, rec->ino);
3726                 if (ret < 0)
3727                         break;
3728                 else if (ret == 0)
3729                         goto skip;
3730
3731                 list_for_each_entry(backref, &rec->backrefs, list) {
3732                         BUG_ON(backref->found_inode_ref);
3733                         if (backref->found_dir_item)
3734                                 add_root_backref(dst_cache, rec->ino,
3735                                         root->root_key.objectid, backref->dir,
3736                                         backref->index, backref->name,
3737                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3738                                         backref->errors);
3739                         if (backref->found_dir_index)
3740                                 add_root_backref(dst_cache, rec->ino,
3741                                         root->root_key.objectid, backref->dir,
3742                                         backref->index, backref->name,
3743                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3744                                         backref->errors);
3745                 }
3746 skip:
3747                 free_inode_rec(rec);
3748         }
3749         if (ret < 0)
3750                 return ret;
3751         return 0;
3752 }
3753
3754 static int check_root_refs(struct btrfs_root *root,
3755                            struct cache_tree *root_cache)
3756 {
3757         struct root_record *rec;
3758         struct root_record *ref_root;
3759         struct root_backref *backref;
3760         struct cache_extent *cache;
3761         int loop = 1;
3762         int ret;
3763         int error;
3764         int errors = 0;
3765
3766         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3767         BUG_ON(IS_ERR(rec));
3768         rec->found_ref = 1;
3769
3770         /* fixme: this can not detect circular references */
3771         while (loop) {
3772                 loop = 0;
3773                 cache = search_cache_extent(root_cache, 0);
3774                 while (1) {
3775                         if (!cache)
3776                                 break;
3777                         rec = container_of(cache, struct root_record, cache);
3778                         cache = next_cache_extent(cache);
3779
3780                         if (rec->found_ref == 0)
3781                                 continue;
3782
3783                         list_for_each_entry(backref, &rec->backrefs, list) {
3784                                 if (!backref->reachable)
3785                                         continue;
3786
3787                                 ref_root = get_root_rec(root_cache,
3788                                                         backref->ref_root);
3789                                 BUG_ON(IS_ERR(ref_root));
3790                                 if (ref_root->found_ref > 0)
3791                                         continue;
3792
3793                                 backref->reachable = 0;
3794                                 rec->found_ref--;
3795                                 if (rec->found_ref == 0)
3796                                         loop = 1;
3797                         }
3798                 }
3799         }
3800
3801         cache = search_cache_extent(root_cache, 0);
3802         while (1) {
3803                 if (!cache)
3804                         break;
3805                 rec = container_of(cache, struct root_record, cache);
3806                 cache = next_cache_extent(cache);
3807
3808                 if (rec->found_ref == 0 &&
3809                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3810                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3811                         ret = check_orphan_item(root->fs_info->tree_root,
3812                                                 rec->objectid);
3813                         if (ret == 0)
3814                                 continue;
3815
3816                         /*
3817                          * If we don't have a root item then we likely just have
3818                          * a dir item in a snapshot for this root but no actual
3819                          * ref key or anything so it's meaningless.
3820                          */
3821                         if (!rec->found_root_item)
3822                                 continue;
3823                         errors++;
3824                         fprintf(stderr, "fs tree %llu not referenced\n",
3825                                 (unsigned long long)rec->objectid);
3826                 }
3827
3828                 error = 0;
3829                 if (rec->found_ref > 0 && !rec->found_root_item)
3830                         error = 1;
3831                 list_for_each_entry(backref, &rec->backrefs, list) {
3832                         if (!backref->found_dir_item)
3833                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3834                         if (!backref->found_dir_index)
3835                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3836                         if (!backref->found_back_ref)
3837                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3838                         if (!backref->found_forward_ref)
3839                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3840                         if (backref->reachable && backref->errors)
3841                                 error = 1;
3842                 }
3843                 if (!error)
3844                         continue;
3845
3846                 errors++;
3847                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3848                         (unsigned long long)rec->objectid, rec->found_ref,
3849                          rec->found_root_item ? "" : "not found");
3850
3851                 list_for_each_entry(backref, &rec->backrefs, list) {
3852                         if (!backref->reachable)
3853                                 continue;
3854                         if (!backref->errors && rec->found_root_item)
3855                                 continue;
3856                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3857                                 " index %llu namelen %u name %s errors %x\n",
3858                                 (unsigned long long)backref->ref_root,
3859                                 (unsigned long long)backref->dir,
3860                                 (unsigned long long)backref->index,
3861                                 backref->namelen, backref->name,
3862                                 backref->errors);
3863                         print_ref_error(backref->errors);
3864                 }
3865         }
3866         return errors > 0 ? 1 : 0;
3867 }
3868
3869 static int process_root_ref(struct extent_buffer *eb, int slot,
3870                             struct btrfs_key *key,
3871                             struct cache_tree *root_cache)
3872 {
3873         u64 dirid;
3874         u64 index;
3875         u32 len;
3876         u32 name_len;
3877         struct btrfs_root_ref *ref;
3878         char namebuf[BTRFS_NAME_LEN];
3879         int error;
3880
3881         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3882
3883         dirid = btrfs_root_ref_dirid(eb, ref);
3884         index = btrfs_root_ref_sequence(eb, ref);
3885         name_len = btrfs_root_ref_name_len(eb, ref);
3886
3887         if (name_len <= BTRFS_NAME_LEN) {
3888                 len = name_len;
3889                 error = 0;
3890         } else {
3891                 len = BTRFS_NAME_LEN;
3892                 error = REF_ERR_NAME_TOO_LONG;
3893         }
3894         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3895
3896         if (key->type == BTRFS_ROOT_REF_KEY) {
3897                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3898                                  index, namebuf, len, key->type, error);
3899         } else {
3900                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3901                                  index, namebuf, len, key->type, error);
3902         }
3903         return 0;
3904 }
3905
3906 static void free_corrupt_block(struct cache_extent *cache)
3907 {
3908         struct btrfs_corrupt_block *corrupt;
3909
3910         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3911         free(corrupt);
3912 }
3913
3914 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3915
3916 /*
3917  * Repair the btree of the given root.
3918  *
3919  * The fix is to remove the node key in corrupt_blocks cache_tree.
3920  * and rebalance the tree.
3921  * After the fix, the btree should be writeable.
3922  */
3923 static int repair_btree(struct btrfs_root *root,
3924                         struct cache_tree *corrupt_blocks)
3925 {
3926         struct btrfs_trans_handle *trans;
3927         struct btrfs_path path;
3928         struct btrfs_corrupt_block *corrupt;
3929         struct cache_extent *cache;
3930         struct btrfs_key key;
3931         u64 offset;
3932         int level;
3933         int ret = 0;
3934
3935         if (cache_tree_empty(corrupt_blocks))
3936                 return 0;
3937
3938         trans = btrfs_start_transaction(root, 1);
3939         if (IS_ERR(trans)) {
3940                 ret = PTR_ERR(trans);
3941                 fprintf(stderr, "Error starting transaction: %s\n",
3942                         strerror(-ret));
3943                 return ret;
3944         }
3945         btrfs_init_path(&path);
3946         cache = first_cache_extent(corrupt_blocks);
3947         while (cache) {
3948                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3949                                        cache);
3950                 level = corrupt->level;
3951                 path.lowest_level = level;
3952                 key.objectid = corrupt->key.objectid;
3953                 key.type = corrupt->key.type;
3954                 key.offset = corrupt->key.offset;
3955
3956                 /*
3957                  * Here we don't want to do any tree balance, since it may
3958                  * cause a balance with corrupted brother leaf/node,
3959                  * so ins_len set to 0 here.
3960                  * Balance will be done after all corrupt node/leaf is deleted.
3961                  */
3962                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3963                 if (ret < 0)
3964                         goto out;
3965                 offset = btrfs_node_blockptr(path.nodes[level],
3966                                              path.slots[level]);
3967
3968                 /* Remove the ptr */
3969                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3970                 if (ret < 0)
3971                         goto out;
3972                 /*
3973                  * Remove the corresponding extent
3974                  * return value is not concerned.
3975                  */
3976                 btrfs_release_path(&path);
3977                 ret = btrfs_free_extent(trans, root, offset,
3978                                 root->fs_info->nodesize, 0,
3979                                 root->root_key.objectid, level - 1, 0);
3980                 cache = next_cache_extent(cache);
3981         }
3982
3983         /* Balance the btree using btrfs_search_slot() */
3984         cache = first_cache_extent(corrupt_blocks);
3985         while (cache) {
3986                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3987                                        cache);
3988                 memcpy(&key, &corrupt->key, sizeof(key));
3989                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3990                 if (ret < 0)
3991                         goto out;
3992                 /* return will always >0 since it won't find the item */
3993                 ret = 0;
3994                 btrfs_release_path(&path);
3995                 cache = next_cache_extent(cache);
3996         }
3997 out:
3998         btrfs_commit_transaction(trans, root);
3999         btrfs_release_path(&path);
4000         return ret;
4001 }
4002
4003 static int check_fs_root(struct btrfs_root *root,
4004                          struct cache_tree *root_cache,
4005                          struct walk_control *wc)
4006 {
4007         int ret = 0;
4008         int err = 0;
4009         int wret;
4010         int level;
4011         struct btrfs_path path;
4012         struct shared_node root_node;
4013         struct root_record *rec;
4014         struct btrfs_root_item *root_item = &root->root_item;
4015         struct cache_tree corrupt_blocks;
4016         struct orphan_data_extent *orphan;
4017         struct orphan_data_extent *tmp;
4018         enum btrfs_tree_block_status status;
4019         struct node_refs nrefs;
4020
4021         /*
4022          * Reuse the corrupt_block cache tree to record corrupted tree block
4023          *
4024          * Unlike the usage in extent tree check, here we do it in a per
4025          * fs/subvol tree base.
4026          */
4027         cache_tree_init(&corrupt_blocks);
4028         root->fs_info->corrupt_blocks = &corrupt_blocks;
4029
4030         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4031                 rec = get_root_rec(root_cache, root->root_key.objectid);
4032                 BUG_ON(IS_ERR(rec));
4033                 if (btrfs_root_refs(root_item) > 0)
4034                         rec->found_root_item = 1;
4035         }
4036
4037         btrfs_init_path(&path);
4038         memset(&root_node, 0, sizeof(root_node));
4039         cache_tree_init(&root_node.root_cache);
4040         cache_tree_init(&root_node.inode_cache);
4041         memset(&nrefs, 0, sizeof(nrefs));
4042
4043         /* Move the orphan extent record to corresponding inode_record */
4044         list_for_each_entry_safe(orphan, tmp,
4045                                  &root->orphan_data_extents, list) {
4046                 struct inode_record *inode;
4047
4048                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4049                                       1);
4050                 BUG_ON(IS_ERR(inode));
4051                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4052                 list_move(&orphan->list, &inode->orphan_extents);
4053         }
4054
4055         level = btrfs_header_level(root->node);
4056         memset(wc->nodes, 0, sizeof(wc->nodes));
4057         wc->nodes[level] = &root_node;
4058         wc->active_node = level;
4059         wc->root_level = level;
4060
4061         /* We may not have checked the root block, lets do that now */
4062         if (btrfs_is_leaf(root->node))
4063                 status = btrfs_check_leaf(root, NULL, root->node);
4064         else
4065                 status = btrfs_check_node(root, NULL, root->node);
4066         if (status != BTRFS_TREE_BLOCK_CLEAN)
4067                 return -EIO;
4068
4069         if (btrfs_root_refs(root_item) > 0 ||
4070             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4071                 path.nodes[level] = root->node;
4072                 extent_buffer_get(root->node);
4073                 path.slots[level] = 0;
4074         } else {
4075                 struct btrfs_key key;
4076                 struct btrfs_disk_key found_key;
4077
4078                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4079                 level = root_item->drop_level;
4080                 path.lowest_level = level;
4081                 if (level > btrfs_header_level(root->node) ||
4082                     level >= BTRFS_MAX_LEVEL) {
4083                         error("ignoring invalid drop level: %u", level);
4084                         goto skip_walking;
4085                 }
4086                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4087                 if (wret < 0)
4088                         goto skip_walking;
4089                 btrfs_node_key(path.nodes[level], &found_key,
4090                                 path.slots[level]);
4091                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4092                                         sizeof(found_key)));
4093         }
4094
4095         while (1) {
4096                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4097                 if (wret < 0)
4098                         ret = wret;
4099                 if (wret != 0)
4100                         break;
4101
4102                 wret = walk_up_tree(root, &path, wc, &level);
4103                 if (wret < 0)
4104                         ret = wret;
4105                 if (wret != 0)
4106                         break;
4107         }
4108 skip_walking:
4109         btrfs_release_path(&path);
4110
4111         if (!cache_tree_empty(&corrupt_blocks)) {
4112                 struct cache_extent *cache;
4113                 struct btrfs_corrupt_block *corrupt;
4114
4115                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4116                        root->root_key.objectid);
4117                 cache = first_cache_extent(&corrupt_blocks);
4118                 while (cache) {
4119                         corrupt = container_of(cache,
4120                                                struct btrfs_corrupt_block,
4121                                                cache);
4122                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4123                                cache->start, corrupt->level,
4124                                corrupt->key.objectid, corrupt->key.type,
4125                                corrupt->key.offset);
4126                         cache = next_cache_extent(cache);
4127                 }
4128                 if (repair) {
4129                         printf("Try to repair the btree for root %llu\n",
4130                                root->root_key.objectid);
4131                         ret = repair_btree(root, &corrupt_blocks);
4132                         if (ret < 0)
4133                                 fprintf(stderr, "Failed to repair btree: %s\n",
4134                                         strerror(-ret));
4135                         if (!ret)
4136                                 printf("Btree for root %llu is fixed\n",
4137                                        root->root_key.objectid);
4138                 }
4139         }
4140
4141         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4142         if (err < 0)
4143                 ret = err;
4144
4145         if (root_node.current) {
4146                 root_node.current->checked = 1;
4147                 maybe_free_inode_rec(&root_node.inode_cache,
4148                                 root_node.current);
4149         }
4150
4151         err = check_inode_recs(root, &root_node.inode_cache);
4152         if (!ret)
4153                 ret = err;
4154
4155         free_corrupt_blocks_tree(&corrupt_blocks);
4156         root->fs_info->corrupt_blocks = NULL;
4157         free_orphan_data_extents(&root->orphan_data_extents);
4158         return ret;
4159 }
4160
4161 static int fs_root_objectid(u64 objectid)
4162 {
4163         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4164             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4165                 return 1;
4166         return is_fstree(objectid);
4167 }
4168
4169 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4170                           struct cache_tree *root_cache)
4171 {
4172         struct btrfs_path path;
4173         struct btrfs_key key;
4174         struct walk_control wc;
4175         struct extent_buffer *leaf, *tree_node;
4176         struct btrfs_root *tmp_root;
4177         struct btrfs_root *tree_root = fs_info->tree_root;
4178         int ret;
4179         int err = 0;
4180
4181         if (ctx.progress_enabled) {
4182                 ctx.tp = TASK_FS_ROOTS;
4183                 task_start(ctx.info);
4184         }
4185
4186         /*
4187          * Just in case we made any changes to the extent tree that weren't
4188          * reflected into the free space cache yet.
4189          */
4190         if (repair)
4191                 reset_cached_block_groups(fs_info);
4192         memset(&wc, 0, sizeof(wc));
4193         cache_tree_init(&wc.shared);
4194         btrfs_init_path(&path);
4195
4196 again:
4197         key.offset = 0;
4198         key.objectid = 0;
4199         key.type = BTRFS_ROOT_ITEM_KEY;
4200         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4201         if (ret < 0) {
4202                 err = 1;
4203                 goto out;
4204         }
4205         tree_node = tree_root->node;
4206         while (1) {
4207                 if (tree_node != tree_root->node) {
4208                         free_root_recs_tree(root_cache);
4209                         btrfs_release_path(&path);
4210                         goto again;
4211                 }
4212                 leaf = path.nodes[0];
4213                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4214                         ret = btrfs_next_leaf(tree_root, &path);
4215                         if (ret) {
4216                                 if (ret < 0)
4217                                         err = 1;
4218                                 break;
4219                         }
4220                         leaf = path.nodes[0];
4221                 }
4222                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4223                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4224                     fs_root_objectid(key.objectid)) {
4225                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4226                                 tmp_root = btrfs_read_fs_root_no_cache(
4227                                                 fs_info, &key);
4228                         } else {
4229                                 key.offset = (u64)-1;
4230                                 tmp_root = btrfs_read_fs_root(
4231                                                 fs_info, &key);
4232                         }
4233                         if (IS_ERR(tmp_root)) {
4234                                 err = 1;
4235                                 goto next;
4236                         }
4237                         ret = check_fs_root(tmp_root, root_cache, &wc);
4238                         if (ret == -EAGAIN) {
4239                                 free_root_recs_tree(root_cache);
4240                                 btrfs_release_path(&path);
4241                                 goto again;
4242                         }
4243                         if (ret)
4244                                 err = 1;
4245                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4246                                 btrfs_free_fs_root(tmp_root);
4247                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4248                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4249                         process_root_ref(leaf, path.slots[0], &key,
4250                                          root_cache);
4251                 }
4252 next:
4253                 path.slots[0]++;
4254         }
4255 out:
4256         btrfs_release_path(&path);
4257         if (err)
4258                 free_extent_cache_tree(&wc.shared);
4259         if (!cache_tree_empty(&wc.shared))
4260                 fprintf(stderr, "warning line %d\n", __LINE__);
4261
4262         task_stop(ctx.info);
4263
4264         return err;
4265 }
4266
4267 /*
4268  * Find the @index according by @ino and name.
4269  * Notice:time efficiency is O(N)
4270  *
4271  * @root:       the root of the fs/file tree
4272  * @index_ret:  the index as return value
4273  * @namebuf:    the name to match
4274  * @name_len:   the length of name to match
4275  * @file_type:  the file_type of INODE_ITEM to match
4276  *
4277  * Returns 0 if found and *@index_ret will be modified with right value
4278  * Returns< 0 not found and *@index_ret will be (u64)-1
4279  */
4280 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4281                           u64 *index_ret, char *namebuf, u32 name_len,
4282                           u8 file_type)
4283 {
4284         struct btrfs_path path;
4285         struct extent_buffer *node;
4286         struct btrfs_dir_item *di;
4287         struct btrfs_key key;
4288         struct btrfs_key location;
4289         char name[BTRFS_NAME_LEN] = {0};
4290
4291         u32 total;
4292         u32 cur = 0;
4293         u32 len;
4294         u32 data_len;
4295         u8 filetype;
4296         int slot;
4297         int ret;
4298
4299         ASSERT(index_ret);
4300
4301         /* search from the last index */
4302         key.objectid = dirid;
4303         key.offset = (u64)-1;
4304         key.type = BTRFS_DIR_INDEX_KEY;
4305
4306         btrfs_init_path(&path);
4307         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4308         if (ret < 0)
4309                 return ret;
4310
4311 loop:
4312         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4313         if (ret) {
4314                 ret = -ENOENT;
4315                 *index_ret = (64)-1;
4316                 goto out;
4317         }
4318         /* Check whether inode_id/filetype/name match */
4319         node = path.nodes[0];
4320         slot = path.slots[0];
4321         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4322         total = btrfs_item_size_nr(node, slot);
4323         while (cur < total) {
4324                 ret = -ENOENT;
4325                 len = btrfs_dir_name_len(node, di);
4326                 data_len = btrfs_dir_data_len(node, di);
4327
4328                 btrfs_dir_item_key_to_cpu(node, di, &location);
4329                 if (location.objectid != location_id ||
4330                     location.type != BTRFS_INODE_ITEM_KEY ||
4331                     location.offset != 0)
4332                         goto next;
4333
4334                 filetype = btrfs_dir_type(node, di);
4335                 if (file_type != filetype)
4336                         goto next;
4337
4338                 if (len > BTRFS_NAME_LEN)
4339                         len = BTRFS_NAME_LEN;
4340
4341                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4342                 if (len != name_len || strncmp(namebuf, name, len))
4343                         goto next;
4344
4345                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4346                 *index_ret = key.offset;
4347                 ret = 0;
4348                 goto out;
4349 next:
4350                 len += sizeof(*di) + data_len;
4351                 di = (struct btrfs_dir_item *)((char *)di + len);
4352                 cur += len;
4353         }
4354         goto loop;
4355
4356 out:
4357         btrfs_release_path(&path);
4358         return ret;
4359 }
4360
4361 /*
4362  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4363  * INODE_REF/INODE_EXTREF match.
4364  *
4365  * @root:       the root of the fs/file tree
4366  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4367  *              value while find index
4368  * @location_key: location key of the struct btrfs_dir_item to match
4369  * @name:       the name to match
4370  * @namelen:    the length of name
4371  * @file_type:  the type of file to math
4372  *
4373  * Return 0 if no error occurred.
4374  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4375  * DIR_ITEM/DIR_INDEX
4376  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4377  * and DIR_ITEM/DIR_INDEX mismatch
4378  */
4379 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4380                          struct btrfs_key *location_key, char *name,
4381                          u32 namelen, u8 file_type)
4382 {
4383         struct btrfs_path path;
4384         struct extent_buffer *node;
4385         struct btrfs_dir_item *di;
4386         struct btrfs_key location;
4387         char namebuf[BTRFS_NAME_LEN] = {0};
4388         u32 total;
4389         u32 cur = 0;
4390         u32 len;
4391         u32 data_len;
4392         u8 filetype;
4393         int slot;
4394         int ret;
4395
4396         /* get the index by traversing all index */
4397         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4398                 ret = find_dir_index(root, key->objectid,
4399                                      location_key->objectid, &key->offset,
4400                                      name, namelen, file_type);
4401                 if (ret)
4402                         ret = DIR_INDEX_MISSING;
4403                 return ret;
4404         }
4405
4406         btrfs_init_path(&path);
4407         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4408         if (ret) {
4409                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4410                         DIR_INDEX_MISSING;
4411                 goto out;
4412         }
4413
4414         /* Check whether inode_id/filetype/name match */
4415         node = path.nodes[0];
4416         slot = path.slots[0];
4417         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4418         total = btrfs_item_size_nr(node, slot);
4419         while (cur < total) {
4420                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4421                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4422
4423                 len = btrfs_dir_name_len(node, di);
4424                 data_len = btrfs_dir_data_len(node, di);
4425
4426                 btrfs_dir_item_key_to_cpu(node, di, &location);
4427                 if (location.objectid != location_key->objectid ||
4428                     location.type != location_key->type ||
4429                     location.offset != location_key->offset)
4430                         goto next;
4431
4432                 filetype = btrfs_dir_type(node, di);
4433                 if (file_type != filetype)
4434                         goto next;
4435
4436                 if (len > BTRFS_NAME_LEN) {
4437                         len = BTRFS_NAME_LEN;
4438                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4439                         root->objectid,
4440                         key->type == BTRFS_DIR_ITEM_KEY ?
4441                         "DIR_ITEM" : "DIR_INDEX",
4442                         key->objectid, key->offset, len);
4443                 }
4444                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4445                                    len);
4446                 if (len != namelen || strncmp(namebuf, name, len))
4447                         goto next;
4448
4449                 ret = 0;
4450                 goto out;
4451 next:
4452                 len += sizeof(*di) + data_len;
4453                 di = (struct btrfs_dir_item *)((char *)di + len);
4454                 cur += len;
4455         }
4456
4457 out:
4458         btrfs_release_path(&path);
4459         return ret;
4460 }
4461
4462 /*
4463  * Prints inode ref error message
4464  */
4465 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4466                                 u64 index, const char *namebuf, int name_len,
4467                                 u8 filetype, int err)
4468 {
4469         if (!err)
4470                 return;
4471
4472         /* root dir error */
4473         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4474                 error(
4475         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4476                       root->objectid, key->objectid, key->offset, namebuf);
4477                 return;
4478         }
4479
4480         /* normal error */
4481         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4482                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4483                       root->objectid, key->offset,
4484                       btrfs_name_hash(namebuf, name_len),
4485                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4486                       namebuf, filetype);
4487         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4488                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4489                       root->objectid, key->offset, index,
4490                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4491                       namebuf, filetype);
4492 }
4493
4494 /*
4495  * Insert the missing inode item.
4496  *
4497  * Returns 0 means success.
4498  * Returns <0 means error.
4499  */
4500 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4501                                      u8 filetype)
4502 {
4503         struct btrfs_key key;
4504         struct btrfs_trans_handle *trans;
4505         struct btrfs_path path;
4506         int ret;
4507
4508         key.objectid = ino;
4509         key.type = BTRFS_INODE_ITEM_KEY;
4510         key.offset = 0;
4511
4512         btrfs_init_path(&path);
4513         trans = btrfs_start_transaction(root, 1);
4514         if (IS_ERR(trans)) {
4515                 ret = -EIO;
4516                 goto out;
4517         }
4518
4519         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4520         if (ret < 0 || !ret)
4521                 goto fail;
4522
4523         /* insert inode item */
4524         create_inode_item_lowmem(trans, root, ino, filetype);
4525         ret = 0;
4526 fail:
4527         btrfs_commit_transaction(trans, root);
4528 out:
4529         if (ret)
4530                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4531                       root->objectid, ino);
4532         btrfs_release_path(&path);
4533         return ret;
4534 }
4535
4536 /*
4537  * The ternary means dir item, dir index and relative inode ref.
4538  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4539  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4540  * strategy:
4541  * If two of three is missing or mismatched, delete the existing one.
4542  * If one of three is missing or mismatched, add the missing one.
4543  *
4544  * returns 0 means success.
4545  * returns not 0 means on error;
4546  */
4547 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4548                           u64 index, char *name, int name_len, u8 filetype,
4549                           int err)
4550 {
4551         struct btrfs_trans_handle *trans;
4552         int stage = 0;
4553         int ret = 0;
4554
4555         /*
4556          * stage shall be one of following valild values:
4557          *      0: Fine, nothing to do.
4558          *      1: One of three is wrong, so add missing one.
4559          *      2: Two of three is wrong, so delete existed one.
4560          */
4561         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4562                 stage++;
4563         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4564                 stage++;
4565         if (err & (INODE_REF_MISSING))
4566                 stage++;
4567
4568         /* stage must be smllarer than 3 */
4569         ASSERT(stage < 3);
4570
4571         trans = btrfs_start_transaction(root, 1);
4572         if (stage == 2) {
4573                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4574                                    name_len, 0);
4575                 goto out;
4576         }
4577         if (stage == 1) {
4578                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4579                                filetype, &index, 1, 1);
4580                 goto out;
4581         }
4582 out:
4583         btrfs_commit_transaction(trans, root);
4584
4585         if (ret)
4586                 error("fail to repair inode %llu name %s filetype %u",
4587                       ino, name, filetype);
4588         else
4589                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4590                        stage == 2 ? "Delete" : "Add",
4591                        ino, name, filetype);
4592
4593         return ret;
4594 }
4595
/*
 * Traverse the given INODE_REF and call find_dir_item() to find related
 * DIR_ITEM/DIR_INDEX.
 *
 * @root:        the root of the fs/file tree
 * @ref_key:     the key of the INODE_REF
 * @path:        the path provides node and slot
 * @name_ret:    returns with the first ref's name
 * @namelen_ret: len of the name_ret
 * @refs_ret:    the count of INODE_REF
 * @mode:        the st_mode of INODE_ITEM
 *
 * Return 0 if no error occurred.
 */
4610 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4611                            struct btrfs_path *path, char *name_ret,
4612                            u32 *namelen_ret, u64 *refs_ret, int mode)
4613 {
4614         struct btrfs_key key;
4615         struct btrfs_key location;
4616         struct btrfs_inode_ref *ref;
4617         struct extent_buffer *node;
4618         char namebuf[BTRFS_NAME_LEN] = {0};
4619         u32 total;
4620         u32 cur = 0;
4621         u32 len;
4622         u32 name_len;
4623         u64 index;
4624         int ret;
4625         int err = 0;
4626         int tmp_err;
4627         int slot;
4628         int need_research = 0;
4629         u64 refs;
4630
4631 begin:
4632         err = 0;
4633         cur = 0;
4634         refs = *refs_ret;
4635
4636         /* since after repair, path and the dir item may be changed */
4637         if (need_research) {
4638                 need_research = 0;
4639                 btrfs_release_path(path);
4640                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4641                 /* the item was deleted, let path point to the last checked item */
4642                 if (ret > 0) {
4643                         if (path->slots[0] == 0)
4644                                 btrfs_prev_leaf(root, path);
4645                         else
4646                                 path->slots[0]--;
4647                 }
4648                 if (ret)
4649                         goto out;
4650         }
4651
4652         location.objectid = ref_key->objectid;
4653         location.type = BTRFS_INODE_ITEM_KEY;
4654         location.offset = 0;
4655         node = path->nodes[0];
4656         slot = path->slots[0];
4657
4658         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4659         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4660         total = btrfs_item_size_nr(node, slot);
4661
4662 next:
4663         /* Update inode ref count */
4664         refs++;
4665         tmp_err = 0;
4666         index = btrfs_inode_ref_index(node, ref);
4667         name_len = btrfs_inode_ref_name_len(node, ref);
4668
4669         if (name_len <= BTRFS_NAME_LEN) {
4670                 len = name_len;
4671         } else {
4672                 len = BTRFS_NAME_LEN;
4673                 warning("root %llu INODE_REF[%llu %llu] name too long",
4674                         root->objectid, ref_key->objectid, ref_key->offset);
4675         }
4676
4677         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4678
4679         /* copy the first name found to name_ret */
4680         if (refs == 1 && name_ret) {
4681                 memcpy(name_ret, namebuf, len);
4682                 *namelen_ret = len;
4683         }
4684
4685         /* Check root dir ref */
4686         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4687                 if (index != 0 || len != strlen("..") ||
4688                     strncmp("..", namebuf, len) ||
4689                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4690                         /* set err bits then repair will delete the ref */
4691                         err |= DIR_INDEX_MISSING;
4692                         err |= DIR_ITEM_MISSING;
4693                 }
4694                 goto end;
4695         }
4696
4697         /* Find related DIR_INDEX */
4698         key.objectid = ref_key->offset;
4699         key.type = BTRFS_DIR_INDEX_KEY;
4700         key.offset = index;
4701         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4702                             imode_to_type(mode));
4703
4704         /* Find related dir_item */
4705         key.objectid = ref_key->offset;
4706         key.type = BTRFS_DIR_ITEM_KEY;
4707         key.offset = btrfs_name_hash(namebuf, len);
4708         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4709                             imode_to_type(mode));
4710 end:
4711         if (tmp_err && repair) {
4712                 ret = repair_ternary_lowmem(root, ref_key->offset,
4713                                             ref_key->objectid, index, namebuf,
4714                                             name_len, imode_to_type(mode),
4715                                             tmp_err);
4716                 if (!ret) {
4717                         need_research = 1;
4718                         goto begin;
4719                 }
4720         }
4721         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4722                             imode_to_type(mode), tmp_err);
4723         err |= tmp_err;
4724         len = sizeof(*ref) + name_len;
4725         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4726         cur += len;
4727         if (cur < total)
4728                 goto next;
4729
4730 out:
4731         *refs_ret = refs;
4732         return err;
4733 }
4734
4735 /*
4736  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4737  * DIR_ITEM/DIR_INDEX.
4738  *
4739  * @root:       the root of the fs/file tree
4740  * @ref_key:    the key of the INODE_EXTREF
4741  * @refs:       the count of INODE_EXTREF
4742  * @mode:       the st_mode of INODE_ITEM
4743  *
4744  * Return 0 if no error occurred.
4745  */
4746 static int check_inode_extref(struct btrfs_root *root,
4747                               struct btrfs_key *ref_key,
4748                               struct extent_buffer *node, int slot, u64 *refs,
4749                               int mode)
4750 {
4751         struct btrfs_key key;
4752         struct btrfs_key location;
4753         struct btrfs_inode_extref *extref;
4754         char namebuf[BTRFS_NAME_LEN] = {0};
4755         u32 total;
4756         u32 cur = 0;
4757         u32 len;
4758         u32 name_len;
4759         u64 index;
4760         u64 parent;
4761         int ret;
4762         int err = 0;
4763
4764         location.objectid = ref_key->objectid;
4765         location.type = BTRFS_INODE_ITEM_KEY;
4766         location.offset = 0;
4767
4768         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4769         total = btrfs_item_size_nr(node, slot);
4770
4771 next:
4772         /* update inode ref count */
4773         (*refs)++;
4774         name_len = btrfs_inode_extref_name_len(node, extref);
4775         index = btrfs_inode_extref_index(node, extref);
4776         parent = btrfs_inode_extref_parent(node, extref);
4777         if (name_len <= BTRFS_NAME_LEN) {
4778                 len = name_len;
4779         } else {
4780                 len = BTRFS_NAME_LEN;
4781                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4782                         root->objectid, ref_key->objectid, ref_key->offset);
4783         }
4784         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4785
4786         /* Check root dir ref name */
4787         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4788                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4789                       root->objectid, ref_key->objectid, ref_key->offset,
4790                       namebuf);
4791                 err |= ROOT_DIR_ERROR;
4792         }
4793
4794         /* find related dir_index */
4795         key.objectid = parent;
4796         key.type = BTRFS_DIR_INDEX_KEY;
4797         key.offset = index;
4798         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4799         err |= ret;
4800
4801         /* find related dir_item */
4802         key.objectid = parent;
4803         key.type = BTRFS_DIR_ITEM_KEY;
4804         key.offset = btrfs_name_hash(namebuf, len);
4805         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4806         err |= ret;
4807
4808         len = sizeof(*extref) + name_len;
4809         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4810         cur += len;
4811
4812         if (cur < total)
4813                 goto next;
4814
4815         return err;
4816 }
4817
4818 /*
4819  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4820  * DIR_ITEM/DIR_INDEX match.
4821  * Return with @index_ret.
4822  *
4823  * @root:       the root of the fs/file tree
4824  * @key:        the key of the INODE_REF/INODE_EXTREF
4825  * @name:       the name in the INODE_REF/INODE_EXTREF
4826  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4827  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4828  *              value (64)-1 means do not check index
4829  * @ext_ref:    the EXTENDED_IREF feature
4830  *
4831  * Return 0 if no error occurred.
4832  * Return >0 for error bitmap
4833  */
4834 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4835                           char *name, int namelen, u64 *index_ret,
4836                           unsigned int ext_ref)
4837 {
4838         struct btrfs_path path;
4839         struct btrfs_inode_ref *ref;
4840         struct btrfs_inode_extref *extref;
4841         struct extent_buffer *node;
4842         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4843         u32 total;
4844         u32 cur = 0;
4845         u32 len;
4846         u32 ref_namelen;
4847         u64 ref_index;
4848         u64 parent;
4849         u64 dir_id;
4850         int slot;
4851         int ret;
4852
4853         ASSERT(index_ret);
4854
4855         btrfs_init_path(&path);
4856         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4857         if (ret) {
4858                 ret = INODE_REF_MISSING;
4859                 goto extref;
4860         }
4861
4862         node = path.nodes[0];
4863         slot = path.slots[0];
4864
4865         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4866         total = btrfs_item_size_nr(node, slot);
4867
4868         /* Iterate all entry of INODE_REF */
4869         while (cur < total) {
4870                 ret = INODE_REF_MISSING;
4871
4872                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4873                 ref_index = btrfs_inode_ref_index(node, ref);
4874                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4875                         goto next_ref;
4876
4877                 if (cur + sizeof(*ref) + ref_namelen > total ||
4878                     ref_namelen > BTRFS_NAME_LEN) {
4879                         warning("root %llu INODE %s[%llu %llu] name too long",
4880                                 root->objectid,
4881                                 key->type == BTRFS_INODE_REF_KEY ?
4882                                         "REF" : "EXTREF",
4883                                 key->objectid, key->offset);
4884
4885                         if (cur + sizeof(*ref) > total)
4886                                 break;
4887                         len = min_t(u32, total - cur - sizeof(*ref),
4888                                     BTRFS_NAME_LEN);
4889                 } else {
4890                         len = ref_namelen;
4891                 }
4892
4893                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4894                                    len);
4895
4896                 if (len != namelen || strncmp(ref_namebuf, name, len))
4897                         goto next_ref;
4898
4899                 *index_ret = ref_index;
4900                 ret = 0;
4901                 goto out;
4902 next_ref:
4903                 len = sizeof(*ref) + ref_namelen;
4904                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4905                 cur += len;
4906         }
4907
4908 extref:
4909         /* Skip if not support EXTENDED_IREF feature */
4910         if (!ext_ref)
4911                 goto out;
4912
4913         btrfs_release_path(&path);
4914         btrfs_init_path(&path);
4915
4916         dir_id = key->offset;
4917         key->type = BTRFS_INODE_EXTREF_KEY;
4918         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4919
4920         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4921         if (ret) {
4922                 ret = INODE_REF_MISSING;
4923                 goto out;
4924         }
4925
4926         node = path.nodes[0];
4927         slot = path.slots[0];
4928
4929         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4930         cur = 0;
4931         total = btrfs_item_size_nr(node, slot);
4932
4933         /* Iterate all entry of INODE_EXTREF */
4934         while (cur < total) {
4935                 ret = INODE_REF_MISSING;
4936
4937                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4938                 ref_index = btrfs_inode_extref_index(node, extref);
4939                 parent = btrfs_inode_extref_parent(node, extref);
4940                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4941                         goto next_extref;
4942
4943                 if (parent != dir_id)
4944                         goto next_extref;
4945
4946                 if (ref_namelen <= BTRFS_NAME_LEN) {
4947                         len = ref_namelen;
4948                 } else {
4949                         len = BTRFS_NAME_LEN;
4950                         warning("root %llu INODE %s[%llu %llu] name too long",
4951                                 root->objectid,
4952                                 key->type == BTRFS_INODE_REF_KEY ?
4953                                         "REF" : "EXTREF",
4954                                 key->objectid, key->offset);
4955                 }
4956                 read_extent_buffer(node, ref_namebuf,
4957                                    (unsigned long)(extref + 1), len);
4958
4959                 if (len != namelen || strncmp(ref_namebuf, name, len))
4960                         goto next_extref;
4961
4962                 *index_ret = ref_index;
4963                 ret = 0;
4964                 goto out;
4965
4966 next_extref:
4967                 len = sizeof(*extref) + ref_namelen;
4968                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4969                 cur += len;
4970
4971         }
4972 out:
4973         btrfs_release_path(&path);
4974         return ret;
4975 }
4976
4977 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4978                                u64 ino, u64 index, const char *namebuf,
4979                                int name_len, u8 filetype, int err)
4980 {
4981         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4982                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4983                       root->objectid, key->objectid, key->offset, namebuf,
4984                       filetype,
4985                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4986         }
4987
4988         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4989                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4990                       root->objectid, key->objectid, index, namebuf, filetype,
4991                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4992         }
4993
4994         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4995                 error(
4996                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4997                       root->objectid, ino, index, namebuf, filetype,
4998                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
4999         }
5000
5001         if (err & INODE_REF_MISSING)
5002                 error(
5003                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5004                       root->objectid, ino, key->objectid, namebuf, filetype);
5005
5006 }
5007
5008 /*
5009  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5010  *
5011  * Returns error after repair
5012  */
5013 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5014                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5015                            int err)
5016 {
5017         int ret;
5018
5019         if (err & INODE_ITEM_MISSING) {
5020                 ret = repair_inode_item_missing(root, ino, filetype);
5021                 if (!ret)
5022                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5023         }
5024
5025         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5026                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5027                                             name_len, filetype, err);
5028                 if (!ret) {
5029                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5030                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5031                         err &= ~(INODE_REF_MISSING);
5032                 }
5033         }
5034         return err;
5035 }
5036
5037 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5038                 u64 *size_ret)
5039 {
5040         struct btrfs_key key;
5041         struct btrfs_path path;
5042         u32 len;
5043         struct btrfs_dir_item *di;
5044         int ret;
5045         int cur = 0;
5046         int total = 0;
5047
5048         ASSERT(size_ret);
5049         *size_ret = 0;
5050
5051         key.objectid = ino;
5052         key.type = type;
5053         key.offset = (u64)-1;
5054
5055         btrfs_init_path(&path);
5056         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5057         if (ret < 0) {
5058                 ret = -EIO;
5059                 goto out;
5060         }
5061         /* if found, go to spacial case */
5062         if (ret == 0)
5063                 goto special_case;
5064
5065 loop:
5066         ret = btrfs_previous_item(root, &path, ino, type);
5067
5068         if (ret) {
5069                 ret = 0;
5070                 goto out;
5071         }
5072
5073 special_case:
5074         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5075         cur = 0;
5076         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5077
5078         while (cur < total) {
5079                 len = btrfs_dir_name_len(path.nodes[0], di);
5080                 if (len > BTRFS_NAME_LEN)
5081                         len = BTRFS_NAME_LEN;
5082                 *size_ret += len;
5083
5084                 len += btrfs_dir_data_len(path.nodes[0], di);
5085                 len += sizeof(*di);
5086                 di = (struct btrfs_dir_item *)((char *)di + len);
5087                 cur += len;
5088         }
5089         goto loop;
5090
5091 out:
5092         btrfs_release_path(&path);
5093         return ret;
5094 }
5095
5096 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5097 {
5098         u64 item_size;
5099         u64 index_size;
5100         int ret;
5101
5102         ASSERT(size);
5103         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5104         if (ret)
5105                 goto out;
5106
5107         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5108         if (ret)
5109                 goto out;
5110
5111         *size = item_size + index_size;
5112
5113 out:
5114         if (ret)
5115                 error("failed to count root %llu INODE[%llu] root size",
5116                       root->objectid, ino);
5117         return ret;
5118 }
5119
5120 /*
5121  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5122  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5123  *
5124  * @root:       the root of the fs/file tree
5125  * @key:        the key of the INODE_REF/INODE_EXTREF
5126  * @path:       the path
5127  * @size:       the st_size of the INODE_ITEM
5128  * @ext_ref:    the EXTENDED_IREF feature
5129  *
5130  * Return 0 if no error occurred.
5131  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5132  */
5133 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5134                           struct btrfs_path *path, u64 *size,
5135                           unsigned int ext_ref)
5136 {
5137         struct btrfs_dir_item *di;
5138         struct btrfs_inode_item *ii;
5139         struct btrfs_key key;
5140         struct btrfs_key location;
5141         struct extent_buffer *node;
5142         int slot;
5143         char namebuf[BTRFS_NAME_LEN] = {0};
5144         u32 total;
5145         u32 cur = 0;
5146         u32 len;
5147         u32 name_len;
5148         u32 data_len;
5149         u8 filetype;
5150         u32 mode = 0;
5151         u64 index;
5152         int ret;
5153         int err;
5154         int tmp_err;
5155         int need_research = 0;
5156
5157         /*
5158          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5159          * ignore index check.
5160          */
5161         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5162                 index = di_key->offset;
5163         else
5164                 index = (u64)-1;
5165 begin:
5166         err = 0;
5167         cur = 0;
5168
5169         /* since after repair, path and the dir item may be changed */
5170         if (need_research) {
5171                 need_research = 0;
5172                 err |= DIR_COUNT_AGAIN;
5173                 btrfs_release_path(path);
5174                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5175                 /* the item was deleted, let path point the last checked item */
5176                 if (ret > 0) {
5177                         if (path->slots[0] == 0)
5178                                 btrfs_prev_leaf(root, path);
5179                         else
5180                                 path->slots[0]--;
5181                 }
5182                 if (ret)
5183                         goto out;
5184         }
5185
5186         node = path->nodes[0];
5187         slot = path->slots[0];
5188
5189         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5190         total = btrfs_item_size_nr(node, slot);
5191         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5192
5193         while (cur < total) {
5194                 data_len = btrfs_dir_data_len(node, di);
5195                 tmp_err = 0;
5196                 if (data_len)
5197                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5198                               root->objectid,
5199               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5200                               di_key->objectid, di_key->offset, data_len);
5201
5202                 name_len = btrfs_dir_name_len(node, di);
5203                 if (name_len <= BTRFS_NAME_LEN) {
5204                         len = name_len;
5205                 } else {
5206                         len = BTRFS_NAME_LEN;
5207                         warning("root %llu %s[%llu %llu] name too long",
5208                                 root->objectid,
5209                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5210                                 di_key->objectid, di_key->offset);
5211                 }
5212                 (*size) += name_len;
5213                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5214                                    len);
5215                 filetype = btrfs_dir_type(node, di);
5216
5217                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5218                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5219                         err |= -EIO;
5220                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5221                         root->objectid, di_key->objectid, di_key->offset,
5222                         namebuf, len, filetype, di_key->offset,
5223                         btrfs_name_hash(namebuf, len));
5224                 }
5225
5226                 btrfs_dir_item_key_to_cpu(node, di, &location);
5227                 /* Ignore related ROOT_ITEM check */
5228                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5229                         goto next;
5230
5231                 btrfs_release_path(path);
5232                 /* Check relative INODE_ITEM(existence/filetype) */
5233                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5234                 if (ret) {
5235                         tmp_err |= INODE_ITEM_MISSING;
5236                         goto next;
5237                 }
5238
5239                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5240                                     struct btrfs_inode_item);
5241                 mode = btrfs_inode_mode(path->nodes[0], ii);
5242                 if (imode_to_type(mode) != filetype) {
5243                         tmp_err |= INODE_ITEM_MISMATCH;
5244                         goto next;
5245                 }
5246
5247                 /* Check relative INODE_REF/INODE_EXTREF */
5248                 key.objectid = location.objectid;
5249                 key.type = BTRFS_INODE_REF_KEY;
5250                 key.offset = di_key->objectid;
5251                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5252                                           &index, ext_ref);
5253
5254                 /* check relative INDEX/ITEM */
5255                 key.objectid = di_key->objectid;
5256                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5257                         key.type = BTRFS_DIR_INDEX_KEY;
5258                         key.offset = index;
5259                 } else {
5260                         key.type = BTRFS_DIR_ITEM_KEY;
5261                         key.offset = btrfs_name_hash(namebuf, name_len);
5262                 }
5263
5264                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5265                                          name_len, filetype);
5266                 /* find_dir_item may find index */
5267                 if (key.type == BTRFS_DIR_INDEX_KEY)
5268                         index = key.offset;
5269 next:
5270
5271                 if (tmp_err && repair) {
5272                         ret = repair_dir_item(root, di_key->objectid,
5273                                               location.objectid, index,
5274                                               imode_to_type(mode), namebuf,
5275                                               name_len, tmp_err);
5276                         if (ret != tmp_err) {
5277                                 need_research = 1;
5278                                 goto begin;
5279                         }
5280                 }
5281                 btrfs_release_path(path);
5282                 print_dir_item_err(root, di_key, location.objectid, index,
5283                                    namebuf, name_len, filetype, tmp_err);
5284                 err |= tmp_err;
5285                 len = sizeof(*di) + name_len + data_len;
5286                 di = (struct btrfs_dir_item *)((char *)di + len);
5287                 cur += len;
5288
5289                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5290                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5291                               root->objectid, di_key->objectid,
5292                               di_key->offset);
5293                         break;
5294                 }
5295         }
5296 out:
5297         /* research path */
5298         btrfs_release_path(path);
5299         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5300         if (ret)
5301                 err |= ret > 0 ? -ENOENT : ret;
5302         return err;
5303 }
5304
5305 /*
5306  * Check file extent datasum/hole, update the size of the file extents,
5307  * check and update the last offset of the file extent.
5308  *
5309  * @root:       the root of fs/file tree.
5310  * @fkey:       the key of the file extent.
5311  * @nodatasum:  INODE_NODATASUM feature.
5312  * @size:       the sum of all EXTENT_DATA items size for this inode.
5313  * @end:        the offset of the last extent.
5314  *
5315  * Return 0 if no error occurred.
5316  */
5317 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5318                              struct extent_buffer *node, int slot,
5319                              unsigned int nodatasum, u64 *size, u64 *end)
5320 {
5321         struct btrfs_file_extent_item *fi;
5322         u64 disk_bytenr;
5323         u64 disk_num_bytes;
5324         u64 extent_num_bytes;
5325         u64 extent_offset;
5326         u64 csum_found;         /* In byte size, sectorsize aligned */
5327         u64 search_start;       /* Logical range start we search for csum */
5328         u64 search_len;         /* Logical range len we search for csum */
5329         unsigned int extent_type;
5330         unsigned int is_hole;
5331         int compressed = 0;
5332         int ret;
5333         int err = 0;
5334
5335         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5336
5337         /* Check inline extent */
5338         extent_type = btrfs_file_extent_type(node, fi);
5339         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5340                 struct btrfs_item *e = btrfs_item_nr(slot);
5341                 u32 item_inline_len;
5342
5343                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5344                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5345                 compressed = btrfs_file_extent_compression(node, fi);
5346                 if (extent_num_bytes == 0) {
5347                         error(
5348                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5349                                 root->objectid, fkey->objectid, fkey->offset);
5350                         err |= FILE_EXTENT_ERROR;
5351                 }
5352                 if (!compressed && extent_num_bytes != item_inline_len) {
5353                         error(
5354                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5355                                 root->objectid, fkey->objectid, fkey->offset,
5356                                 extent_num_bytes, item_inline_len);
5357                         err |= FILE_EXTENT_ERROR;
5358                 }
5359                 *end += extent_num_bytes;
5360                 *size += extent_num_bytes;
5361                 return err;
5362         }
5363
5364         /* Check extent type */
5365         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5366                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5367                 err |= FILE_EXTENT_ERROR;
5368                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5369                       root->objectid, fkey->objectid, fkey->offset);
5370                 return err;
5371         }
5372
5373         /* Check REG_EXTENT/PREALLOC_EXTENT */
5374         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5375         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5376         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5377         extent_offset = btrfs_file_extent_offset(node, fi);
5378         compressed = btrfs_file_extent_compression(node, fi);
5379         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5380
5381         /*
5382          * Check EXTENT_DATA csum
5383          *
5384          * For plain (uncompressed) extent, we should only check the range
5385          * we're referring to, as it's possible that part of prealloc extent
5386          * has been written, and has csum:
5387          *
5388          * |<--- Original large preallocated extent A ---->|
5389          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5390          *      No csum                         Has csum
5391          *
5392          * For compressed extent, we should check the whole range.
5393          */
5394         if (!compressed) {
5395                 search_start = disk_bytenr + extent_offset;
5396                 search_len = extent_num_bytes;
5397         } else {
5398                 search_start = disk_bytenr;
5399                 search_len = disk_num_bytes;
5400         }
5401         ret = count_csum_range(root, search_start, search_len, &csum_found);
5402         if (csum_found > 0 && nodatasum) {
5403                 err |= ODD_CSUM_ITEM;
5404                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5405                       root->objectid, fkey->objectid, fkey->offset);
5406         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5407                    !is_hole && (ret < 0 || csum_found < search_len)) {
5408                 err |= CSUM_ITEM_MISSING;
5409                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5410                       root->objectid, fkey->objectid, fkey->offset,
5411                       csum_found, search_len);
5412         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5413                 err |= ODD_CSUM_ITEM;
5414                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5415                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5416         }
5417
5418         /* Check EXTENT_DATA hole */
5419         if (!no_holes && *end != fkey->offset) {
5420                 err |= FILE_EXTENT_ERROR;
5421                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5422                       root->objectid, fkey->objectid, fkey->offset);
5423         }
5424
5425         *end += extent_num_bytes;
5426         if (!is_hole)
5427                 *size += extent_num_bytes;
5428
5429         return err;
5430 }
5431
5432 /*
5433  * Set inode item nbytes to @nbytes
5434  *
5435  * Returns  0     on success
5436  * Returns  != 0  on error
5437  */
5438 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5439                                       struct btrfs_path *path,
5440                                       u64 ino, u64 nbytes)
5441 {
5442         struct btrfs_trans_handle *trans;
5443         struct btrfs_inode_item *ii;
5444         struct btrfs_key key;
5445         struct btrfs_key research_key;
5446         int err = 0;
5447         int ret;
5448
5449         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5450
5451         key.objectid = ino;
5452         key.type = BTRFS_INODE_ITEM_KEY;
5453         key.offset = 0;
5454
5455         trans = btrfs_start_transaction(root, 1);
5456         if (IS_ERR(trans)) {
5457                 ret = PTR_ERR(trans);
5458                 err |= ret;
5459                 goto out;
5460         }
5461
5462         btrfs_release_path(path);
5463         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5464         if (ret > 0)
5465                 ret = -ENOENT;
5466         if (ret) {
5467                 err |= ret;
5468                 goto fail;
5469         }
5470
5471         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5472                             struct btrfs_inode_item);
5473         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5474         btrfs_mark_buffer_dirty(path->nodes[0]);
5475 fail:
5476         btrfs_commit_transaction(trans, root);
5477 out:
5478         if (ret)
5479                 error("failed to set nbytes in inode %llu root %llu",
5480                       ino, root->root_key.objectid);
5481         else
5482                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5483                        root->root_key.objectid, nbytes);
5484
5485         /* research path */
5486         btrfs_release_path(path);
5487         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5488         err |= ret;
5489
5490         return err;
5491 }
5492
5493 /*
5494  * Set directory inode isize to @isize.
5495  *
5496  * Returns 0     on success.
5497  * Returns != 0  on error.
5498  */
5499 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5500                                    struct btrfs_path *path,
5501                                    u64 ino, u64 isize)
5502 {
5503         struct btrfs_trans_handle *trans;
5504         struct btrfs_inode_item *ii;
5505         struct btrfs_key key;
5506         struct btrfs_key research_key;
5507         int ret;
5508         int err = 0;
5509
5510         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5511
5512         key.objectid = ino;
5513         key.type = BTRFS_INODE_ITEM_KEY;
5514         key.offset = 0;
5515
5516         trans = btrfs_start_transaction(root, 1);
5517         if (IS_ERR(trans)) {
5518                 ret = PTR_ERR(trans);
5519                 err |= ret;
5520                 goto out;
5521         }
5522
5523         btrfs_release_path(path);
5524         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5525         if (ret > 0)
5526                 ret = -ENOENT;
5527         if (ret) {
5528                 err |= ret;
5529                 goto fail;
5530         }
5531
5532         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5533                             struct btrfs_inode_item);
5534         btrfs_set_inode_size(path->nodes[0], ii, isize);
5535         btrfs_mark_buffer_dirty(path->nodes[0]);
5536 fail:
5537         btrfs_commit_transaction(trans, root);
5538 out:
5539         if (ret)
5540                 error("failed to set isize in inode %llu root %llu",
5541                       ino, root->root_key.objectid);
5542         else
5543                 printf("Set isize in inode %llu root %llu to %llu\n",
5544                        ino, root->root_key.objectid, isize);
5545
5546         btrfs_release_path(path);
5547         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5548         err |= ret;
5549
5550         return err;
5551 }
5552
5553 /*
5554  * Wrapper function for btrfs_add_orphan_item().
5555  *
5556  * Returns 0     on success.
5557  * Returns != 0  on error.
5558  */
5559 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5560                                            struct btrfs_path *path, u64 ino)
5561 {
5562         struct btrfs_trans_handle *trans;
5563         struct btrfs_key research_key;
5564         int ret;
5565         int err = 0;
5566
5567         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5568
5569         trans = btrfs_start_transaction(root, 1);
5570         if (IS_ERR(trans)) {
5571                 ret = PTR_ERR(trans);
5572                 err |= ret;
5573                 goto out;
5574         }
5575
5576         btrfs_release_path(path);
5577         ret = btrfs_add_orphan_item(trans, root, path, ino);
5578         err |= ret;
5579         btrfs_commit_transaction(trans, root);
5580 out:
5581         if (ret)
5582                 error("failed to add inode %llu as orphan item root %llu",
5583                       ino, root->root_key.objectid);
5584         else
5585                 printf("Added inode %llu as orphan item root %llu\n",
5586                        ino, root->root_key.objectid);
5587
5588         btrfs_release_path(path);
5589         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5590         err |= ret;
5591
5592         return err;
5593 }
5594
/*
 * Check INODE_ITEM and related ITEMs (the same inode number)
 * 1. check link count
 * 2. check inode ref/extref
 * 3. check dir item/index
 *
 * @root:       tree handed to btrfs_next_item() and to the per-item checkers
 * @path:       the current slot is read as a struct btrfs_inode_item; the path
 *              is advanced with btrfs_next_item() for as long as the items
 *              carry the same objectid
 * @ext_ref:    the EXTENDED_IREF feature
 *
 * Return 0 if no error occurred.
 * Return >0 for error or hit the traversal is done(by error bitmap)
 */
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref)
{
        struct extent_buffer *node;
        struct btrfs_inode_item *ii;
        struct btrfs_key key;
        u64 inode_id;
        u32 mode;
        u64 nlink;
        u64 nbytes;
        u64 isize;
        /* Accumulators filled by the per-item checkers in the loop below. */
        u64 size = 0;
        u64 refs = 0;
        u64 extent_end = 0;
        u64 extent_size = 0;
        unsigned int dir;
        unsigned int nodatasum;
        int slot;
        int ret;
        int err = 0;
        char namebuf[BTRFS_NAME_LEN] = {0};
        u32 name_len = 0;

        node = path->nodes[0];
        slot = path->slots[0];

        btrfs_item_key_to_cpu(node, &key, slot);
        inode_id = key.objectid;

        /* Items of the orphan objectid are not checked, only stepped over. */
        if (inode_id == BTRFS_ORPHAN_OBJECTID) {
                ret = btrfs_next_item(root, path);
                if (ret > 0)
                        err |= LAST_ITEM;
                return err;
        }

        /* Snapshot the inode fields that are verified after the walk. */
        ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
        isize = btrfs_inode_size(node, ii);
        nbytes = btrfs_inode_nbytes(node, ii);
        mode = btrfs_inode_mode(node, ii);
        dir = imode_to_type(mode) == BTRFS_FT_DIR;
        nlink = btrfs_inode_nlink(node, ii);
        nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;

        /* Visit every following item that still belongs to inode_id. */
        while (1) {
                ret = btrfs_next_item(root, path);
                if (ret < 0) {
                        /* out will fill 'err' using current statistics */
                        goto out;
                } else if (ret > 0) {
                        err |= LAST_ITEM;
                        goto out;
                }

                node = path->nodes[0];
                slot = path->slots[0];
                btrfs_item_key_to_cpu(node, &key, slot);
                if (key.objectid != inode_id)
                        goto out;

                switch (key.type) {
                case BTRFS_INODE_REF_KEY:
                        ret = check_inode_ref(root, &key, path, namebuf,
                                              &name_len, &refs, mode);
                        err |= ret;
                        break;
                case BTRFS_INODE_EXTREF_KEY:
                        /*
                         * The key.type test is always true inside this case;
                         * the warning effectively depends on !ext_ref only.
                         */
                        if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
                                warning("root %llu EXTREF[%llu %llu] isn't supported",
                                        root->objectid, key.objectid,
                                        key.offset);
                        ret = check_inode_extref(root, &key, node, slot, &refs,
                                                 mode);
                        err |= ret;
                        break;
                case BTRFS_DIR_ITEM_KEY:
                case BTRFS_DIR_INDEX_KEY:
                        /*
                         * Only a warning: the item is still checked below.
                         * The message says DIR_INDEX for both key types and
                         * prints imode_to_type(mode), not the raw mode.
                         */
                        if (!dir) {
                                warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
                                        root->objectid, inode_id,
                                        imode_to_type(mode), key.objectid,
                                        key.offset);
                        }
                        ret = check_dir_item(root, &key, path, &size, ext_ref);
                        err |= ret;
                        break;
                case BTRFS_EXTENT_DATA_KEY:
                        /* Only a warning: the extent is still checked below. */
                        if (dir) {
                                warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
                                        root->objectid, inode_id, key.objectid,
                                        key.offset);
                        }
                        ret = check_file_extent(root, &key, node, slot,
                                                nodatasum, &extent_size,
                                                &extent_end);
                        err |= ret;
                        break;
                case BTRFS_XATTR_ITEM_KEY:
                        /* xattr items are accepted without any check */
                        break;
                default:
                        /* Reported only; 'err' is left untouched here. */
                        error("ITEM[%llu %u %llu] UNKNOWN TYPE",
                              key.objectid, key.type, key.offset);
                }
        }

out:
        /* verify INODE_ITEM nlink/isize/nbytes */
        if (dir) {
                /*
                 * In repair mode a checker may ask for the directory size to
                 * be recounted; the request bit is cleared from the result.
                 */
                if (repair && (err & DIR_COUNT_AGAIN)) {
                        err &= ~DIR_COUNT_AGAIN;
                        count_dir_isize(root, inode_id, &size);
                }
                if (nlink != 1) {
                        err |= LINK_COUNT_ERROR;
                        error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
                              root->objectid, inode_id, nlink);
                }

                /*
                 * Just a warning, as dir inode nbytes is just an
                 * instructive value.
                 */
                if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
                        warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
                                root->objectid, inode_id,
                                root->fs_info->nodesize);
                }

                /*
                 * 'ret' is only read when 'repair' is set, i.e. after the
                 * repair helper has assigned it.
                 */
                if (isize != size) {
                        if (repair)
                                ret = repair_dir_isize_lowmem(root, path,
                                                              inode_id, size);
                        if (!repair || ret) {
                                err |= ISIZE_ERROR;
                                error(
                "root %llu DIR INODE [%llu] size %llu not equal to %llu",
                                      root->objectid, inode_id, isize, size);
                        }
                }
        } else {
                if (nlink != refs) {
                        err |= LINK_COUNT_ERROR;
                        error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
                              root->objectid, inode_id, nlink, refs);
                } else if (!nlink) {
                        /* nlink == refs == 0: try to add an orphan item. */
                        if (repair)
                                ret = repair_inode_orphan_item_lowmem(root,
                                                              path, inode_id);
                        if (!repair || ret) {
                                err |= ORPHAN_ITEM;
                                error("root %llu INODE[%llu] is orphan item",
                                      root->objectid, inode_id);
                        }
                }

                if (!nbytes && !no_holes && extent_end < isize) {
                        err |= NBYTES_ERROR;
                        error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
                              root->objectid, inode_id, isize);
                }

                /* nbytes must equal the byte count summed from the extents. */
                if (nbytes != extent_size) {
                        if (repair)
                                ret = repair_inode_nbytes_lowmem(root, path,
                                                         inode_id, extent_size);
                        if (!repair || ret) {
                                err |= NBYTES_ERROR;
                                error(
        "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
                                      root->objectid, inode_id, nbytes,
                                      extent_size);
                        }
                }
        }

        return err;
}
5783
5784 /*
5785  * Insert the missing inode item and inode ref.
5786  *
5787  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5788  * Root dir should be handled specially because root dir is the root of fs.
5789  *
5790  * returns err (>0 or 0) after repair
5791  */
5792 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5793 {
5794         struct btrfs_trans_handle *trans;
5795         struct btrfs_key key;
5796         struct btrfs_path path;
5797         int filetype = BTRFS_FT_DIR;
5798         int ret = 0;
5799
5800         btrfs_init_path(&path);
5801
5802         if (err & INODE_REF_MISSING) {
5803                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5804                 key.type = BTRFS_INODE_REF_KEY;
5805                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5806
5807                 trans = btrfs_start_transaction(root, 1);
5808                 if (IS_ERR(trans)) {
5809                         ret = PTR_ERR(trans);
5810                         goto out;
5811                 }
5812
5813                 btrfs_release_path(&path);
5814                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5815                 if (ret)
5816                         goto trans_fail;
5817
5818                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5819                                              BTRFS_FIRST_FREE_OBJECTID,
5820                                              BTRFS_FIRST_FREE_OBJECTID, 0);
5821                 if (ret)
5822                         goto trans_fail;
5823
5824                 printf("Add INODE_REF[%llu %llu] name %s\n",
5825                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5826                        "..");
5827                 err &= ~INODE_REF_MISSING;
5828 trans_fail:
5829                 if (ret)
5830                         error("fail to insert first inode's ref");
5831                 btrfs_commit_transaction(trans, root);
5832         }
5833
5834         if (err & INODE_ITEM_MISSING) {
5835                 ret = repair_inode_item_missing(root,
5836                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
5837                 if (ret)
5838                         goto out;
5839                 err &= ~INODE_ITEM_MISSING;
5840         }
5841 out:
5842         if (ret)
5843                 error("fail to repair first inode");
5844         btrfs_release_path(&path);
5845         return err;
5846 }
5847
5848 /*
5849  * check first root dir's inode_item and inode_ref
5850  *
5851  * returns 0 means no error
5852  * returns >0 means error
5853  * returns <0 means fatal error
5854  */
5855 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5856 {
5857         struct btrfs_path path;
5858         struct btrfs_key key;
5859         struct btrfs_inode_item *ii;
5860         u64 index;
5861         u32 mode;
5862         int err = 0;
5863         int ret;
5864
5865         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5866         key.type = BTRFS_INODE_ITEM_KEY;
5867         key.offset = 0;
5868
5869         /* For root being dropped, we don't need to check first inode */
5870         if (btrfs_root_refs(&root->root_item) == 0 &&
5871             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5872             BTRFS_FIRST_FREE_OBJECTID)
5873                 return 0;
5874
5875         btrfs_init_path(&path);
5876         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5877         if (ret < 0)
5878                 goto out;
5879         if (ret > 0) {
5880                 ret = 0;
5881                 err |= INODE_ITEM_MISSING;
5882         } else {
5883                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
5884                                     struct btrfs_inode_item);
5885                 mode = btrfs_inode_mode(path.nodes[0], ii);
5886                 if (imode_to_type(mode) != BTRFS_FT_DIR)
5887                         err |= INODE_ITEM_MISMATCH;
5888         }
5889
5890         /* lookup first inode ref */
5891         key.offset = BTRFS_FIRST_FREE_OBJECTID;
5892         key.type = BTRFS_INODE_REF_KEY;
5893         /* special index value */
5894         index = 0;
5895
5896         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
5897         if (ret < 0)
5898                 goto out;
5899         err |= ret;
5900
5901 out:
5902         btrfs_release_path(&path);
5903
5904         if (err && repair)
5905                 err = repair_fs_first_inode(root, err);
5906
5907         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
5908                 error("root dir INODE_ITEM is %s",
5909                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
5910         if (err & INODE_REF_MISSING)
5911                 error("root dir INODE_REF is missing");
5912
5913         return ret < 0 ? ret : err;
5914 }
5915
5916 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5917                                                 u64 parent, u64 root)
5918 {
5919         struct rb_node *node;
5920         struct tree_backref *back = NULL;
5921         struct tree_backref match = {
5922                 .node = {
5923                         .is_data = 0,
5924                 },
5925         };
5926
5927         if (parent) {
5928                 match.parent = parent;
5929                 match.node.full_backref = 1;
5930         } else {
5931                 match.root = root;
5932         }
5933
5934         node = rb_search(&rec->backref_tree, &match.node.node,
5935                          (rb_compare_keys)compare_extent_backref, NULL);
5936         if (node)
5937                 back = to_tree_backref(rb_node_to_extent_backref(node));
5938
5939         return back;
5940 }
5941
5942 static struct data_backref *find_data_backref(struct extent_record *rec,
5943                                                 u64 parent, u64 root,
5944                                                 u64 owner, u64 offset,
5945                                                 int found_ref,
5946                                                 u64 disk_bytenr, u64 bytes)
5947 {
5948         struct rb_node *node;
5949         struct data_backref *back = NULL;
5950         struct data_backref match = {
5951                 .node = {
5952                         .is_data = 1,
5953                 },
5954                 .owner = owner,
5955                 .offset = offset,
5956                 .bytes = bytes,
5957                 .found_ref = found_ref,
5958                 .disk_bytenr = disk_bytenr,
5959         };
5960
5961         if (parent) {
5962                 match.parent = parent;
5963                 match.node.full_backref = 1;
5964         } else {
5965                 match.root = root;
5966         }
5967
5968         node = rb_search(&rec->backref_tree, &match.node.node,
5969                          (rb_compare_keys)compare_extent_backref, NULL);
5970         if (node)
5971                 back = to_data_backref(rb_node_to_extent_backref(node));
5972
5973         return back;
5974 }
5975 /*
5976  * Iterate all item on the tree and call check_inode_item() to check.
5977  *
5978  * @root:       the root of the tree to be checked.
5979  * @ext_ref:    the EXTENDED_IREF feature
5980  *
5981  * Return 0 if no error found.
5982  * Return <0 for error.
5983  */
5984 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5985 {
5986         struct btrfs_path path;
5987         struct node_refs nrefs;
5988         struct btrfs_root_item *root_item = &root->root_item;
5989         int ret;
5990         int level;
5991         int err = 0;
5992
5993         /*
5994          * We need to manually check the first inode item(256)
5995          * As the following traversal function will only start from
5996          * the first inode item in the leaf, if inode item(256) is missing
5997          * we will just skip it forever.
5998          */
5999         ret = check_fs_first_inode(root, ext_ref);
6000         if (ret < 0)
6001                 return ret;
6002         err |= !!ret;
6003
6004         memset(&nrefs, 0, sizeof(nrefs));
6005         level = btrfs_header_level(root->node);
6006         btrfs_init_path(&path);
6007
6008         if (btrfs_root_refs(root_item) > 0 ||
6009             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6010                 path.nodes[level] = root->node;
6011                 path.slots[level] = 0;
6012                 extent_buffer_get(root->node);
6013         } else {
6014                 struct btrfs_key key;
6015
6016                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6017                 level = root_item->drop_level;
6018                 path.lowest_level = level;
6019                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6020                 if (ret < 0)
6021                         goto out;
6022                 ret = 0;
6023         }
6024
6025         while (1) {
6026                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
6027                 err |= !!ret;
6028
6029                 /* if ret is negative, walk shall stop */
6030                 if (ret < 0) {
6031                         ret = err;
6032                         break;
6033                 }
6034
6035                 ret = walk_up_tree_v2(root, &path, &level);
6036                 if (ret != 0) {
6037                         /* Normal exit, reset ret to err */
6038                         ret = err;
6039                         break;
6040                 }
6041         }
6042
6043 out:
6044         btrfs_release_path(&path);
6045         return ret;
6046 }
6047
6048 /*
6049  * Find the relative ref for root_ref and root_backref.
6050  *
6051  * @root:       the root of the root tree.
6052  * @ref_key:    the key of the root ref.
6053  *
6054  * Return 0 if no error occurred.
6055  */
6056 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6057                           struct extent_buffer *node, int slot)
6058 {
6059         struct btrfs_path path;
6060         struct btrfs_key key;
6061         struct btrfs_root_ref *ref;
6062         struct btrfs_root_ref *backref;
6063         char ref_name[BTRFS_NAME_LEN] = {0};
6064         char backref_name[BTRFS_NAME_LEN] = {0};
6065         u64 ref_dirid;
6066         u64 ref_seq;
6067         u32 ref_namelen;
6068         u64 backref_dirid;
6069         u64 backref_seq;
6070         u32 backref_namelen;
6071         u32 len;
6072         int ret;
6073         int err = 0;
6074
6075         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6076         ref_dirid = btrfs_root_ref_dirid(node, ref);
6077         ref_seq = btrfs_root_ref_sequence(node, ref);
6078         ref_namelen = btrfs_root_ref_name_len(node, ref);
6079
6080         if (ref_namelen <= BTRFS_NAME_LEN) {
6081                 len = ref_namelen;
6082         } else {
6083                 len = BTRFS_NAME_LEN;
6084                 warning("%s[%llu %llu] ref_name too long",
6085                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6086                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6087                         ref_key->offset);
6088         }
6089         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6090
6091         /* Find relative root_ref */
6092         key.objectid = ref_key->offset;
6093         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6094         key.offset = ref_key->objectid;
6095
6096         btrfs_init_path(&path);
6097         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6098         if (ret) {
6099                 err |= ROOT_REF_MISSING;
6100                 error("%s[%llu %llu] couldn't find relative ref",
6101                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6102                       "ROOT_REF" : "ROOT_BACKREF",
6103                       ref_key->objectid, ref_key->offset);
6104                 goto out;
6105         }
6106
6107         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6108                                  struct btrfs_root_ref);
6109         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6110         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6111         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6112
6113         if (backref_namelen <= BTRFS_NAME_LEN) {
6114                 len = backref_namelen;
6115         } else {
6116                 len = BTRFS_NAME_LEN;
6117                 warning("%s[%llu %llu] ref_name too long",
6118                         key.type == BTRFS_ROOT_REF_KEY ?
6119                         "ROOT_REF" : "ROOT_BACKREF",
6120                         key.objectid, key.offset);
6121         }
6122         read_extent_buffer(path.nodes[0], backref_name,
6123                            (unsigned long)(backref + 1), len);
6124
6125         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6126             ref_namelen != backref_namelen ||
6127             strncmp(ref_name, backref_name, len)) {
6128                 err |= ROOT_REF_MISMATCH;
6129                 error("%s[%llu %llu] mismatch relative ref",
6130                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6131                       "ROOT_REF" : "ROOT_BACKREF",
6132                       ref_key->objectid, ref_key->offset);
6133         }
6134 out:
6135         btrfs_release_path(&path);
6136         return err;
6137 }
6138
6139 /*
6140  * Check all fs/file tree in low_memory mode.
6141  *
6142  * 1. for fs tree root item, call check_fs_root_v2()
6143  * 2. for fs tree root ref/backref, call check_root_ref()
6144  *
6145  * Return 0 if no error occurred.
6146  */
6147 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6148 {
6149         struct btrfs_root *tree_root = fs_info->tree_root;
6150         struct btrfs_root *cur_root = NULL;
6151         struct btrfs_path path;
6152         struct btrfs_key key;
6153         struct extent_buffer *node;
6154         unsigned int ext_ref;
6155         int slot;
6156         int ret;
6157         int err = 0;
6158
6159         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6160
6161         btrfs_init_path(&path);
6162         key.objectid = BTRFS_FS_TREE_OBJECTID;
6163         key.offset = 0;
6164         key.type = BTRFS_ROOT_ITEM_KEY;
6165
6166         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6167         if (ret < 0) {
6168                 err = ret;
6169                 goto out;
6170         } else if (ret > 0) {
6171                 err = -ENOENT;
6172                 goto out;
6173         }
6174
6175         while (1) {
6176                 node = path.nodes[0];
6177                 slot = path.slots[0];
6178                 btrfs_item_key_to_cpu(node, &key, slot);
6179                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6180                         goto out;
6181                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6182                     fs_root_objectid(key.objectid)) {
6183                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6184                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6185                                                                        &key);
6186                         } else {
6187                                 key.offset = (u64)-1;
6188                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6189                         }
6190
6191                         if (IS_ERR(cur_root)) {
6192                                 error("Fail to read fs/subvol tree: %lld",
6193                                       key.objectid);
6194                                 err = -EIO;
6195                                 goto next;
6196                         }
6197
6198                         ret = check_fs_root_v2(cur_root, ext_ref);
6199                         err |= ret;
6200
6201                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6202                                 btrfs_free_fs_root(cur_root);
6203                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6204                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6205                         ret = check_root_ref(tree_root, &key, node, slot);
6206                         err |= ret;
6207                 }
6208 next:
6209                 ret = btrfs_next_item(tree_root, &path);
6210                 if (ret > 0)
6211                         goto out;
6212                 if (ret < 0) {
6213                         err = ret;
6214                         goto out;
6215                 }
6216         }
6217
6218 out:
6219         btrfs_release_path(&path);
6220         return err;
6221 }
6222
6223 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6224                           struct cache_tree *root_cache)
6225 {
6226         int ret;
6227
6228         if (!ctx.progress_enabled)
6229                 fprintf(stderr, "checking fs roots\n");
6230         if (check_mode == CHECK_MODE_LOWMEM)
6231                 ret = check_fs_roots_v2(fs_info);
6232         else
6233                 ret = check_fs_roots(fs_info, root_cache);
6234
6235         return ret;
6236 }
6237
6238 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6239 {
6240         struct extent_backref *back, *tmp;
6241         struct tree_backref *tback;
6242         struct data_backref *dback;
6243         u64 found = 0;
6244         int err = 0;
6245
6246         rbtree_postorder_for_each_entry_safe(back, tmp,
6247                                              &rec->backref_tree, node) {
6248                 if (!back->found_extent_tree) {
6249                         err = 1;
6250                         if (!print_errs)
6251                                 goto out;
6252                         if (back->is_data) {
6253                                 dback = to_data_backref(back);
6254                                 fprintf(stderr, "Data backref %llu %s %llu"
6255                                         " owner %llu offset %llu num_refs %lu"
6256                                         " not found in extent tree\n",
6257                                         (unsigned long long)rec->start,
6258                                         back->full_backref ?
6259                                         "parent" : "root",
6260                                         back->full_backref ?
6261                                         (unsigned long long)dback->parent:
6262                                         (unsigned long long)dback->root,
6263                                         (unsigned long long)dback->owner,
6264                                         (unsigned long long)dback->offset,
6265                                         (unsigned long)dback->num_refs);
6266                         } else {
6267                                 tback = to_tree_backref(back);
6268                                 fprintf(stderr, "Tree backref %llu parent %llu"
6269                                         " root %llu not found in extent tree\n",
6270                                         (unsigned long long)rec->start,
6271                                         (unsigned long long)tback->parent,
6272                                         (unsigned long long)tback->root);
6273                         }
6274                 }
6275                 if (!back->is_data && !back->found_ref) {
6276                         err = 1;
6277                         if (!print_errs)
6278                                 goto out;
6279                         tback = to_tree_backref(back);
6280                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6281                                 (unsigned long long)rec->start,
6282                                 back->full_backref ? "parent" : "root",
6283                                 back->full_backref ?
6284                                 (unsigned long long)tback->parent :
6285                                 (unsigned long long)tback->root, back);
6286                 }
6287                 if (back->is_data) {
6288                         dback = to_data_backref(back);
6289                         if (dback->found_ref != dback->num_refs) {
6290                                 err = 1;
6291                                 if (!print_errs)
6292                                         goto out;
6293                                 fprintf(stderr, "Incorrect local backref count"
6294                                         " on %llu %s %llu owner %llu"
6295                                         " offset %llu found %u wanted %u back %p\n",
6296                                         (unsigned long long)rec->start,
6297                                         back->full_backref ?
6298                                         "parent" : "root",
6299                                         back->full_backref ?
6300                                         (unsigned long long)dback->parent:
6301                                         (unsigned long long)dback->root,
6302                                         (unsigned long long)dback->owner,
6303                                         (unsigned long long)dback->offset,
6304                                         dback->found_ref, dback->num_refs, back);
6305                         }
6306                         if (dback->disk_bytenr != rec->start) {
6307                                 err = 1;
6308                                 if (!print_errs)
6309                                         goto out;
6310                                 fprintf(stderr, "Backref disk bytenr does not"
6311                                         " match extent record, bytenr=%llu, "
6312                                         "ref bytenr=%llu\n",
6313                                         (unsigned long long)rec->start,
6314                                         (unsigned long long)dback->disk_bytenr);
6315                         }
6316
6317                         if (dback->bytes != rec->nr) {
6318                                 err = 1;
6319                                 if (!print_errs)
6320                                         goto out;
6321                                 fprintf(stderr, "Backref bytes do not match "
6322                                         "extent backref, bytenr=%llu, ref "
6323                                         "bytes=%llu, backref bytes=%llu\n",
6324                                         (unsigned long long)rec->start,
6325                                         (unsigned long long)rec->nr,
6326                                         (unsigned long long)dback->bytes);
6327                         }
6328                 }
6329                 if (!back->is_data) {
6330                         found += 1;
6331                 } else {
6332                         dback = to_data_backref(back);
6333                         found += dback->found_ref;
6334                 }
6335         }
6336         if (found != rec->refs) {
6337                 err = 1;
6338                 if (!print_errs)
6339                         goto out;
6340                 fprintf(stderr, "Incorrect global backref count "
6341                         "on %llu found %llu wanted %llu\n",
6342                         (unsigned long long)rec->start,
6343                         (unsigned long long)found,
6344                         (unsigned long long)rec->refs);
6345         }
6346 out:
6347         return err;
6348 }
6349
/*
 * Per-node destructor handed to rb_free_nodes(): free the extent_backref
 * that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
6356
6357 static void free_all_extent_backrefs(struct extent_record *rec)
6358 {
6359         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6360 }
6361
6362 static void free_extent_record_cache(struct cache_tree *extent_cache)
6363 {
6364         struct cache_extent *cache;
6365         struct extent_record *rec;
6366
6367         while (1) {
6368                 cache = first_cache_extent(extent_cache);
6369                 if (!cache)
6370                         break;
6371                 rec = container_of(cache, struct extent_record, cache);
6372                 remove_cache_extent(extent_cache, cache);
6373                 free_all_extent_backrefs(rec);
6374                 free(rec);
6375         }
6376 }
6377
6378 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6379                                  struct extent_record *rec)
6380 {
6381         if (rec->content_checked && rec->owner_ref_checked &&
6382             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6383             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6384             !rec->bad_full_backref && !rec->crossing_stripes &&
6385             !rec->wrong_chunk_type) {
6386                 remove_cache_extent(extent_cache, &rec->cache);
6387                 free_all_extent_backrefs(rec);
6388                 list_del_init(&rec->list);
6389                 free(rec);
6390         }
6391         return 0;
6392 }
6393
6394 static int check_owner_ref(struct btrfs_root *root,
6395                             struct extent_record *rec,
6396                             struct extent_buffer *buf)
6397 {
6398         struct extent_backref *node, *tmp;
6399         struct tree_backref *back;
6400         struct btrfs_root *ref_root;
6401         struct btrfs_key key;
6402         struct btrfs_path path;
6403         struct extent_buffer *parent;
6404         int level;
6405         int found = 0;
6406         int ret;
6407
6408         rbtree_postorder_for_each_entry_safe(node, tmp,
6409                                              &rec->backref_tree, node) {
6410                 if (node->is_data)
6411                         continue;
6412                 if (!node->found_ref)
6413                         continue;
6414                 if (node->full_backref)
6415                         continue;
6416                 back = to_tree_backref(node);
6417                 if (btrfs_header_owner(buf) == back->root)
6418                         return 0;
6419         }
6420         BUG_ON(rec->is_root);
6421
6422         /* try to find the block by search corresponding fs tree */
6423         key.objectid = btrfs_header_owner(buf);
6424         key.type = BTRFS_ROOT_ITEM_KEY;
6425         key.offset = (u64)-1;
6426
6427         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6428         if (IS_ERR(ref_root))
6429                 return 1;
6430
6431         level = btrfs_header_level(buf);
6432         if (level == 0)
6433                 btrfs_item_key_to_cpu(buf, &key, 0);
6434         else
6435                 btrfs_node_key_to_cpu(buf, &key, 0);
6436
6437         btrfs_init_path(&path);
6438         path.lowest_level = level + 1;
6439         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6440         if (ret < 0)
6441                 return 0;
6442
6443         parent = path.nodes[level + 1];
6444         if (parent && buf->start == btrfs_node_blockptr(parent,
6445                                                         path.slots[level + 1]))
6446                 found = 1;
6447
6448         btrfs_release_path(&path);
6449         return found ? 0 : 1;
6450 }
6451
6452 static int is_extent_tree_record(struct extent_record *rec)
6453 {
6454         struct extent_backref *node, *tmp;
6455         struct tree_backref *back;
6456         int is_extent = 0;
6457
6458         rbtree_postorder_for_each_entry_safe(node, tmp,
6459                                              &rec->backref_tree, node) {
6460                 if (node->is_data)
6461                         return 0;
6462                 back = to_tree_backref(node);
6463                 if (node->full_backref)
6464                         return 0;
6465                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6466                         is_extent = 1;
6467         }
6468         return is_extent;
6469 }
6470
6471
6472 static int record_bad_block_io(struct btrfs_fs_info *info,
6473                                struct cache_tree *extent_cache,
6474                                u64 start, u64 len)
6475 {
6476         struct extent_record *rec;
6477         struct cache_extent *cache;
6478         struct btrfs_key key;
6479
6480         cache = lookup_cache_extent(extent_cache, start, len);
6481         if (!cache)
6482                 return 0;
6483
6484         rec = container_of(cache, struct extent_record, cache);
6485         if (!is_extent_tree_record(rec))
6486                 return 0;
6487
6488         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6489         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6490 }
6491
6492 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6493                        struct extent_buffer *buf, int slot)
6494 {
6495         if (btrfs_header_level(buf)) {
6496                 struct btrfs_key_ptr ptr1, ptr2;
6497
6498                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6499                                    sizeof(struct btrfs_key_ptr));
6500                 read_extent_buffer(buf, &ptr2,
6501                                    btrfs_node_key_ptr_offset(slot + 1),
6502                                    sizeof(struct btrfs_key_ptr));
6503                 write_extent_buffer(buf, &ptr1,
6504                                     btrfs_node_key_ptr_offset(slot + 1),
6505                                     sizeof(struct btrfs_key_ptr));
6506                 write_extent_buffer(buf, &ptr2,
6507                                     btrfs_node_key_ptr_offset(slot),
6508                                     sizeof(struct btrfs_key_ptr));
6509                 if (slot == 0) {
6510                         struct btrfs_disk_key key;
6511                         btrfs_node_key(buf, &key, 0);
6512                         btrfs_fixup_low_keys(root, path, &key,
6513                                              btrfs_header_level(buf) + 1);
6514                 }
6515         } else {
6516                 struct btrfs_item *item1, *item2;
6517                 struct btrfs_key k1, k2;
6518                 char *item1_data, *item2_data;
6519                 u32 item1_offset, item2_offset, item1_size, item2_size;
6520
6521                 item1 = btrfs_item_nr(slot);
6522                 item2 = btrfs_item_nr(slot + 1);
6523                 btrfs_item_key_to_cpu(buf, &k1, slot);
6524                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6525                 item1_offset = btrfs_item_offset(buf, item1);
6526                 item2_offset = btrfs_item_offset(buf, item2);
6527                 item1_size = btrfs_item_size(buf, item1);
6528                 item2_size = btrfs_item_size(buf, item2);
6529
6530                 item1_data = malloc(item1_size);
6531                 if (!item1_data)
6532                         return -ENOMEM;
6533                 item2_data = malloc(item2_size);
6534                 if (!item2_data) {
6535                         free(item1_data);
6536                         return -ENOMEM;
6537                 }
6538
6539                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6540                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6541
6542                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6543                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6544                 free(item1_data);
6545                 free(item2_data);
6546
6547                 btrfs_set_item_offset(buf, item1, item2_offset);
6548                 btrfs_set_item_offset(buf, item2, item1_offset);
6549                 btrfs_set_item_size(buf, item1, item2_size);
6550                 btrfs_set_item_size(buf, item2, item1_size);
6551
6552                 path->slots[0] = slot;
6553                 btrfs_set_item_key_unsafe(root, path, &k2);
6554                 path->slots[0] = slot + 1;
6555                 btrfs_set_item_key_unsafe(root, path, &k1);
6556         }
6557         return 0;
6558 }
6559
6560 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6561 {
6562         struct extent_buffer *buf;
6563         struct btrfs_key k1, k2;
6564         int i;
6565         int level = path->lowest_level;
6566         int ret = -EIO;
6567
6568         buf = path->nodes[level];
6569         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6570                 if (level) {
6571                         btrfs_node_key_to_cpu(buf, &k1, i);
6572                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6573                 } else {
6574                         btrfs_item_key_to_cpu(buf, &k1, i);
6575                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6576                 }
6577                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6578                         continue;
6579                 ret = swap_values(root, path, buf, i);
6580                 if (ret)
6581                         break;
6582                 btrfs_mark_buffer_dirty(buf);
6583                 i = 0;
6584         }
6585         return ret;
6586 }
6587
6588 static int delete_bogus_item(struct btrfs_root *root,
6589                              struct btrfs_path *path,
6590                              struct extent_buffer *buf, int slot)
6591 {
6592         struct btrfs_key key;
6593         int nritems = btrfs_header_nritems(buf);
6594
6595         btrfs_item_key_to_cpu(buf, &key, slot);
6596
6597         /* These are all the keys we can deal with missing. */
6598         if (key.type != BTRFS_DIR_INDEX_KEY &&
6599             key.type != BTRFS_EXTENT_ITEM_KEY &&
6600             key.type != BTRFS_METADATA_ITEM_KEY &&
6601             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6602             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6603                 return -1;
6604
6605         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6606                (unsigned long long)key.objectid, key.type,
6607                (unsigned long long)key.offset, slot, buf->start);
6608         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6609                               btrfs_item_nr_offset(slot + 1),
6610                               sizeof(struct btrfs_item) *
6611                               (nritems - slot - 1));
6612         btrfs_set_header_nritems(buf, nritems - 1);
6613         if (slot == 0) {
6614                 struct btrfs_disk_key disk_key;
6615
6616                 btrfs_item_key(buf, &disk_key, 0);
6617                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6618         }
6619         btrfs_mark_buffer_dirty(buf);
6620         return 0;
6621 }
6622
6623 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6624 {
6625         struct extent_buffer *buf;
6626         int i;
6627         int ret = 0;
6628
6629         /* We should only get this for leaves */
6630         BUG_ON(path->lowest_level);
6631         buf = path->nodes[0];
6632 again:
6633         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6634                 unsigned int shift = 0, offset;
6635
6636                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6637                     BTRFS_LEAF_DATA_SIZE(root)) {
6638                         if (btrfs_item_end_nr(buf, i) >
6639                             BTRFS_LEAF_DATA_SIZE(root)) {
6640                                 ret = delete_bogus_item(root, path, buf, i);
6641                                 if (!ret)
6642                                         goto again;
6643                                 fprintf(stderr, "item is off the end of the "
6644                                         "leaf, can't fix\n");
6645                                 ret = -EIO;
6646                                 break;
6647                         }
6648                         shift = BTRFS_LEAF_DATA_SIZE(root) -
6649                                 btrfs_item_end_nr(buf, i);
6650                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6651                            btrfs_item_offset_nr(buf, i - 1)) {
6652                         if (btrfs_item_end_nr(buf, i) >
6653                             btrfs_item_offset_nr(buf, i - 1)) {
6654                                 ret = delete_bogus_item(root, path, buf, i);
6655                                 if (!ret)
6656                                         goto again;
6657                                 fprintf(stderr, "items overlap, can't fix\n");
6658                                 ret = -EIO;
6659                                 break;
6660                         }
6661                         shift = btrfs_item_offset_nr(buf, i - 1) -
6662                                 btrfs_item_end_nr(buf, i);
6663                 }
6664                 if (!shift)
6665                         continue;
6666
6667                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6668                        i, shift, (unsigned long long)buf->start);
6669                 offset = btrfs_item_offset_nr(buf, i);
6670                 memmove_extent_buffer(buf,
6671                                       btrfs_leaf_data(buf) + offset + shift,
6672                                       btrfs_leaf_data(buf) + offset,
6673                                       btrfs_item_size_nr(buf, i));
6674                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6675                                       offset + shift);
6676                 btrfs_mark_buffer_dirty(buf);
6677         }
6678
6679         /*
6680          * We may have moved things, in which case we want to exit so we don't
6681          * write those changes out.  Once we have proper abort functionality in
6682          * progs this can be changed to something nicer.
6683          */
6684         BUG_ON(ret);
6685         return ret;
6686 }
6687
6688 /*
6689  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6690  * then just return -EIO.
6691  */
6692 static int try_to_fix_bad_block(struct btrfs_root *root,
6693                                 struct extent_buffer *buf,
6694                                 enum btrfs_tree_block_status status)
6695 {
6696         struct btrfs_trans_handle *trans;
6697         struct ulist *roots;
6698         struct ulist_node *node;
6699         struct btrfs_root *search_root;
6700         struct btrfs_path path;
6701         struct ulist_iterator iter;
6702         struct btrfs_key root_key, key;
6703         int ret;
6704
6705         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6706             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6707                 return -EIO;
6708
6709         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6710         if (ret)
6711                 return -EIO;
6712
6713         btrfs_init_path(&path);
6714         ULIST_ITER_INIT(&iter);
6715         while ((node = ulist_next(roots, &iter))) {
6716                 root_key.objectid = node->val;
6717                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6718                 root_key.offset = (u64)-1;
6719
6720                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6721                 if (IS_ERR(root)) {
6722                         ret = -EIO;
6723                         break;
6724                 }
6725
6726
6727                 trans = btrfs_start_transaction(search_root, 0);
6728                 if (IS_ERR(trans)) {
6729                         ret = PTR_ERR(trans);
6730                         break;
6731                 }
6732
6733                 path.lowest_level = btrfs_header_level(buf);
6734                 path.skip_check_block = 1;
6735                 if (path.lowest_level)
6736                         btrfs_node_key_to_cpu(buf, &key, 0);
6737                 else
6738                         btrfs_item_key_to_cpu(buf, &key, 0);
6739                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6740                 if (ret) {
6741                         ret = -EIO;
6742                         btrfs_commit_transaction(trans, search_root);
6743                         break;
6744                 }
6745                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6746                         ret = fix_key_order(search_root, &path);
6747                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6748                         ret = fix_item_offset(search_root, &path);
6749                 if (ret) {
6750                         btrfs_commit_transaction(trans, search_root);
6751                         break;
6752                 }
6753                 btrfs_release_path(&path);
6754                 btrfs_commit_transaction(trans, search_root);
6755         }
6756         ulist_free(roots);
6757         btrfs_release_path(&path);
6758         return ret;
6759 }
6760
6761 static int check_block(struct btrfs_root *root,
6762                        struct cache_tree *extent_cache,
6763                        struct extent_buffer *buf, u64 flags)
6764 {
6765         struct extent_record *rec;
6766         struct cache_extent *cache;
6767         struct btrfs_key key;
6768         enum btrfs_tree_block_status status;
6769         int ret = 0;
6770         int level;
6771
6772         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6773         if (!cache)
6774                 return 1;
6775         rec = container_of(cache, struct extent_record, cache);
6776         rec->generation = btrfs_header_generation(buf);
6777
6778         level = btrfs_header_level(buf);
6779         if (btrfs_header_nritems(buf) > 0) {
6780
6781                 if (level == 0)
6782                         btrfs_item_key_to_cpu(buf, &key, 0);
6783                 else
6784                         btrfs_node_key_to_cpu(buf, &key, 0);
6785
6786                 rec->info_objectid = key.objectid;
6787         }
6788         rec->info_level = level;
6789
6790         if (btrfs_is_leaf(buf))
6791                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6792         else
6793                 status = btrfs_check_node(root, &rec->parent_key, buf);
6794
6795         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6796                 if (repair)
6797                         status = try_to_fix_bad_block(root, buf, status);
6798                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6799                         ret = -EIO;
6800                         fprintf(stderr, "bad block %llu\n",
6801                                 (unsigned long long)buf->start);
6802                 } else {
6803                         /*
6804                          * Signal to callers we need to start the scan over
6805                          * again since we'll have cowed blocks.
6806                          */
6807                         ret = -EAGAIN;
6808                 }
6809         } else {
6810                 rec->content_checked = 1;
6811                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6812                         rec->owner_ref_checked = 1;
6813                 else {
6814                         ret = check_owner_ref(root, rec, buf);
6815                         if (!ret)
6816                                 rec->owner_ref_checked = 1;
6817                 }
6818         }
6819         if (!ret)
6820                 maybe_free_extent_rec(extent_cache, rec);
6821         return ret;
6822 }
6823
#if 0
/*
 * Compiled out.  Walks the rec->backrefs list for a tree backref that matches
 * either @parent (full backrefs, when @parent is non-zero) or @root (non-full
 * backrefs).  Returns the match or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
6853
6854 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6855                                                 u64 parent, u64 root)
6856 {
6857         struct tree_backref *ref = malloc(sizeof(*ref));
6858
6859         if (!ref)
6860                 return NULL;
6861         memset(&ref->node, 0, sizeof(ref->node));
6862         if (parent > 0) {
6863                 ref->parent = parent;
6864                 ref->node.full_backref = 1;
6865         } else {
6866                 ref->root = root;
6867                 ref->node.full_backref = 0;
6868         }
6869
6870         return ref;
6871 }
6872
#if 0
/*
 * Compiled out.  Walks the rec->backrefs list for a data backref that matches
 * @parent (full backrefs, when @parent is non-zero) or the
 * (@root, @owner, @offset) triple (non-full backrefs).  When both @found_ref
 * and the candidate's found_ref are set, a candidate whose bytes or
 * disk_bytenr differ from @bytes / @disk_bytenr is skipped.
 * Returns the match or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;
		back = to_data_backref(node);

		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		    back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
6911
6912 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6913                                                 u64 parent, u64 root,
6914                                                 u64 owner, u64 offset,
6915                                                 u64 max_size)
6916 {
6917         struct data_backref *ref = malloc(sizeof(*ref));
6918
6919         if (!ref)
6920                 return NULL;
6921         memset(&ref->node, 0, sizeof(ref->node));
6922         ref->node.is_data = 1;
6923
6924         if (parent > 0) {
6925                 ref->parent = parent;
6926                 ref->owner = 0;
6927                 ref->offset = 0;
6928                 ref->node.full_backref = 1;
6929         } else {
6930                 ref->root = root;
6931                 ref->owner = owner;
6932                 ref->offset = offset;
6933                 ref->node.full_backref = 0;
6934         }
6935         ref->bytes = max_size;
6936         ref->found_ref = 0;
6937         ref->num_refs = 0;
6938         if (max_size > rec->max_size)
6939                 rec->max_size = max_size;
6940         return ref;
6941 }
6942
6943 /* Check if the type of extent matches with its chunk */
6944 static void check_extent_type(struct extent_record *rec)
6945 {
6946         struct btrfs_block_group_cache *bg_cache;
6947
6948         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6949         if (!bg_cache)
6950                 return;
6951
6952         /* data extent, check chunk directly*/
6953         if (!rec->metadata) {
6954                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6955                         rec->wrong_chunk_type = 1;
6956                 return;
6957         }
6958
6959         /* metadata extent, check the obvious case first */
6960         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6961                                  BTRFS_BLOCK_GROUP_METADATA))) {
6962                 rec->wrong_chunk_type = 1;
6963                 return;
6964         }
6965
6966         /*
6967          * Check SYSTEM extent, as it's also marked as metadata, we can only
6968          * make sure it's a SYSTEM extent by its backref
6969          */
6970         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
6971                 struct extent_backref *node;
6972                 struct tree_backref *tback;
6973                 u64 bg_type;
6974
6975                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
6976                 if (node->is_data) {
6977                         /* tree block shouldn't have data backref */
6978                         rec->wrong_chunk_type = 1;
6979                         return;
6980                 }
6981                 tback = container_of(node, struct tree_backref, node);
6982
6983                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6984                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6985                 else
6986                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
6987                 if (!(bg_cache->flags & bg_type))
6988                         rec->wrong_chunk_type = 1;
6989         }
6990 }
6991
6992 /*
6993  * Allocate a new extent record, fill default values from @tmpl and insert int
6994  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6995  * the cache, otherwise it fails.
6996  */
6997 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6998                 struct extent_record *tmpl)
6999 {
7000         struct extent_record *rec;
7001         int ret = 0;
7002
7003         BUG_ON(tmpl->max_size == 0);
7004         rec = malloc(sizeof(*rec));
7005         if (!rec)
7006                 return -ENOMEM;
7007         rec->start = tmpl->start;
7008         rec->max_size = tmpl->max_size;
7009         rec->nr = max(tmpl->nr, tmpl->max_size);
7010         rec->found_rec = tmpl->found_rec;
7011         rec->content_checked = tmpl->content_checked;
7012         rec->owner_ref_checked = tmpl->owner_ref_checked;
7013         rec->num_duplicates = 0;
7014         rec->metadata = tmpl->metadata;
7015         rec->flag_block_full_backref = FLAG_UNSET;
7016         rec->bad_full_backref = 0;
7017         rec->crossing_stripes = 0;
7018         rec->wrong_chunk_type = 0;
7019         rec->is_root = tmpl->is_root;
7020         rec->refs = tmpl->refs;
7021         rec->extent_item_refs = tmpl->extent_item_refs;
7022         rec->parent_generation = tmpl->parent_generation;
7023         INIT_LIST_HEAD(&rec->backrefs);
7024         INIT_LIST_HEAD(&rec->dups);
7025         INIT_LIST_HEAD(&rec->list);
7026         rec->backref_tree = RB_ROOT;
7027         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7028         rec->cache.start = tmpl->start;
7029         rec->cache.size = tmpl->nr;
7030         ret = insert_cache_extent(extent_cache, &rec->cache);
7031         if (ret) {
7032                 free(rec);
7033                 return ret;
7034         }
7035         bytes_used += rec->nr;
7036
7037         if (tmpl->metadata)
7038                 rec->crossing_stripes = check_crossing_stripes(global_info,
7039                                 rec->start, global_info->nodesize);
7040         check_extent_type(rec);
7041         return ret;
7042 }
7043
7044 /*
7045  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7046  * some are hints:
7047  * - refs              - if found, increase refs
7048  * - is_root           - if found, set
7049  * - content_checked   - if found, set
7050  * - owner_ref_checked - if found, set
7051  *
7052  * If not found, create a new one, initialize and insert.
7053  */
7054 static int add_extent_rec(struct cache_tree *extent_cache,
7055                 struct extent_record *tmpl)
7056 {
7057         struct extent_record *rec;
7058         struct cache_extent *cache;
7059         int ret = 0;
7060         int dup = 0;
7061
7062         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7063         if (cache) {
7064                 rec = container_of(cache, struct extent_record, cache);
7065                 if (tmpl->refs)
7066                         rec->refs++;
7067                 if (rec->nr == 1)
7068                         rec->nr = max(tmpl->nr, tmpl->max_size);
7069
7070                 /*
7071                  * We need to make sure to reset nr to whatever the extent
7072                  * record says was the real size, this way we can compare it to
7073                  * the backrefs.
7074                  */
7075                 if (tmpl->found_rec) {
7076                         if (tmpl->start != rec->start || rec->found_rec) {
7077                                 struct extent_record *tmp;
7078
7079                                 dup = 1;
7080                                 if (list_empty(&rec->list))
7081                                         list_add_tail(&rec->list,
7082                                                       &duplicate_extents);
7083
7084                                 /*
7085                                  * We have to do this song and dance in case we
7086                                  * find an extent record that falls inside of
7087                                  * our current extent record but does not have
7088                                  * the same objectid.
7089                                  */
7090                                 tmp = malloc(sizeof(*tmp));
7091                                 if (!tmp)
7092                                         return -ENOMEM;
7093                                 tmp->start = tmpl->start;
7094                                 tmp->max_size = tmpl->max_size;
7095                                 tmp->nr = tmpl->nr;
7096                                 tmp->found_rec = 1;
7097                                 tmp->metadata = tmpl->metadata;
7098                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7099                                 INIT_LIST_HEAD(&tmp->list);
7100                                 list_add_tail(&tmp->list, &rec->dups);
7101                                 rec->num_duplicates++;
7102                         } else {
7103                                 rec->nr = tmpl->nr;
7104                                 rec->found_rec = 1;
7105                         }
7106                 }
7107
7108                 if (tmpl->extent_item_refs && !dup) {
7109                         if (rec->extent_item_refs) {
7110                                 fprintf(stderr, "block %llu rec "
7111                                         "extent_item_refs %llu, passed %llu\n",
7112                                         (unsigned long long)tmpl->start,
7113                                         (unsigned long long)
7114                                                         rec->extent_item_refs,
7115                                         (unsigned long long)tmpl->extent_item_refs);
7116                         }
7117                         rec->extent_item_refs = tmpl->extent_item_refs;
7118                 }
7119                 if (tmpl->is_root)
7120                         rec->is_root = 1;
7121                 if (tmpl->content_checked)
7122                         rec->content_checked = 1;
7123                 if (tmpl->owner_ref_checked)
7124                         rec->owner_ref_checked = 1;
7125                 memcpy(&rec->parent_key, &tmpl->parent_key,
7126                                 sizeof(tmpl->parent_key));
7127                 if (tmpl->parent_generation)
7128                         rec->parent_generation = tmpl->parent_generation;
7129                 if (rec->max_size < tmpl->max_size)
7130                         rec->max_size = tmpl->max_size;
7131
7132                 /*
7133                  * A metadata extent can't cross stripe_len boundary, otherwise
7134                  * kernel scrub won't be able to handle it.
7135                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7136                  * it.
7137                  */
7138                 if (tmpl->metadata)
7139                         rec->crossing_stripes = check_crossing_stripes(
7140                                         global_info, rec->start,
7141                                         global_info->nodesize);
7142                 check_extent_type(rec);
7143                 maybe_free_extent_rec(extent_cache, rec);
7144                 return ret;
7145         }
7146
7147         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7148
7149         return ret;
7150 }
7151
7152 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7153                             u64 parent, u64 root, int found_ref)
7154 {
7155         struct extent_record *rec;
7156         struct tree_backref *back;
7157         struct cache_extent *cache;
7158         int ret;
7159         bool insert = false;
7160
7161         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7162         if (!cache) {
7163                 struct extent_record tmpl;
7164
7165                 memset(&tmpl, 0, sizeof(tmpl));
7166                 tmpl.start = bytenr;
7167                 tmpl.nr = 1;
7168                 tmpl.metadata = 1;
7169                 tmpl.max_size = 1;
7170
7171                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7172                 if (ret)
7173                         return ret;
7174
7175                 /* really a bug in cache_extent implement now */
7176                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7177                 if (!cache)
7178                         return -ENOENT;
7179         }
7180
7181         rec = container_of(cache, struct extent_record, cache);
7182         if (rec->start != bytenr) {
7183                 /*
7184                  * Several cause, from unaligned bytenr to over lapping extents
7185                  */
7186                 return -EEXIST;
7187         }
7188
7189         back = find_tree_backref(rec, parent, root);
7190         if (!back) {
7191                 back = alloc_tree_backref(rec, parent, root);
7192                 if (!back)
7193                         return -ENOMEM;
7194                 insert = true;
7195         }
7196
7197         if (found_ref) {
7198                 if (back->node.found_ref) {
7199                         fprintf(stderr, "Extent back ref already exists "
7200                                 "for %llu parent %llu root %llu \n",
7201                                 (unsigned long long)bytenr,
7202                                 (unsigned long long)parent,
7203                                 (unsigned long long)root);
7204                 }
7205                 back->node.found_ref = 1;
7206         } else {
7207                 if (back->node.found_extent_tree) {
7208                         fprintf(stderr, "Extent back ref already exists "
7209                                 "for %llu parent %llu root %llu \n",
7210                                 (unsigned long long)bytenr,
7211                                 (unsigned long long)parent,
7212                                 (unsigned long long)root);
7213                 }
7214                 back->node.found_extent_tree = 1;
7215         }
7216         if (insert)
7217                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7218                         compare_extent_backref));
7219         check_extent_type(rec);
7220         maybe_free_extent_rec(extent_cache, rec);
7221         return 0;
7222 }
7223
7224 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7225                             u64 parent, u64 root, u64 owner, u64 offset,
7226                             u32 num_refs, int found_ref, u64 max_size)
7227 {
7228         struct extent_record *rec;
7229         struct data_backref *back;
7230         struct cache_extent *cache;
7231         int ret;
7232         bool insert = false;
7233
7234         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7235         if (!cache) {
7236                 struct extent_record tmpl;
7237
7238                 memset(&tmpl, 0, sizeof(tmpl));
7239                 tmpl.start = bytenr;
7240                 tmpl.nr = 1;
7241                 tmpl.max_size = max_size;
7242
7243                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7244                 if (ret)
7245                         return ret;
7246
7247                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7248                 if (!cache)
7249                         abort();
7250         }
7251
7252         rec = container_of(cache, struct extent_record, cache);
7253         if (rec->max_size < max_size)
7254                 rec->max_size = max_size;
7255
7256         /*
7257          * If found_ref is set then max_size is the real size and must match the
7258          * existing refs.  So if we have already found a ref then we need to
7259          * make sure that this ref matches the existing one, otherwise we need
7260          * to add a new backref so we can notice that the backrefs don't match
7261          * and we need to figure out who is telling the truth.  This is to
7262          * account for that awful fsync bug I introduced where we'd end up with
7263          * a btrfs_file_extent_item that would have its length include multiple
7264          * prealloc extents or point inside of a prealloc extent.
7265          */
7266         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7267                                  bytenr, max_size);
7268         if (!back) {
7269                 back = alloc_data_backref(rec, parent, root, owner, offset,
7270                                           max_size);
7271                 BUG_ON(!back);
7272                 insert = true;
7273         }
7274
7275         if (found_ref) {
7276                 BUG_ON(num_refs != 1);
7277                 if (back->node.found_ref)
7278                         BUG_ON(back->bytes != max_size);
7279                 back->node.found_ref = 1;
7280                 back->found_ref += 1;
7281                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7282                         back->bytes = max_size;
7283                         back->disk_bytenr = bytenr;
7284
7285                         /* Need to reinsert if not already in the tree */
7286                         if (!insert) {
7287                                 rb_erase(&back->node.node, &rec->backref_tree);
7288                                 insert = true;
7289                         }
7290                 }
7291                 rec->refs += 1;
7292                 rec->content_checked = 1;
7293                 rec->owner_ref_checked = 1;
7294         } else {
7295                 if (back->node.found_extent_tree) {
7296                         fprintf(stderr, "Extent back ref already exists "
7297                                 "for %llu parent %llu root %llu "
7298                                 "owner %llu offset %llu num_refs %lu\n",
7299                                 (unsigned long long)bytenr,
7300                                 (unsigned long long)parent,
7301                                 (unsigned long long)root,
7302                                 (unsigned long long)owner,
7303                                 (unsigned long long)offset,
7304                                 (unsigned long)num_refs);
7305                 }
7306                 back->num_refs = num_refs;
7307                 back->node.found_extent_tree = 1;
7308         }
7309         if (insert)
7310                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7311                         compare_extent_backref));
7312
7313         maybe_free_extent_rec(extent_cache, rec);
7314         return 0;
7315 }
7316
7317 static int add_pending(struct cache_tree *pending,
7318                        struct cache_tree *seen, u64 bytenr, u32 size)
7319 {
7320         int ret;
7321         ret = add_cache_extent(seen, bytenr, size);
7322         if (ret)
7323                 return ret;
7324         add_cache_extent(pending, bytenr, size);
7325         return 0;
7326 }
7327
7328 static int pick_next_pending(struct cache_tree *pending,
7329                         struct cache_tree *reada,
7330                         struct cache_tree *nodes,
7331                         u64 last, struct block_info *bits, int bits_nr,
7332                         int *reada_bits)
7333 {
7334         unsigned long node_start = last;
7335         struct cache_extent *cache;
7336         int ret;
7337
7338         cache = search_cache_extent(reada, 0);
7339         if (cache) {
7340                 bits[0].start = cache->start;
7341                 bits[0].size = cache->size;
7342                 *reada_bits = 1;
7343                 return 1;
7344         }
7345         *reada_bits = 0;
7346         if (node_start > 32768)
7347                 node_start -= 32768;
7348
7349         cache = search_cache_extent(nodes, node_start);
7350         if (!cache)
7351                 cache = search_cache_extent(nodes, 0);
7352
7353         if (!cache) {
7354                  cache = search_cache_extent(pending, 0);
7355                  if (!cache)
7356                          return 0;
7357                  ret = 0;
7358                  do {
7359                          bits[ret].start = cache->start;
7360                          bits[ret].size = cache->size;
7361                          cache = next_cache_extent(cache);
7362                          ret++;
7363                  } while (cache && ret < bits_nr);
7364                  return ret;
7365         }
7366
7367         ret = 0;
7368         do {
7369                 bits[ret].start = cache->start;
7370                 bits[ret].size = cache->size;
7371                 cache = next_cache_extent(cache);
7372                 ret++;
7373         } while (cache && ret < bits_nr);
7374
7375         if (bits_nr - ret > 8) {
7376                 u64 lookup = bits[0].start + bits[0].size;
7377                 struct cache_extent *next;
7378                 next = search_cache_extent(pending, lookup);
7379                 while(next) {
7380                         if (next->start - lookup > 32768)
7381                                 break;
7382                         bits[ret].start = next->start;
7383                         bits[ret].size = next->size;
7384                         lookup = next->start + next->size;
7385                         ret++;
7386                         if (ret == bits_nr)
7387                                 break;
7388                         next = next_cache_extent(next);
7389                         if (!next)
7390                                 break;
7391                 }
7392         }
7393         return ret;
7394 }
7395
7396 static void free_chunk_record(struct cache_extent *cache)
7397 {
7398         struct chunk_record *rec;
7399
7400         rec = container_of(cache, struct chunk_record, cache);
7401         list_del_init(&rec->list);
7402         list_del_init(&rec->dextents);
7403         free(rec);
7404 }
7405
7406 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7407 {
7408         cache_tree_free_extents(chunk_cache, free_chunk_record);
7409 }
7410
7411 static void free_device_record(struct rb_node *node)
7412 {
7413         struct device_record *rec;
7414
7415         rec = container_of(node, struct device_record, node);
7416         free(rec);
7417 }
7418
7419 FREE_RB_BASED_TREE(device_cache, free_device_record);
7420
7421 int insert_block_group_record(struct block_group_tree *tree,
7422                               struct block_group_record *bg_rec)
7423 {
7424         int ret;
7425
7426         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7427         if (ret)
7428                 return ret;
7429
7430         list_add_tail(&bg_rec->list, &tree->block_groups);
7431         return 0;
7432 }
7433
7434 static void free_block_group_record(struct cache_extent *cache)
7435 {
7436         struct block_group_record *rec;
7437
7438         rec = container_of(cache, struct block_group_record, cache);
7439         list_del_init(&rec->list);
7440         free(rec);
7441 }
7442
7443 void free_block_group_tree(struct block_group_tree *tree)
7444 {
7445         cache_tree_free_extents(&tree->tree, free_block_group_record);
7446 }
7447
7448 int insert_device_extent_record(struct device_extent_tree *tree,
7449                                 struct device_extent_record *de_rec)
7450 {
7451         int ret;
7452
7453         /*
7454          * Device extent is a bit different from the other extents, because
7455          * the extents which belong to the different devices may have the
7456          * same start and size, so we need use the special extent cache
7457          * search/insert functions.
7458          */
7459         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7460         if (ret)
7461                 return ret;
7462
7463         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7464         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7465         return 0;
7466 }
7467
7468 static void free_device_extent_record(struct cache_extent *cache)
7469 {
7470         struct device_extent_record *rec;
7471
7472         rec = container_of(cache, struct device_extent_record, cache);
7473         if (!list_empty(&rec->chunk_list))
7474                 list_del_init(&rec->chunk_list);
7475         if (!list_empty(&rec->device_list))
7476                 list_del_init(&rec->device_list);
7477         free(rec);
7478 }
7479
7480 void free_device_extent_tree(struct device_extent_tree *tree)
7481 {
7482         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7483 }
7484
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref on the
 * extent named by the item key (objectid is the extent start, offset is
 * passed as the parent).
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref, anything else as a data backref carrying the v0 ref count.
 *
 * NOTE(review): the data path passes max_size == 0; if no extent record
 * exists yet this reaches add_extent_rec_nolookup(), which asserts a
 * non-zero max_size.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid,
				key.offset, 0, 0, 0,
				btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
7505
7506 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7507                                             struct btrfs_key *key,
7508                                             int slot)
7509 {
7510         struct btrfs_chunk *ptr;
7511         struct chunk_record *rec;
7512         int num_stripes, i;
7513
7514         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7515         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7516
7517         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7518         if (!rec) {
7519                 fprintf(stderr, "memory allocation failed\n");
7520                 exit(-1);
7521         }
7522
7523         INIT_LIST_HEAD(&rec->list);
7524         INIT_LIST_HEAD(&rec->dextents);
7525         rec->bg_rec = NULL;
7526
7527         rec->cache.start = key->offset;
7528         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7529
7530         rec->generation = btrfs_header_generation(leaf);
7531
7532         rec->objectid = key->objectid;
7533         rec->type = key->type;
7534         rec->offset = key->offset;
7535
7536         rec->length = rec->cache.size;
7537         rec->owner = btrfs_chunk_owner(leaf, ptr);
7538         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7539         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7540         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7541         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7542         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7543         rec->num_stripes = num_stripes;
7544         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7545
7546         for (i = 0; i < rec->num_stripes; ++i) {
7547                 rec->stripes[i].devid =
7548                         btrfs_stripe_devid_nr(leaf, ptr, i);
7549                 rec->stripes[i].offset =
7550                         btrfs_stripe_offset_nr(leaf, ptr, i);
7551                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7552                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7553                                 BTRFS_UUID_SIZE);
7554         }
7555
7556         return rec;
7557 }
7558
7559 static int process_chunk_item(struct cache_tree *chunk_cache,
7560                               struct btrfs_key *key, struct extent_buffer *eb,
7561                               int slot)
7562 {
7563         struct chunk_record *rec;
7564         struct btrfs_chunk *chunk;
7565         int ret = 0;
7566
7567         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7568         /*
7569          * Do extra check for this chunk item,
7570          *
7571          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7572          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7573          * and owner<->key_type check.
7574          */
7575         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7576                                       key->offset);
7577         if (ret < 0) {
7578                 error("chunk(%llu, %llu) is not valid, ignore it",
7579                       key->offset, btrfs_chunk_length(eb, chunk));
7580                 return 0;
7581         }
7582         rec = btrfs_new_chunk_record(eb, key, slot);
7583         ret = insert_cache_extent(chunk_cache, &rec->cache);
7584         if (ret) {
7585                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7586                         rec->offset, rec->length);
7587                 free(rec);
7588         }
7589
7590         return ret;
7591 }
7592
7593 static int process_device_item(struct rb_root *dev_cache,
7594                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7595 {
7596         struct btrfs_dev_item *ptr;
7597         struct device_record *rec;
7598         int ret = 0;
7599
7600         ptr = btrfs_item_ptr(eb,
7601                 slot, struct btrfs_dev_item);
7602
7603         rec = malloc(sizeof(*rec));
7604         if (!rec) {
7605                 fprintf(stderr, "memory allocation failed\n");
7606                 return -ENOMEM;
7607         }
7608
7609         rec->devid = key->offset;
7610         rec->generation = btrfs_header_generation(eb);
7611
7612         rec->objectid = key->objectid;
7613         rec->type = key->type;
7614         rec->offset = key->offset;
7615
7616         rec->devid = btrfs_device_id(eb, ptr);
7617         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7618         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7619
7620         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7621         if (ret) {
7622                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7623                 free(rec);
7624         }
7625
7626         return ret;
7627 }
7628
7629 struct block_group_record *
7630 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7631                              int slot)
7632 {
7633         struct btrfs_block_group_item *ptr;
7634         struct block_group_record *rec;
7635
7636         rec = calloc(1, sizeof(*rec));
7637         if (!rec) {
7638                 fprintf(stderr, "memory allocation failed\n");
7639                 exit(-1);
7640         }
7641
7642         rec->cache.start = key->objectid;
7643         rec->cache.size = key->offset;
7644
7645         rec->generation = btrfs_header_generation(leaf);
7646
7647         rec->objectid = key->objectid;
7648         rec->type = key->type;
7649         rec->offset = key->offset;
7650
7651         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7652         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7653
7654         INIT_LIST_HEAD(&rec->list);
7655
7656         return rec;
7657 }
7658
7659 static int process_block_group_item(struct block_group_tree *block_group_cache,
7660                                     struct btrfs_key *key,
7661                                     struct extent_buffer *eb, int slot)
7662 {
7663         struct block_group_record *rec;
7664         int ret = 0;
7665
7666         rec = btrfs_new_block_group_record(eb, key, slot);
7667         ret = insert_block_group_record(block_group_cache, rec);
7668         if (ret) {
7669                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7670                         rec->objectid, rec->offset);
7671                 free(rec);
7672         }
7673
7674         return ret;
7675 }
7676
7677 struct device_extent_record *
7678 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7679                                struct btrfs_key *key, int slot)
7680 {
7681         struct device_extent_record *rec;
7682         struct btrfs_dev_extent *ptr;
7683
7684         rec = calloc(1, sizeof(*rec));
7685         if (!rec) {
7686                 fprintf(stderr, "memory allocation failed\n");
7687                 exit(-1);
7688         }
7689
7690         rec->cache.objectid = key->objectid;
7691         rec->cache.start = key->offset;
7692
7693         rec->generation = btrfs_header_generation(leaf);
7694
7695         rec->objectid = key->objectid;
7696         rec->type = key->type;
7697         rec->offset = key->offset;
7698
7699         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7700         rec->chunk_objecteid =
7701                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7702         rec->chunk_offset =
7703                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7704         rec->length = btrfs_dev_extent_length(leaf, ptr);
7705         rec->cache.size = rec->length;
7706
7707         INIT_LIST_HEAD(&rec->chunk_list);
7708         INIT_LIST_HEAD(&rec->device_list);
7709
7710         return rec;
7711 }
7712
7713 static int
7714 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7715                            struct btrfs_key *key, struct extent_buffer *eb,
7716                            int slot)
7717 {
7718         struct device_extent_record *rec;
7719         int ret;
7720
7721         rec = btrfs_new_device_extent_record(eb, key, slot);
7722         ret = insert_device_extent_record(dev_extent_cache, rec);
7723         if (ret) {
7724                 fprintf(stderr,
7725                         "Device extent[%llu, %llu, %llu] existed.\n",
7726                         rec->objectid, rec->offset, rec->length);
7727                 free(rec);
7728         }
7729
7730         return ret;
7731 }
7732
7733 static int process_extent_item(struct btrfs_root *root,
7734                                struct cache_tree *extent_cache,
7735                                struct extent_buffer *eb, int slot)
7736 {
7737         struct btrfs_extent_item *ei;
7738         struct btrfs_extent_inline_ref *iref;
7739         struct btrfs_extent_data_ref *dref;
7740         struct btrfs_shared_data_ref *sref;
7741         struct btrfs_key key;
7742         struct extent_record tmpl;
7743         unsigned long end;
7744         unsigned long ptr;
7745         int ret;
7746         int type;
7747         u32 item_size = btrfs_item_size_nr(eb, slot);
7748         u64 refs = 0;
7749         u64 offset;
7750         u64 num_bytes;
7751         int metadata = 0;
7752
7753         btrfs_item_key_to_cpu(eb, &key, slot);
7754
7755         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7756                 metadata = 1;
7757                 num_bytes = root->fs_info->nodesize;
7758         } else {
7759                 num_bytes = key.offset;
7760         }
7761
7762         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7763                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7764                       key.objectid, root->fs_info->sectorsize);
7765                 return -EIO;
7766         }
7767         if (item_size < sizeof(*ei)) {
7768 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7769                 struct btrfs_extent_item_v0 *ei0;
7770                 BUG_ON(item_size != sizeof(*ei0));
7771                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7772                 refs = btrfs_extent_refs_v0(eb, ei0);
7773 #else
7774                 BUG();
7775 #endif
7776                 memset(&tmpl, 0, sizeof(tmpl));
7777                 tmpl.start = key.objectid;
7778                 tmpl.nr = num_bytes;
7779                 tmpl.extent_item_refs = refs;
7780                 tmpl.metadata = metadata;
7781                 tmpl.found_rec = 1;
7782                 tmpl.max_size = num_bytes;
7783
7784                 return add_extent_rec(extent_cache, &tmpl);
7785         }
7786
7787         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7788         refs = btrfs_extent_refs(eb, ei);
7789         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7790                 metadata = 1;
7791         else
7792                 metadata = 0;
7793         if (metadata && num_bytes != root->fs_info->nodesize) {
7794                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7795                       num_bytes, root->fs_info->nodesize);
7796                 return -EIO;
7797         }
7798         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7799                 error("ignore invalid data extent, length %llu is not aligned to %u",
7800                       num_bytes, root->fs_info->sectorsize);
7801                 return -EIO;
7802         }
7803
7804         memset(&tmpl, 0, sizeof(tmpl));
7805         tmpl.start = key.objectid;
7806         tmpl.nr = num_bytes;
7807         tmpl.extent_item_refs = refs;
7808         tmpl.metadata = metadata;
7809         tmpl.found_rec = 1;
7810         tmpl.max_size = num_bytes;
7811         add_extent_rec(extent_cache, &tmpl);
7812
7813         ptr = (unsigned long)(ei + 1);
7814         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7815             key.type == BTRFS_EXTENT_ITEM_KEY)
7816                 ptr += sizeof(struct btrfs_tree_block_info);
7817
7818         end = (unsigned long)ei + item_size;
7819         while (ptr < end) {
7820                 iref = (struct btrfs_extent_inline_ref *)ptr;
7821                 type = btrfs_extent_inline_ref_type(eb, iref);
7822                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7823                 switch (type) {
7824                 case BTRFS_TREE_BLOCK_REF_KEY:
7825                         ret = add_tree_backref(extent_cache, key.objectid,
7826                                         0, offset, 0);
7827                         if (ret < 0)
7828                                 error(
7829                         "add_tree_backref failed (extent items tree block): %s",
7830                                       strerror(-ret));
7831                         break;
7832                 case BTRFS_SHARED_BLOCK_REF_KEY:
7833                         ret = add_tree_backref(extent_cache, key.objectid,
7834                                         offset, 0, 0);
7835                         if (ret < 0)
7836                                 error(
7837                         "add_tree_backref failed (extent items shared block): %s",
7838                                       strerror(-ret));
7839                         break;
7840                 case BTRFS_EXTENT_DATA_REF_KEY:
7841                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7842                         add_data_backref(extent_cache, key.objectid, 0,
7843                                         btrfs_extent_data_ref_root(eb, dref),
7844                                         btrfs_extent_data_ref_objectid(eb,
7845                                                                        dref),
7846                                         btrfs_extent_data_ref_offset(eb, dref),
7847                                         btrfs_extent_data_ref_count(eb, dref),
7848                                         0, num_bytes);
7849                         break;
7850                 case BTRFS_SHARED_DATA_REF_KEY:
7851                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
7852                         add_data_backref(extent_cache, key.objectid, offset,
7853                                         0, 0, 0,
7854                                         btrfs_shared_data_ref_count(eb, sref),
7855                                         0, num_bytes);
7856                         break;
7857                 default:
7858                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7859                                 key.objectid, key.type, num_bytes);
7860                         goto out;
7861                 }
7862                 ptr += btrfs_extent_inline_ref_size(type);
7863         }
7864         WARN_ON(ptr > end);
7865 out:
7866         return 0;
7867 }
7868
7869 static int check_cache_range(struct btrfs_root *root,
7870                              struct btrfs_block_group_cache *cache,
7871                              u64 offset, u64 bytes)
7872 {
7873         struct btrfs_free_space *entry;
7874         u64 *logical;
7875         u64 bytenr;
7876         int stripe_len;
7877         int i, nr, ret;
7878
7879         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7880                 bytenr = btrfs_sb_offset(i);
7881                 ret = btrfs_rmap_block(root->fs_info,
7882                                        cache->key.objectid, bytenr, 0,
7883                                        &logical, &nr, &stripe_len);
7884                 if (ret)
7885                         return ret;
7886
7887                 while (nr--) {
7888                         if (logical[nr] + stripe_len <= offset)
7889                                 continue;
7890                         if (offset + bytes <= logical[nr])
7891                                 continue;
7892                         if (logical[nr] == offset) {
7893                                 if (stripe_len >= bytes) {
7894                                         free(logical);
7895                                         return 0;
7896                                 }
7897                                 bytes -= stripe_len;
7898                                 offset += stripe_len;
7899                         } else if (logical[nr] < offset) {
7900                                 if (logical[nr] + stripe_len >=
7901                                     offset + bytes) {
7902                                         free(logical);
7903                                         return 0;
7904                                 }
7905                                 bytes = (offset + bytes) -
7906                                         (logical[nr] + stripe_len);
7907                                 offset = logical[nr] + stripe_len;
7908                         } else {
7909                                 /*
7910                                  * Could be tricky, the super may land in the
7911                                  * middle of the area we're checking.  First
7912                                  * check the easiest case, it's at the end.
7913                                  */
7914                                 if (logical[nr] + stripe_len >=
7915                                     bytes + offset) {
7916                                         bytes = logical[nr] - offset;
7917                                         continue;
7918                                 }
7919
7920                                 /* Check the left side */
7921                                 ret = check_cache_range(root, cache,
7922                                                         offset,
7923                                                         logical[nr] - offset);
7924                                 if (ret) {
7925                                         free(logical);
7926                                         return ret;
7927                                 }
7928
7929                                 /* Now we continue with the right side */
7930                                 bytes = (offset + bytes) -
7931                                         (logical[nr] + stripe_len);
7932                                 offset = logical[nr] + stripe_len;
7933                         }
7934                 }
7935
7936                 free(logical);
7937         }
7938
7939         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7940         if (!entry) {
7941                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7942                         offset, offset+bytes);
7943                 return -EINVAL;
7944         }
7945
7946         if (entry->offset != offset) {
7947                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7948                         entry->offset);
7949                 return -EINVAL;
7950         }
7951
7952         if (entry->bytes != bytes) {
7953                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7954                         bytes, entry->bytes, offset);
7955                 return -EINVAL;
7956         }
7957
7958         unlink_free_space(cache->free_space_ctl, entry);
7959         free(entry);
7960         return 0;
7961 }
7962
7963 static int verify_space_cache(struct btrfs_root *root,
7964                               struct btrfs_block_group_cache *cache)
7965 {
7966         struct btrfs_path path;
7967         struct extent_buffer *leaf;
7968         struct btrfs_key key;
7969         u64 last;
7970         int ret = 0;
7971
7972         root = root->fs_info->extent_root;
7973
7974         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7975
7976         btrfs_init_path(&path);
7977         key.objectid = last;
7978         key.offset = 0;
7979         key.type = BTRFS_EXTENT_ITEM_KEY;
7980         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7981         if (ret < 0)
7982                 goto out;
7983         ret = 0;
7984         while (1) {
7985                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7986                         ret = btrfs_next_leaf(root, &path);
7987                         if (ret < 0)
7988                                 goto out;
7989                         if (ret > 0) {
7990                                 ret = 0;
7991                                 break;
7992                         }
7993                 }
7994                 leaf = path.nodes[0];
7995                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7996                 if (key.objectid >= cache->key.offset + cache->key.objectid)
7997                         break;
7998                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7999                     key.type != BTRFS_METADATA_ITEM_KEY) {
8000                         path.slots[0]++;
8001                         continue;
8002                 }
8003
8004                 if (last == key.objectid) {
8005                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8006                                 last = key.objectid + key.offset;
8007                         else
8008                                 last = key.objectid + root->fs_info->nodesize;
8009                         path.slots[0]++;
8010                         continue;
8011                 }
8012
8013                 ret = check_cache_range(root, cache, last,
8014                                         key.objectid - last);
8015                 if (ret)
8016                         break;
8017                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8018                         last = key.objectid + key.offset;
8019                 else
8020                         last = key.objectid + root->fs_info->nodesize;
8021                 path.slots[0]++;
8022         }
8023
8024         if (last < cache->key.objectid + cache->key.offset)
8025                 ret = check_cache_range(root, cache, last,
8026                                         cache->key.objectid +
8027                                         cache->key.offset - last);
8028
8029 out:
8030         btrfs_release_path(&path);
8031
8032         if (!ret &&
8033             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8034                 fprintf(stderr, "There are still entries left in the space "
8035                         "cache\n");
8036                 ret = -EINVAL;
8037         }
8038
8039         return ret;
8040 }
8041
8042 static int check_space_cache(struct btrfs_root *root)
8043 {
8044         struct btrfs_block_group_cache *cache;
8045         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8046         int ret;
8047         int error = 0;
8048
8049         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8050             btrfs_super_generation(root->fs_info->super_copy) !=
8051             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8052                 printf("cache and super generation don't match, space cache "
8053                        "will be invalidated\n");
8054                 return 0;
8055         }
8056
8057         if (ctx.progress_enabled) {
8058                 ctx.tp = TASK_FREE_SPACE;
8059                 task_start(ctx.info);
8060         }
8061
8062         while (1) {
8063                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8064                 if (!cache)
8065                         break;
8066
8067                 start = cache->key.objectid + cache->key.offset;
8068                 if (!cache->free_space_ctl) {
8069                         if (btrfs_init_free_space_ctl(cache,
8070                                                 root->fs_info->sectorsize)) {
8071                                 ret = -ENOMEM;
8072                                 break;
8073                         }
8074                 } else {
8075                         btrfs_remove_free_space_cache(cache);
8076                 }
8077
8078                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8079                         ret = exclude_super_stripes(root, cache);
8080                         if (ret) {
8081                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8082                                         strerror(-ret));
8083                                 error++;
8084                                 continue;
8085                         }
8086                         ret = load_free_space_tree(root->fs_info, cache);
8087                         free_excluded_extents(root, cache);
8088                         if (ret < 0) {
8089                                 fprintf(stderr, "could not load free space tree: %s\n",
8090                                         strerror(-ret));
8091                                 error++;
8092                                 continue;
8093                         }
8094                         error += ret;
8095                 } else {
8096                         ret = load_free_space_cache(root->fs_info, cache);
8097                         if (!ret)
8098                                 continue;
8099                 }
8100
8101                 ret = verify_space_cache(root, cache);
8102                 if (ret) {
8103                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8104                                 cache->key.objectid);
8105                         error++;
8106                 }
8107         }
8108
8109         task_stop(ctx.info);
8110
8111         return error ? -EINVAL : 0;
8112 }
8113
8114 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8115                         u64 num_bytes, unsigned long leaf_offset,
8116                         struct extent_buffer *eb) {
8117
8118         struct btrfs_fs_info *fs_info = root->fs_info;
8119         u64 offset = 0;
8120         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8121         char *data;
8122         unsigned long csum_offset;
8123         u32 csum;
8124         u32 csum_expected;
8125         u64 read_len;
8126         u64 data_checked = 0;
8127         u64 tmp;
8128         int ret = 0;
8129         int mirror;
8130         int num_copies;
8131
8132         if (num_bytes % fs_info->sectorsize)
8133                 return -EINVAL;
8134
8135         data = malloc(num_bytes);
8136         if (!data)
8137                 return -ENOMEM;
8138
8139         while (offset < num_bytes) {
8140                 mirror = 0;
8141 again:
8142                 read_len = num_bytes - offset;
8143                 /* read as much space once a time */
8144                 ret = read_extent_data(fs_info, data + offset,
8145                                 bytenr + offset, &read_len, mirror);
8146                 if (ret)
8147                         goto out;
8148                 data_checked = 0;
8149                 /* verify every 4k data's checksum */
8150                 while (data_checked < read_len) {
8151                         csum = ~(u32)0;
8152                         tmp = offset + data_checked;
8153
8154                         csum = btrfs_csum_data((char *)data + tmp,
8155                                                csum, fs_info->sectorsize);
8156                         btrfs_csum_final(csum, (u8 *)&csum);
8157
8158                         csum_offset = leaf_offset +
8159                                  tmp / fs_info->sectorsize * csum_size;
8160                         read_extent_buffer(eb, (char *)&csum_expected,
8161                                            csum_offset, csum_size);
8162                         /* try another mirror */
8163                         if (csum != csum_expected) {
8164                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8165                                                 mirror, bytenr + tmp,
8166                                                 csum, csum_expected);
8167                                 num_copies = btrfs_num_copies(root->fs_info,
8168                                                 bytenr, num_bytes);
8169                                 if (mirror < num_copies - 1) {
8170                                         mirror += 1;
8171                                         goto again;
8172                                 }
8173                         }
8174                         data_checked += fs_info->sectorsize;
8175                 }
8176                 offset += read_len;
8177         }
8178 out:
8179         free(data);
8180         return ret;
8181 }
8182
8183 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8184                                u64 num_bytes)
8185 {
8186         struct btrfs_path path;
8187         struct extent_buffer *leaf;
8188         struct btrfs_key key;
8189         int ret;
8190
8191         btrfs_init_path(&path);
8192         key.objectid = bytenr;
8193         key.type = BTRFS_EXTENT_ITEM_KEY;
8194         key.offset = (u64)-1;
8195
8196 again:
8197         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8198                                 0, 0);
8199         if (ret < 0) {
8200                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8201                 btrfs_release_path(&path);
8202                 return ret;
8203         } else if (ret) {
8204                 if (path.slots[0] > 0) {
8205                         path.slots[0]--;
8206                 } else {
8207                         ret = btrfs_prev_leaf(root, &path);
8208                         if (ret < 0) {
8209                                 goto out;
8210                         } else if (ret > 0) {
8211                                 ret = 0;
8212                                 goto out;
8213                         }
8214                 }
8215         }
8216
8217         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8218
8219         /*
8220          * Block group items come before extent items if they have the same
8221          * bytenr, so walk back one more just in case.  Dear future traveller,
8222          * first congrats on mastering time travel.  Now if it's not too much
8223          * trouble could you go back to 2006 and tell Chris to make the
8224          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8225          * EXTENT_ITEM_KEY please?
8226          */
8227         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8228                 if (path.slots[0] > 0) {
8229                         path.slots[0]--;
8230                 } else {
8231                         ret = btrfs_prev_leaf(root, &path);
8232                         if (ret < 0) {
8233                                 goto out;
8234                         } else if (ret > 0) {
8235                                 ret = 0;
8236                                 goto out;
8237                         }
8238                 }
8239                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8240         }
8241
8242         while (num_bytes) {
8243                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8244                         ret = btrfs_next_leaf(root, &path);
8245                         if (ret < 0) {
8246                                 fprintf(stderr, "Error going to next leaf "
8247                                         "%d\n", ret);
8248                                 btrfs_release_path(&path);
8249                                 return ret;
8250                         } else if (ret) {
8251                                 break;
8252                         }
8253                 }
8254                 leaf = path.nodes[0];
8255                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8256                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8257                         path.slots[0]++;
8258                         continue;
8259                 }
8260                 if (key.objectid + key.offset < bytenr) {
8261                         path.slots[0]++;
8262                         continue;
8263                 }
8264                 if (key.objectid > bytenr + num_bytes)
8265                         break;
8266
8267                 if (key.objectid == bytenr) {
8268                         if (key.offset >= num_bytes) {
8269                                 num_bytes = 0;
8270                                 break;
8271                         }
8272                         num_bytes -= key.offset;
8273                         bytenr += key.offset;
8274                 } else if (key.objectid < bytenr) {
8275                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8276                                 num_bytes = 0;
8277                                 break;
8278                         }
8279                         num_bytes = (bytenr + num_bytes) -
8280                                 (key.objectid + key.offset);
8281                         bytenr = key.objectid + key.offset;
8282                 } else {
8283                         if (key.objectid + key.offset < bytenr + num_bytes) {
8284                                 u64 new_start = key.objectid + key.offset;
8285                                 u64 new_bytes = bytenr + num_bytes - new_start;
8286
8287                                 /*
8288                                  * Weird case, the extent is in the middle of
8289                                  * our range, we'll have to search one side
8290                                  * and then the other.  Not sure if this happens
8291                                  * in real life, but no harm in coding it up
8292                                  * anyway just in case.
8293                                  */
8294                                 btrfs_release_path(&path);
8295                                 ret = check_extent_exists(root, new_start,
8296                                                           new_bytes);
8297                                 if (ret) {
8298                                         fprintf(stderr, "Right section didn't "
8299                                                 "have a record\n");
8300                                         break;
8301                                 }
8302                                 num_bytes = key.objectid - bytenr;
8303                                 goto again;
8304                         }
8305                         num_bytes = key.objectid - bytenr;
8306                 }
8307                 path.slots[0]++;
8308         }
8309         ret = 0;
8310
8311 out:
8312         if (num_bytes && !ret) {
8313                 fprintf(stderr, "There are no extents for csum range "
8314                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8315                 ret = 1;
8316         }
8317
8318         btrfs_release_path(&path);
8319         return ret;
8320 }
8321
8322 static int check_csums(struct btrfs_root *root)
8323 {
8324         struct btrfs_path path;
8325         struct extent_buffer *leaf;
8326         struct btrfs_key key;
8327         u64 offset = 0, num_bytes = 0;
8328         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8329         int errors = 0;
8330         int ret;
8331         u64 data_len;
8332         unsigned long leaf_offset;
8333
8334         root = root->fs_info->csum_root;
8335         if (!extent_buffer_uptodate(root->node)) {
8336                 fprintf(stderr, "No valid csum tree found\n");
8337                 return -ENOENT;
8338         }
8339
8340         btrfs_init_path(&path);
8341         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8342         key.type = BTRFS_EXTENT_CSUM_KEY;
8343         key.offset = 0;
8344         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8345         if (ret < 0) {
8346                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8347                 btrfs_release_path(&path);
8348                 return ret;
8349         }
8350
8351         if (ret > 0 && path.slots[0])
8352                 path.slots[0]--;
8353         ret = 0;
8354
8355         while (1) {
8356                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8357                         ret = btrfs_next_leaf(root, &path);
8358                         if (ret < 0) {
8359                                 fprintf(stderr, "Error going to next leaf "
8360                                         "%d\n", ret);
8361                                 break;
8362                         }
8363                         if (ret)
8364                                 break;
8365                 }
8366                 leaf = path.nodes[0];
8367
8368                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8369                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8370                         path.slots[0]++;
8371                         continue;
8372                 }
8373
8374                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8375                               csum_size) * root->fs_info->sectorsize;
8376                 if (!check_data_csum)
8377                         goto skip_csum_check;
8378                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8379                 ret = check_extent_csums(root, key.offset, data_len,
8380                                          leaf_offset, leaf);
8381                 if (ret)
8382                         break;
8383 skip_csum_check:
8384                 if (!num_bytes) {
8385                         offset = key.offset;
8386                 } else if (key.offset != offset + num_bytes) {
8387                         ret = check_extent_exists(root, offset, num_bytes);
8388                         if (ret) {
8389                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8390                                         "there is no extent record\n",
8391                                         offset, offset+num_bytes);
8392                                 errors++;
8393                         }
8394                         offset = key.offset;
8395                         num_bytes = 0;
8396                 }
8397                 num_bytes += data_len;
8398                 path.slots[0]++;
8399         }
8400
8401         btrfs_release_path(&path);
8402         return errors;
8403 }
8404
8405 static int is_dropped_key(struct btrfs_key *key,
8406                           struct btrfs_key *drop_key) {
8407         if (key->objectid < drop_key->objectid)
8408                 return 1;
8409         else if (key->objectid == drop_key->objectid) {
8410                 if (key->type < drop_key->type)
8411                         return 1;
8412                 else if (key->type == drop_key->type) {
8413                         if (key->offset < drop_key->offset)
8414                                 return 1;
8415                 }
8416         }
8417         return 0;
8418 }
8419
8420 /*
8421  * Here are the rules for FULL_BACKREF.
8422  *
8423  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8424  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8425  *      FULL_BACKREF set.
8426  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8427  *    if it happened after the relocation occurred since we'll have dropped the
8428  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8429  *    have no real way to know for sure.
8430  *
8431  * We process the blocks one root at a time, and we start from the lowest root
8432  * objectid and go to the highest.  So we can just lookup the owner backref for
8433  * the record and if we don't find it then we know it doesn't exist and we have
8434  * a FULL BACKREF.
8435  *
8436  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8437  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8438  * be set or not and then we can check later once we've gathered all the refs.
8439  */
8440 static int calc_extent_flag(struct cache_tree *extent_cache,
8441                            struct extent_buffer *buf,
8442                            struct root_item_record *ri,
8443                            u64 *flags)
8444 {
8445         struct extent_record *rec;
8446         struct cache_extent *cache;
8447         struct tree_backref *tback;
8448         u64 owner = 0;
8449
8450         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8451         /* we have added this extent before */
8452         if (!cache)
8453                 return -ENOENT;
8454
8455         rec = container_of(cache, struct extent_record, cache);
8456
8457         /*
8458          * Except file/reloc tree, we can not have
8459          * FULL BACKREF MODE
8460          */
8461         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8462                 goto normal;
8463         /*
8464          * root node
8465          */
8466         if (buf->start == ri->bytenr)
8467                 goto normal;
8468
8469         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8470                 goto full_backref;
8471
8472         owner = btrfs_header_owner(buf);
8473         if (owner == ri->objectid)
8474                 goto normal;
8475
8476         tback = find_tree_backref(rec, 0, owner);
8477         if (!tback)
8478                 goto full_backref;
8479 normal:
8480         *flags = 0;
8481         if (rec->flag_block_full_backref != FLAG_UNSET &&
8482             rec->flag_block_full_backref != 0)
8483                 rec->bad_full_backref = 1;
8484         return 0;
8485 full_backref:
8486         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8487         if (rec->flag_block_full_backref != FLAG_UNSET &&
8488             rec->flag_block_full_backref != 1)
8489                 rec->bad_full_backref = 1;
8490         return 0;
8491 }
8492
8493 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8494 {
8495         fprintf(stderr, "Invalid key type(");
8496         print_key_type(stderr, 0, key_type);
8497         fprintf(stderr, ") found in root(");
8498         print_objectid(stderr, rootid, 0);
8499         fprintf(stderr, ")\n");
8500 }
8501
8502 /*
8503  * Check if the key is valid with its extent buffer.
8504  *
8505  * This is a early check in case invalid key exists in a extent buffer
8506  * This is not comprehensive yet, but should prevent wrong key/item passed
8507  * further
8508  */
8509 static int check_type_with_root(u64 rootid, u8 key_type)
8510 {
8511         switch (key_type) {
8512         /* Only valid in chunk tree */
8513         case BTRFS_DEV_ITEM_KEY:
8514         case BTRFS_CHUNK_ITEM_KEY:
8515                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8516                         goto err;
8517                 break;
8518         /* valid in csum and log tree */
8519         case BTRFS_CSUM_TREE_OBJECTID:
8520                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8521                       is_fstree(rootid)))
8522                         goto err;
8523                 break;
8524         case BTRFS_EXTENT_ITEM_KEY:
8525         case BTRFS_METADATA_ITEM_KEY:
8526         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8527                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8528                         goto err;
8529                 break;
8530         case BTRFS_ROOT_ITEM_KEY:
8531                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8532                         goto err;
8533                 break;
8534         case BTRFS_DEV_EXTENT_KEY:
8535                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8536                         goto err;
8537                 break;
8538         }
8539         return 0;
8540 err:
8541         report_mismatch_key_root(key_type, rootid);
8542         return -EINVAL;
8543 }
8544
8545 static int run_next_block(struct btrfs_root *root,
8546                           struct block_info *bits,
8547                           int bits_nr,
8548                           u64 *last,
8549                           struct cache_tree *pending,
8550                           struct cache_tree *seen,
8551                           struct cache_tree *reada,
8552                           struct cache_tree *nodes,
8553                           struct cache_tree *extent_cache,
8554                           struct cache_tree *chunk_cache,
8555                           struct rb_root *dev_cache,
8556                           struct block_group_tree *block_group_cache,
8557                           struct device_extent_tree *dev_extent_cache,
8558                           struct root_item_record *ri)
8559 {
8560         struct btrfs_fs_info *fs_info = root->fs_info;
8561         struct extent_buffer *buf;
8562         struct extent_record *rec = NULL;
8563         u64 bytenr;
8564         u32 size;
8565         u64 parent;
8566         u64 owner;
8567         u64 flags;
8568         u64 ptr;
8569         u64 gen = 0;
8570         int ret = 0;
8571         int i;
8572         int nritems;
8573         struct btrfs_key key;
8574         struct cache_extent *cache;
8575         int reada_bits;
8576
8577         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8578                                     bits_nr, &reada_bits);
8579         if (nritems == 0)
8580                 return 1;
8581
8582         if (!reada_bits) {
8583                 for(i = 0; i < nritems; i++) {
8584                         ret = add_cache_extent(reada, bits[i].start,
8585                                                bits[i].size);
8586                         if (ret == -EEXIST)
8587                                 continue;
8588
8589                         /* fixme, get the parent transid */
8590                         readahead_tree_block(fs_info, bits[i].start, 0);
8591                 }
8592         }
8593         *last = bits[0].start;
8594         bytenr = bits[0].start;
8595         size = bits[0].size;
8596
8597         cache = lookup_cache_extent(pending, bytenr, size);
8598         if (cache) {
8599                 remove_cache_extent(pending, cache);
8600                 free(cache);
8601         }
8602         cache = lookup_cache_extent(reada, bytenr, size);
8603         if (cache) {
8604                 remove_cache_extent(reada, cache);
8605                 free(cache);
8606         }
8607         cache = lookup_cache_extent(nodes, bytenr, size);
8608         if (cache) {
8609                 remove_cache_extent(nodes, cache);
8610                 free(cache);
8611         }
8612         cache = lookup_cache_extent(extent_cache, bytenr, size);
8613         if (cache) {
8614                 rec = container_of(cache, struct extent_record, cache);
8615                 gen = rec->parent_generation;
8616         }
8617
8618         /* fixme, get the real parent transid */
8619         buf = read_tree_block(root->fs_info, bytenr, gen);
8620         if (!extent_buffer_uptodate(buf)) {
8621                 record_bad_block_io(root->fs_info,
8622                                     extent_cache, bytenr, size);
8623                 goto out;
8624         }
8625
8626         nritems = btrfs_header_nritems(buf);
8627
8628         flags = 0;
8629         if (!init_extent_tree) {
8630                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8631                                        btrfs_header_level(buf), 1, NULL,
8632                                        &flags);
8633                 if (ret < 0) {
8634                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8635                         if (ret < 0) {
8636                                 fprintf(stderr, "Couldn't calc extent flags\n");
8637                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8638                         }
8639                 }
8640         } else {
8641                 flags = 0;
8642                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8643                 if (ret < 0) {
8644                         fprintf(stderr, "Couldn't calc extent flags\n");
8645                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8646                 }
8647         }
8648
8649         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8650                 if (ri != NULL &&
8651                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8652                     ri->objectid == btrfs_header_owner(buf)) {
8653                         /*
8654                          * Ok we got to this block from it's original owner and
8655                          * we have FULL_BACKREF set.  Relocation can leave
8656                          * converted blocks over so this is altogether possible,
8657                          * however it's not possible if the generation > the
8658                          * last snapshot, so check for this case.
8659                          */
8660                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8661                             btrfs_header_generation(buf) > ri->last_snapshot) {
8662                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8663                                 rec->bad_full_backref = 1;
8664                         }
8665                 }
8666         } else {
8667                 if (ri != NULL &&
8668                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8669                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8670                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8671                         rec->bad_full_backref = 1;
8672                 }
8673         }
8674
8675         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8676                 rec->flag_block_full_backref = 1;
8677                 parent = bytenr;
8678                 owner = 0;
8679         } else {
8680                 rec->flag_block_full_backref = 0;
8681                 parent = 0;
8682                 owner = btrfs_header_owner(buf);
8683         }
8684
8685         ret = check_block(root, extent_cache, buf, flags);
8686         if (ret)
8687                 goto out;
8688
8689         if (btrfs_is_leaf(buf)) {
8690                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8691                 for (i = 0; i < nritems; i++) {
8692                         struct btrfs_file_extent_item *fi;
8693                         btrfs_item_key_to_cpu(buf, &key, i);
8694                         /*
8695                          * Check key type against the leaf owner.
8696                          * Could filter quite a lot of early error if
8697                          * owner is correct
8698                          */
8699                         if (check_type_with_root(btrfs_header_owner(buf),
8700                                                  key.type)) {
8701                                 fprintf(stderr, "ignoring invalid key\n");
8702                                 continue;
8703                         }
8704                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8705                                 process_extent_item(root, extent_cache, buf,
8706                                                     i);
8707                                 continue;
8708                         }
8709                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8710                                 process_extent_item(root, extent_cache, buf,
8711                                                     i);
8712                                 continue;
8713                         }
8714                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8715                                 total_csum_bytes +=
8716                                         btrfs_item_size_nr(buf, i);
8717                                 continue;
8718                         }
8719                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8720                                 process_chunk_item(chunk_cache, &key, buf, i);
8721                                 continue;
8722                         }
8723                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8724                                 process_device_item(dev_cache, &key, buf, i);
8725                                 continue;
8726                         }
8727                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8728                                 process_block_group_item(block_group_cache,
8729                                         &key, buf, i);
8730                                 continue;
8731                         }
8732                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8733                                 process_device_extent_item(dev_extent_cache,
8734                                         &key, buf, i);
8735                                 continue;
8736
8737                         }
8738                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8739 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8740                                 process_extent_ref_v0(extent_cache, buf, i);
8741 #else
8742                                 BUG();
8743 #endif
8744                                 continue;
8745                         }
8746
8747                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8748                                 ret = add_tree_backref(extent_cache,
8749                                                 key.objectid, 0, key.offset, 0);
8750                                 if (ret < 0)
8751                                         error(
8752                                 "add_tree_backref failed (leaf tree block): %s",
8753                                               strerror(-ret));
8754                                 continue;
8755                         }
8756                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8757                                 ret = add_tree_backref(extent_cache,
8758                                                 key.objectid, key.offset, 0, 0);
8759                                 if (ret < 0)
8760                                         error(
8761                                 "add_tree_backref failed (leaf shared block): %s",
8762                                               strerror(-ret));
8763                                 continue;
8764                         }
8765                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8766                                 struct btrfs_extent_data_ref *ref;
8767                                 ref = btrfs_item_ptr(buf, i,
8768                                                 struct btrfs_extent_data_ref);
8769                                 add_data_backref(extent_cache,
8770                                         key.objectid, 0,
8771                                         btrfs_extent_data_ref_root(buf, ref),
8772                                         btrfs_extent_data_ref_objectid(buf,
8773                                                                        ref),
8774                                         btrfs_extent_data_ref_offset(buf, ref),
8775                                         btrfs_extent_data_ref_count(buf, ref),
8776                                         0, root->fs_info->sectorsize);
8777                                 continue;
8778                         }
8779                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8780                                 struct btrfs_shared_data_ref *ref;
8781                                 ref = btrfs_item_ptr(buf, i,
8782                                                 struct btrfs_shared_data_ref);
8783                                 add_data_backref(extent_cache,
8784                                         key.objectid, key.offset, 0, 0, 0,
8785                                         btrfs_shared_data_ref_count(buf, ref),
8786                                         0, root->fs_info->sectorsize);
8787                                 continue;
8788                         }
8789                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8790                                 struct bad_item *bad;
8791
8792                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8793                                         continue;
8794                                 if (!owner)
8795                                         continue;
8796                                 bad = malloc(sizeof(struct bad_item));
8797                                 if (!bad)
8798                                         continue;
8799                                 INIT_LIST_HEAD(&bad->list);
8800                                 memcpy(&bad->key, &key,
8801                                        sizeof(struct btrfs_key));
8802                                 bad->root_id = owner;
8803                                 list_add_tail(&bad->list, &delete_items);
8804                                 continue;
8805                         }
8806                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8807                                 continue;
8808                         fi = btrfs_item_ptr(buf, i,
8809                                             struct btrfs_file_extent_item);
8810                         if (btrfs_file_extent_type(buf, fi) ==
8811                             BTRFS_FILE_EXTENT_INLINE)
8812                                 continue;
8813                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8814                                 continue;
8815
8816                         data_bytes_allocated +=
8817                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8818                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8819                                 abort();
8820                         }
8821                         data_bytes_referenced +=
8822                                 btrfs_file_extent_num_bytes(buf, fi);
8823                         add_data_backref(extent_cache,
8824                                 btrfs_file_extent_disk_bytenr(buf, fi),
8825                                 parent, owner, key.objectid, key.offset -
8826                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8827                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8828                 }
8829         } else {
8830                 int level;
8831                 struct btrfs_key first_key;
8832
8833                 first_key.objectid = 0;
8834
8835                 if (nritems > 0)
8836                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8837                 level = btrfs_header_level(buf);
8838                 for (i = 0; i < nritems; i++) {
8839                         struct extent_record tmpl;
8840
8841                         ptr = btrfs_node_blockptr(buf, i);
8842                         size = root->fs_info->nodesize;
8843                         btrfs_node_key_to_cpu(buf, &key, i);
8844                         if (ri != NULL) {
8845                                 if ((level == ri->drop_level)
8846                                     && is_dropped_key(&key, &ri->drop_key)) {
8847                                         continue;
8848                                 }
8849                         }
8850
8851                         memset(&tmpl, 0, sizeof(tmpl));
8852                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8853                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8854                         tmpl.start = ptr;
8855                         tmpl.nr = size;
8856                         tmpl.refs = 1;
8857                         tmpl.metadata = 1;
8858                         tmpl.max_size = size;
8859                         ret = add_extent_rec(extent_cache, &tmpl);
8860                         if (ret < 0)
8861                                 goto out;
8862
8863                         ret = add_tree_backref(extent_cache, ptr, parent,
8864                                         owner, 1);
8865                         if (ret < 0) {
8866                                 error(
8867                                 "add_tree_backref failed (non-leaf block): %s",
8868                                       strerror(-ret));
8869                                 continue;
8870                         }
8871
8872                         if (level > 1) {
8873                                 add_pending(nodes, seen, ptr, size);
8874                         } else {
8875                                 add_pending(pending, seen, ptr, size);
8876                         }
8877                 }
8878                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8879                                       nritems) * sizeof(struct btrfs_key_ptr);
8880         }
8881         total_btree_bytes += buf->len;
8882         if (fs_root_objectid(btrfs_header_owner(buf)))
8883                 total_fs_tree_bytes += buf->len;
8884         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8885                 total_extent_tree_bytes += buf->len;
8886 out:
8887         free_extent_buffer(buf);
8888         return ret;
8889 }
8890
8891 static int add_root_to_pending(struct extent_buffer *buf,
8892                                struct cache_tree *extent_cache,
8893                                struct cache_tree *pending,
8894                                struct cache_tree *seen,
8895                                struct cache_tree *nodes,
8896                                u64 objectid)
8897 {
8898         struct extent_record tmpl;
8899         int ret;
8900
8901         if (btrfs_header_level(buf) > 0)
8902                 add_pending(nodes, seen, buf->start, buf->len);
8903         else
8904                 add_pending(pending, seen, buf->start, buf->len);
8905
8906         memset(&tmpl, 0, sizeof(tmpl));
8907         tmpl.start = buf->start;
8908         tmpl.nr = buf->len;
8909         tmpl.is_root = 1;
8910         tmpl.refs = 1;
8911         tmpl.metadata = 1;
8912         tmpl.max_size = buf->len;
8913         add_extent_rec(extent_cache, &tmpl);
8914
8915         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8916             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8917                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8918                                 0, 1);
8919         else
8920                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8921                                 1);
8922         return ret;
8923 }
8924
8925 /* as we fix the tree, we might be deleting blocks that
8926  * we're tracking for repair.  This hook makes sure we
8927  * remove any backrefs for blocks as we are fixing them.
8928  */
8929 static int free_extent_hook(struct btrfs_trans_handle *trans,
8930                             struct btrfs_root *root,
8931                             u64 bytenr, u64 num_bytes, u64 parent,
8932                             u64 root_objectid, u64 owner, u64 offset,
8933                             int refs_to_drop)
8934 {
8935         struct extent_record *rec;
8936         struct cache_extent *cache;
8937         int is_data;
8938         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8939
8940         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8941         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8942         if (!cache)
8943                 return 0;
8944
8945         rec = container_of(cache, struct extent_record, cache);
8946         if (is_data) {
8947                 struct data_backref *back;
8948                 back = find_data_backref(rec, parent, root_objectid, owner,
8949                                          offset, 1, bytenr, num_bytes);
8950                 if (!back)
8951                         goto out;
8952                 if (back->node.found_ref) {
8953                         back->found_ref -= refs_to_drop;
8954                         if (rec->refs)
8955                                 rec->refs -= refs_to_drop;
8956                 }
8957                 if (back->node.found_extent_tree) {
8958                         back->num_refs -= refs_to_drop;
8959                         if (rec->extent_item_refs)
8960                                 rec->extent_item_refs -= refs_to_drop;
8961                 }
8962                 if (back->found_ref == 0)
8963                         back->node.found_ref = 0;
8964                 if (back->num_refs == 0)
8965                         back->node.found_extent_tree = 0;
8966
8967                 if (!back->node.found_extent_tree && back->node.found_ref) {
8968                         rb_erase(&back->node.node, &rec->backref_tree);
8969                         free(back);
8970                 }
8971         } else {
8972                 struct tree_backref *back;
8973                 back = find_tree_backref(rec, parent, root_objectid);
8974                 if (!back)
8975                         goto out;
8976                 if (back->node.found_ref) {
8977                         if (rec->refs)
8978                                 rec->refs--;
8979                         back->node.found_ref = 0;
8980                 }
8981                 if (back->node.found_extent_tree) {
8982                         if (rec->extent_item_refs)
8983                                 rec->extent_item_refs--;
8984                         back->node.found_extent_tree = 0;
8985                 }
8986                 if (!back->node.found_extent_tree && back->node.found_ref) {
8987                         rb_erase(&back->node.node, &rec->backref_tree);
8988                         free(back);
8989                 }
8990         }
8991         maybe_free_extent_rec(extent_cache, rec);
8992 out:
8993         return 0;
8994 }
8995
8996 static int delete_extent_records(struct btrfs_trans_handle *trans,
8997                                  struct btrfs_root *root,
8998                                  struct btrfs_path *path,
8999                                  u64 bytenr)
9000 {
9001         struct btrfs_key key;
9002         struct btrfs_key found_key;
9003         struct extent_buffer *leaf;
9004         int ret;
9005         int slot;
9006
9007
9008         key.objectid = bytenr;
9009         key.type = (u8)-1;
9010         key.offset = (u64)-1;
9011
9012         while(1) {
9013                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9014                                         &key, path, 0, 1);
9015                 if (ret < 0)
9016                         break;
9017
9018                 if (ret > 0) {
9019                         ret = 0;
9020                         if (path->slots[0] == 0)
9021                                 break;
9022                         path->slots[0]--;
9023                 }
9024                 ret = 0;
9025
9026                 leaf = path->nodes[0];
9027                 slot = path->slots[0];
9028
9029                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9030                 if (found_key.objectid != bytenr)
9031                         break;
9032
9033                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9034                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9035                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9036                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9037                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9038                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9039                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9040                         btrfs_release_path(path);
9041                         if (found_key.type == 0) {
9042                                 if (found_key.offset == 0)
9043                                         break;
9044                                 key.offset = found_key.offset - 1;
9045                                 key.type = found_key.type;
9046                         }
9047                         key.type = found_key.type - 1;
9048                         key.offset = (u64)-1;
9049                         continue;
9050                 }
9051
9052                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9053                         found_key.objectid, found_key.type, found_key.offset);
9054
9055                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9056                 if (ret)
9057                         break;
9058                 btrfs_release_path(path);
9059
9060                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9061                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9062                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9063                                 found_key.offset : root->fs_info->nodesize;
9064
9065                         ret = btrfs_update_block_group(trans, root, bytenr,
9066                                                        bytes, 0, 0);
9067                         if (ret)
9068                                 break;
9069                 }
9070         }
9071
9072         btrfs_release_path(path);
9073         return ret;
9074 }
9075
9076 /*
9077  * for a single backref, this will allocate a new extent
9078  * and add the backref to it.
9079  */
9080 static int record_extent(struct btrfs_trans_handle *trans,
9081                          struct btrfs_fs_info *info,
9082                          struct btrfs_path *path,
9083                          struct extent_record *rec,
9084                          struct extent_backref *back,
9085                          int allocated, u64 flags)
9086 {
9087         int ret = 0;
9088         struct btrfs_root *extent_root = info->extent_root;
9089         struct extent_buffer *leaf;
9090         struct btrfs_key ins_key;
9091         struct btrfs_extent_item *ei;
9092         struct data_backref *dback;
9093         struct btrfs_tree_block_info *bi;
9094
9095         if (!back->is_data)
9096                 rec->max_size = max_t(u64, rec->max_size,
9097                                     info->nodesize);
9098
9099         if (!allocated) {
9100                 u32 item_size = sizeof(*ei);
9101
9102                 if (!back->is_data)
9103                         item_size += sizeof(*bi);
9104
9105                 ins_key.objectid = rec->start;
9106                 ins_key.offset = rec->max_size;
9107                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9108
9109                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9110                                         &ins_key, item_size);
9111                 if (ret)
9112                         goto fail;
9113
9114                 leaf = path->nodes[0];
9115                 ei = btrfs_item_ptr(leaf, path->slots[0],
9116                                     struct btrfs_extent_item);
9117
9118                 btrfs_set_extent_refs(leaf, ei, 0);
9119                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9120
9121                 if (back->is_data) {
9122                         btrfs_set_extent_flags(leaf, ei,
9123                                                BTRFS_EXTENT_FLAG_DATA);
9124                 } else {
9125                         struct btrfs_disk_key copy_key;;
9126
9127                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9128                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9129                                              sizeof(*bi));
9130
9131                         btrfs_set_disk_key_objectid(&copy_key,
9132                                                     rec->info_objectid);
9133                         btrfs_set_disk_key_type(&copy_key, 0);
9134                         btrfs_set_disk_key_offset(&copy_key, 0);
9135
9136                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9137                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9138
9139                         btrfs_set_extent_flags(leaf, ei,
9140                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9141                 }
9142
9143                 btrfs_mark_buffer_dirty(leaf);
9144                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9145                                                rec->max_size, 1, 0);
9146                 if (ret)
9147                         goto fail;
9148                 btrfs_release_path(path);
9149         }
9150
9151         if (back->is_data) {
9152                 u64 parent;
9153                 int i;
9154
9155                 dback = to_data_backref(back);
9156                 if (back->full_backref)
9157                         parent = dback->parent;
9158                 else
9159                         parent = 0;
9160
9161                 for (i = 0; i < dback->found_ref; i++) {
9162                         /* if parent != 0, we're doing a full backref
9163                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9164                          * just makes the backref allocator create a data
9165                          * backref
9166                          */
9167                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9168                                                    rec->start, rec->max_size,
9169                                                    parent,
9170                                                    dback->root,
9171                                                    parent ?
9172                                                    BTRFS_FIRST_FREE_OBJECTID :
9173                                                    dback->owner,
9174                                                    dback->offset);
9175                         if (ret)
9176                                 break;
9177                 }
9178                 fprintf(stderr, "adding new data backref"
9179                                 " on %llu %s %llu owner %llu"
9180                                 " offset %llu found %d\n",
9181                                 (unsigned long long)rec->start,
9182                                 back->full_backref ?
9183                                 "parent" : "root",
9184                                 back->full_backref ?
9185                                 (unsigned long long)parent :
9186                                 (unsigned long long)dback->root,
9187                                 (unsigned long long)dback->owner,
9188                                 (unsigned long long)dback->offset,
9189                                 dback->found_ref);
9190         } else {
9191                 u64 parent;
9192                 struct tree_backref *tback;
9193
9194                 tback = to_tree_backref(back);
9195                 if (back->full_backref)
9196                         parent = tback->parent;
9197                 else
9198                         parent = 0;
9199
9200                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9201                                            rec->start, rec->max_size,
9202                                            parent, tback->root, 0, 0);
9203                 fprintf(stderr, "adding new tree backref on "
9204                         "start %llu len %llu parent %llu root %llu\n",
9205                         rec->start, rec->max_size, parent, tback->root);
9206         }
9207 fail:
9208         btrfs_release_path(path);
9209         return ret;
9210 }
9211
9212 static struct extent_entry *find_entry(struct list_head *entries,
9213                                        u64 bytenr, u64 bytes)
9214 {
9215         struct extent_entry *entry = NULL;
9216
9217         list_for_each_entry(entry, entries, list) {
9218                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9219                         return entry;
9220         }
9221
9222         return NULL;
9223 }
9224
9225 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9226 {
9227         struct extent_entry *entry, *best = NULL, *prev = NULL;
9228
9229         list_for_each_entry(entry, entries, list) {
9230                 /*
9231                  * If there are as many broken entries as entries then we know
9232                  * not to trust this particular entry.
9233                  */
9234                 if (entry->broken == entry->count)
9235                         continue;
9236
9237                 /*
9238                  * Special case, when there are only two entries and 'best' is
9239                  * the first one
9240                  */
9241                 if (!prev) {
9242                         best = entry;
9243                         prev = entry;
9244                         continue;
9245                 }
9246
9247                 /*
9248                  * If our current entry == best then we can't be sure our best
9249                  * is really the best, so we need to keep searching.
9250                  */
9251                 if (best && best->count == entry->count) {
9252                         prev = entry;
9253                         best = NULL;
9254                         continue;
9255                 }
9256
9257                 /* Prev == entry, not good enough, have to keep searching */
9258                 if (!prev->broken && prev->count == entry->count)
9259                         continue;
9260
9261                 if (!best)
9262                         best = (prev->count > entry->count) ? prev : entry;
9263                 else if (best->count < entry->count)
9264                         best = entry;
9265                 prev = entry;
9266         }
9267
9268         return best;
9269 }
9270
9271 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9272                       struct data_backref *dback, struct extent_entry *entry)
9273 {
9274         struct btrfs_trans_handle *trans;
9275         struct btrfs_root *root;
9276         struct btrfs_file_extent_item *fi;
9277         struct extent_buffer *leaf;
9278         struct btrfs_key key;
9279         u64 bytenr, bytes;
9280         int ret, err;
9281
9282         key.objectid = dback->root;
9283         key.type = BTRFS_ROOT_ITEM_KEY;
9284         key.offset = (u64)-1;
9285         root = btrfs_read_fs_root(info, &key);
9286         if (IS_ERR(root)) {
9287                 fprintf(stderr, "Couldn't find root for our ref\n");
9288                 return -EINVAL;
9289         }
9290
9291         /*
9292          * The backref points to the original offset of the extent if it was
9293          * split, so we need to search down to the offset we have and then walk
9294          * forward until we find the backref we're looking for.
9295          */
9296         key.objectid = dback->owner;
9297         key.type = BTRFS_EXTENT_DATA_KEY;
9298         key.offset = dback->offset;
9299         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9300         if (ret < 0) {
9301                 fprintf(stderr, "Error looking up ref %d\n", ret);
9302                 return ret;
9303         }
9304
9305         while (1) {
9306                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9307                         ret = btrfs_next_leaf(root, path);
9308                         if (ret) {
9309                                 fprintf(stderr, "Couldn't find our ref, next\n");
9310                                 return -EINVAL;
9311                         }
9312                 }
9313                 leaf = path->nodes[0];
9314                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9315                 if (key.objectid != dback->owner ||
9316                     key.type != BTRFS_EXTENT_DATA_KEY) {
9317                         fprintf(stderr, "Couldn't find our ref, search\n");
9318                         return -EINVAL;
9319                 }
9320                 fi = btrfs_item_ptr(leaf, path->slots[0],
9321                                     struct btrfs_file_extent_item);
9322                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9323                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9324
9325                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9326                         break;
9327                 path->slots[0]++;
9328         }
9329
9330         btrfs_release_path(path);
9331
9332         trans = btrfs_start_transaction(root, 1);
9333         if (IS_ERR(trans))
9334                 return PTR_ERR(trans);
9335
9336         /*
9337          * Ok we have the key of the file extent we want to fix, now we can cow
9338          * down to the thing and fix it.
9339          */
9340         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9341         if (ret < 0) {
9342                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9343                         key.objectid, key.type, key.offset, ret);
9344                 goto out;
9345         }
9346         if (ret > 0) {
9347                 fprintf(stderr, "Well that's odd, we just found this key "
9348                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9349                         key.offset);
9350                 ret = -EINVAL;
9351                 goto out;
9352         }
9353         leaf = path->nodes[0];
9354         fi = btrfs_item_ptr(leaf, path->slots[0],
9355                             struct btrfs_file_extent_item);
9356
9357         if (btrfs_file_extent_compression(leaf, fi) &&
9358             dback->disk_bytenr != entry->bytenr) {
9359                 fprintf(stderr, "Ref doesn't match the record start and is "
9360                         "compressed, please take a btrfs-image of this file "
9361                         "system and send it to a btrfs developer so they can "
9362                         "complete this functionality for bytenr %Lu\n",
9363                         dback->disk_bytenr);
9364                 ret = -EINVAL;
9365                 goto out;
9366         }
9367
9368         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9369                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9370         } else if (dback->disk_bytenr > entry->bytenr) {
9371                 u64 off_diff, offset;
9372
9373                 off_diff = dback->disk_bytenr - entry->bytenr;
9374                 offset = btrfs_file_extent_offset(leaf, fi);
9375                 if (dback->disk_bytenr + offset +
9376                     btrfs_file_extent_num_bytes(leaf, fi) >
9377                     entry->bytenr + entry->bytes) {
9378                         fprintf(stderr, "Ref is past the entry end, please "
9379                                 "take a btrfs-image of this file system and "
9380                                 "send it to a btrfs developer, ref %Lu\n",
9381                                 dback->disk_bytenr);
9382                         ret = -EINVAL;
9383                         goto out;
9384                 }
9385                 offset += off_diff;
9386                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9387                 btrfs_set_file_extent_offset(leaf, fi, offset);
9388         } else if (dback->disk_bytenr < entry->bytenr) {
9389                 u64 offset;
9390
9391                 offset = btrfs_file_extent_offset(leaf, fi);
9392                 if (dback->disk_bytenr + offset < entry->bytenr) {
9393                         fprintf(stderr, "Ref is before the entry start, please"
9394                                 " take a btrfs-image of this file system and "
9395                                 "send it to a btrfs developer, ref %Lu\n",
9396                                 dback->disk_bytenr);
9397                         ret = -EINVAL;
9398                         goto out;
9399                 }
9400
9401                 offset += dback->disk_bytenr;
9402                 offset -= entry->bytenr;
9403                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9404                 btrfs_set_file_extent_offset(leaf, fi, offset);
9405         }
9406
9407         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9408
9409         /*
9410          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9411          * only do this if we aren't using compression, otherwise it's a
9412          * trickier case.
9413          */
9414         if (!btrfs_file_extent_compression(leaf, fi))
9415                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9416         else
9417                 printf("ram bytes may be wrong?\n");
9418         btrfs_mark_buffer_dirty(leaf);
9419 out:
9420         err = btrfs_commit_transaction(trans, root);
9421         btrfs_release_path(path);
9422         return ret ? ret : err;
9423 }
9424
9425 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9426                            struct extent_record *rec)
9427 {
9428         struct extent_backref *back, *tmp;
9429         struct data_backref *dback;
9430         struct extent_entry *entry, *best = NULL;
9431         LIST_HEAD(entries);
9432         int nr_entries = 0;
9433         int broken_entries = 0;
9434         int ret = 0;
9435         short mismatch = 0;
9436
9437         /*
9438          * Metadata is easy and the backrefs should always agree on bytenr and
9439          * size, if not we've got bigger issues.
9440          */
9441         if (rec->metadata)
9442                 return 0;
9443
9444         rbtree_postorder_for_each_entry_safe(back, tmp,
9445                                              &rec->backref_tree, node) {
9446                 if (back->full_backref || !back->is_data)
9447                         continue;
9448
9449                 dback = to_data_backref(back);
9450
9451                 /*
9452                  * We only pay attention to backrefs that we found a real
9453                  * backref for.
9454                  */
9455                 if (dback->found_ref == 0)
9456                         continue;
9457
9458                 /*
9459                  * For now we only catch when the bytes don't match, not the
9460                  * bytenr.  We can easily do this at the same time, but I want
9461                  * to have a fs image to test on before we just add repair
9462                  * functionality willy-nilly so we know we won't screw up the
9463                  * repair.
9464                  */
9465
9466                 entry = find_entry(&entries, dback->disk_bytenr,
9467                                    dback->bytes);
9468                 if (!entry) {
9469                         entry = malloc(sizeof(struct extent_entry));
9470                         if (!entry) {
9471                                 ret = -ENOMEM;
9472                                 goto out;
9473                         }
9474                         memset(entry, 0, sizeof(*entry));
9475                         entry->bytenr = dback->disk_bytenr;
9476                         entry->bytes = dback->bytes;
9477                         list_add_tail(&entry->list, &entries);
9478                         nr_entries++;
9479                 }
9480
9481                 /*
9482                  * If we only have on entry we may think the entries agree when
9483                  * in reality they don't so we have to do some extra checking.
9484                  */
9485                 if (dback->disk_bytenr != rec->start ||
9486                     dback->bytes != rec->nr || back->broken)
9487                         mismatch = 1;
9488
9489                 if (back->broken) {
9490                         entry->broken++;
9491                         broken_entries++;
9492                 }
9493
9494                 entry->count++;
9495         }
9496
9497         /* Yay all the backrefs agree, carry on good sir */
9498         if (nr_entries <= 1 && !mismatch)
9499                 goto out;
9500
9501         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9502                 "%Lu\n", rec->start);
9503
9504         /*
9505          * First we want to see if the backrefs can agree amongst themselves who
9506          * is right, so figure out which one of the entries has the highest
9507          * count.
9508          */
9509         best = find_most_right_entry(&entries);
9510
9511         /*
9512          * Ok so we may have an even split between what the backrefs think, so
9513          * this is where we use the extent ref to see what it thinks.
9514          */
9515         if (!best) {
9516                 entry = find_entry(&entries, rec->start, rec->nr);
9517                 if (!entry && (!broken_entries || !rec->found_rec)) {
9518                         fprintf(stderr, "Backrefs don't agree with each other "
9519                                 "and extent record doesn't agree with anybody,"
9520                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9521                                 rec->start, rec->nr);
9522                         ret = -EINVAL;
9523                         goto out;
9524                 } else if (!entry) {
9525                         /*
9526                          * Ok our backrefs were broken, we'll assume this is the
9527                          * correct value and add an entry for this range.
9528                          */
9529                         entry = malloc(sizeof(struct extent_entry));
9530                         if (!entry) {
9531                                 ret = -ENOMEM;
9532                                 goto out;
9533                         }
9534                         memset(entry, 0, sizeof(*entry));
9535                         entry->bytenr = rec->start;
9536                         entry->bytes = rec->nr;
9537                         list_add_tail(&entry->list, &entries);
9538                         nr_entries++;
9539                 }
9540                 entry->count++;
9541                 best = find_most_right_entry(&entries);
9542                 if (!best) {
9543                         fprintf(stderr, "Backrefs and extent record evenly "
9544                                 "split on who is right, this is going to "
9545                                 "require user input to fix bytenr %Lu bytes "
9546                                 "%Lu\n", rec->start, rec->nr);
9547                         ret = -EINVAL;
9548                         goto out;
9549                 }
9550         }
9551
9552         /*
9553          * I don't think this can happen currently as we'll abort() if we catch
9554          * this case higher up, but in case somebody removes that we still can't
9555          * deal with it properly here yet, so just bail out of that's the case.
9556          */
9557         if (best->bytenr != rec->start) {
9558                 fprintf(stderr, "Extent start and backref starts don't match, "
9559                         "please use btrfs-image on this file system and send "
9560                         "it to a btrfs developer so they can make fsck fix "
9561                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9562                         rec->start, rec->nr);
9563                 ret = -EINVAL;
9564                 goto out;
9565         }
9566
9567         /*
9568          * Ok great we all agreed on an extent record, let's go find the real
9569          * references and fix up the ones that don't match.
9570          */
9571         rbtree_postorder_for_each_entry_safe(back, tmp,
9572                                              &rec->backref_tree, node) {
9573                 if (back->full_backref || !back->is_data)
9574                         continue;
9575
9576                 dback = to_data_backref(back);
9577
9578                 /*
9579                  * Still ignoring backrefs that don't have a real ref attached
9580                  * to them.
9581                  */
9582                 if (dback->found_ref == 0)
9583                         continue;
9584
9585                 if (dback->bytes == best->bytes &&
9586                     dback->disk_bytenr == best->bytenr)
9587                         continue;
9588
9589                 ret = repair_ref(info, path, dback, best);
9590                 if (ret)
9591                         goto out;
9592         }
9593
9594         /*
9595          * Ok we messed with the actual refs, which means we need to drop our
9596          * entire cache and go back and rescan.  I know this is a huge pain and
9597          * adds a lot of extra work, but it's the only way to be safe.  Once all
9598          * the backrefs agree we may not need to do anything to the extent
9599          * record itself.
9600          */
9601         ret = -EAGAIN;
9602 out:
9603         while (!list_empty(&entries)) {
9604                 entry = list_entry(entries.next, struct extent_entry, list);
9605                 list_del_init(&entry->list);
9606                 free(entry);
9607         }
9608         return ret;
9609 }
9610
9611 static int process_duplicates(struct cache_tree *extent_cache,
9612                               struct extent_record *rec)
9613 {
9614         struct extent_record *good, *tmp;
9615         struct cache_extent *cache;
9616         int ret;
9617
9618         /*
9619          * If we found a extent record for this extent then return, or if we
9620          * have more than one duplicate we are likely going to need to delete
9621          * something.
9622          */
9623         if (rec->found_rec || rec->num_duplicates > 1)
9624                 return 0;
9625
9626         /* Shouldn't happen but just in case */
9627         BUG_ON(!rec->num_duplicates);
9628
9629         /*
9630          * So this happens if we end up with a backref that doesn't match the
9631          * actual extent entry.  So either the backref is bad or the extent
9632          * entry is bad.  Either way we want to have the extent_record actually
9633          * reflect what we found in the extent_tree, so we need to take the
9634          * duplicate out and use that as the extent_record since the only way we
9635          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9636          */
9637         remove_cache_extent(extent_cache, &rec->cache);
9638
9639         good = to_extent_record(rec->dups.next);
9640         list_del_init(&good->list);
9641         INIT_LIST_HEAD(&good->backrefs);
9642         INIT_LIST_HEAD(&good->dups);
9643         good->cache.start = good->start;
9644         good->cache.size = good->nr;
9645         good->content_checked = 0;
9646         good->owner_ref_checked = 0;
9647         good->num_duplicates = 0;
9648         good->refs = rec->refs;
9649         list_splice_init(&rec->backrefs, &good->backrefs);
9650         while (1) {
9651                 cache = lookup_cache_extent(extent_cache, good->start,
9652                                             good->nr);
9653                 if (!cache)
9654                         break;
9655                 tmp = container_of(cache, struct extent_record, cache);
9656
9657                 /*
9658                  * If we find another overlapping extent and it's found_rec is
9659                  * set then it's a duplicate and we need to try and delete
9660                  * something.
9661                  */
9662                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9663                         if (list_empty(&good->list))
9664                                 list_add_tail(&good->list,
9665                                               &duplicate_extents);
9666                         good->num_duplicates += tmp->num_duplicates + 1;
9667                         list_splice_init(&tmp->dups, &good->dups);
9668                         list_del_init(&tmp->list);
9669                         list_add_tail(&tmp->list, &good->dups);
9670                         remove_cache_extent(extent_cache, &tmp->cache);
9671                         continue;
9672                 }
9673
9674                 /*
9675                  * Ok we have another non extent item backed extent rec, so lets
9676                  * just add it to this extent and carry on like we did above.
9677                  */
9678                 good->refs += tmp->refs;
9679                 list_splice_init(&tmp->backrefs, &good->backrefs);
9680                 remove_cache_extent(extent_cache, &tmp->cache);
9681                 free(tmp);
9682         }
9683         ret = insert_cache_extent(extent_cache, &good->cache);
9684         BUG_ON(ret);
9685         free(rec);
9686         return good->num_duplicates ? 0 : 1;
9687 }
9688
9689 static int delete_duplicate_records(struct btrfs_root *root,
9690                                     struct extent_record *rec)
9691 {
9692         struct btrfs_trans_handle *trans;
9693         LIST_HEAD(delete_list);
9694         struct btrfs_path path;
9695         struct extent_record *tmp, *good, *n;
9696         int nr_del = 0;
9697         int ret = 0, err;
9698         struct btrfs_key key;
9699
9700         btrfs_init_path(&path);
9701
9702         good = rec;
9703         /* Find the record that covers all of the duplicates. */
9704         list_for_each_entry(tmp, &rec->dups, list) {
9705                 if (good->start < tmp->start)
9706                         continue;
9707                 if (good->nr > tmp->nr)
9708                         continue;
9709
9710                 if (tmp->start + tmp->nr < good->start + good->nr) {
9711                         fprintf(stderr, "Ok we have overlapping extents that "
9712                                 "aren't completely covered by each other, this "
9713                                 "is going to require more careful thought.  "
9714                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9715                                 tmp->start, tmp->nr, good->start, good->nr);
9716                         abort();
9717                 }
9718                 good = tmp;
9719         }
9720
9721         if (good != rec)
9722                 list_add_tail(&rec->list, &delete_list);
9723
9724         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9725                 if (tmp == good)
9726                         continue;
9727                 list_move_tail(&tmp->list, &delete_list);
9728         }
9729
9730         root = root->fs_info->extent_root;
9731         trans = btrfs_start_transaction(root, 1);
9732         if (IS_ERR(trans)) {
9733                 ret = PTR_ERR(trans);
9734                 goto out;
9735         }
9736
9737         list_for_each_entry(tmp, &delete_list, list) {
9738                 if (tmp->found_rec == 0)
9739                         continue;
9740                 key.objectid = tmp->start;
9741                 key.type = BTRFS_EXTENT_ITEM_KEY;
9742                 key.offset = tmp->nr;
9743
9744                 /* Shouldn't happen but just in case */
9745                 if (tmp->metadata) {
9746                         fprintf(stderr, "Well this shouldn't happen, extent "
9747                                 "record overlaps but is metadata? "
9748                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9749                         abort();
9750                 }
9751
9752                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9753                 if (ret) {
9754                         if (ret > 0)
9755                                 ret = -EINVAL;
9756                         break;
9757                 }
9758                 ret = btrfs_del_item(trans, root, &path);
9759                 if (ret)
9760                         break;
9761                 btrfs_release_path(&path);
9762                 nr_del++;
9763         }
9764         err = btrfs_commit_transaction(trans, root);
9765         if (err && !ret)
9766                 ret = err;
9767 out:
9768         while (!list_empty(&delete_list)) {
9769                 tmp = to_extent_record(delete_list.next);
9770                 list_del_init(&tmp->list);
9771                 if (tmp == rec)
9772                         continue;
9773                 free(tmp);
9774         }
9775
9776         while (!list_empty(&rec->dups)) {
9777                 tmp = to_extent_record(rec->dups.next);
9778                 list_del_init(&tmp->list);
9779                 free(tmp);
9780         }
9781
9782         btrfs_release_path(&path);
9783
9784         if (!ret && !nr_del)
9785                 rec->num_duplicates = 0;
9786
9787         return ret ? ret : nr_del;
9788 }
9789
9790 static int find_possible_backrefs(struct btrfs_fs_info *info,
9791                                   struct btrfs_path *path,
9792                                   struct cache_tree *extent_cache,
9793                                   struct extent_record *rec)
9794 {
9795         struct btrfs_root *root;
9796         struct extent_backref *back, *tmp;
9797         struct data_backref *dback;
9798         struct cache_extent *cache;
9799         struct btrfs_file_extent_item *fi;
9800         struct btrfs_key key;
9801         u64 bytenr, bytes;
9802         int ret;
9803
9804         rbtree_postorder_for_each_entry_safe(back, tmp,
9805                                              &rec->backref_tree, node) {
9806                 /* Don't care about full backrefs (poor unloved backrefs) */
9807                 if (back->full_backref || !back->is_data)
9808                         continue;
9809
9810                 dback = to_data_backref(back);
9811
9812                 /* We found this one, we don't need to do a lookup */
9813                 if (dback->found_ref)
9814                         continue;
9815
9816                 key.objectid = dback->root;
9817                 key.type = BTRFS_ROOT_ITEM_KEY;
9818                 key.offset = (u64)-1;
9819
9820                 root = btrfs_read_fs_root(info, &key);
9821
9822                 /* No root, definitely a bad ref, skip */
9823                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9824                         continue;
9825                 /* Other err, exit */
9826                 if (IS_ERR(root))
9827                         return PTR_ERR(root);
9828
9829                 key.objectid = dback->owner;
9830                 key.type = BTRFS_EXTENT_DATA_KEY;
9831                 key.offset = dback->offset;
9832                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9833                 if (ret) {
9834                         btrfs_release_path(path);
9835                         if (ret < 0)
9836                                 return ret;
9837                         /* Didn't find it, we can carry on */
9838                         ret = 0;
9839                         continue;
9840                 }
9841
9842                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9843                                     struct btrfs_file_extent_item);
9844                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9845                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9846                 btrfs_release_path(path);
9847                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9848                 if (cache) {
9849                         struct extent_record *tmp;
9850                         tmp = container_of(cache, struct extent_record, cache);
9851
9852                         /*
9853                          * If we found an extent record for the bytenr for this
9854                          * particular backref then we can't add it to our
9855                          * current extent record.  We only want to add backrefs
9856                          * that don't have a corresponding extent item in the
9857                          * extent tree since they likely belong to this record
9858                          * and we need to fix it if it doesn't match bytenrs.
9859                          */
9860                         if  (tmp->found_rec)
9861                                 continue;
9862                 }
9863
9864                 dback->found_ref += 1;
9865                 dback->disk_bytenr = bytenr;
9866                 dback->bytes = bytes;
9867
9868                 /*
9869                  * Set this so the verify backref code knows not to trust the
9870                  * values in this backref.
9871                  */
9872                 back->broken = 1;
9873         }
9874
9875         return 0;
9876 }
9877
9878 /*
9879  * Record orphan data refs into the corresponding root.
9880  *
9881  * Return 0 if the extent item contains a data ref and it was recorded.
9882  * Return 1 if the extent item contains no useful data ref.
9883  *   In that case, it may contain only a shared_dataref or metadata backref,
9884  *   or the file extent exists (this should be handled by the extent bytenr
9885  *   recovery routine).
9886  * Return <0 if something goes wrong.
9887  */
9888 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9889                                       struct extent_record *rec)
9890 {
9891         struct btrfs_key key;
9892         struct btrfs_root *dest_root;
9893         struct extent_backref *back, *tmp;
9894         struct data_backref *dback;
9895         struct orphan_data_extent *orphan;
9896         struct btrfs_path path;
9897         int recorded_data_ref = 0;
9898         int ret = 0;
9899
9900         if (rec->metadata)
9901                 return 1;
9902         btrfs_init_path(&path);
9903         rbtree_postorder_for_each_entry_safe(back, tmp,
9904                                              &rec->backref_tree, node) {
9905                 if (back->full_backref || !back->is_data ||
9906                     !back->found_extent_tree)
9907                         continue;
9908                 dback = to_data_backref(back);
9909                 if (dback->found_ref)
9910                         continue;
9911                 key.objectid = dback->root;
9912                 key.type = BTRFS_ROOT_ITEM_KEY;
9913                 key.offset = (u64)-1;
9914
9915                 dest_root = btrfs_read_fs_root(fs_info, &key);
9916
9917                 /* For non-exist root we just skip it */
9918                 if (IS_ERR(dest_root) || !dest_root)
9919                         continue;
9920
9921                 key.objectid = dback->owner;
9922                 key.type = BTRFS_EXTENT_DATA_KEY;
9923                 key.offset = dback->offset;
9924
9925                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9926                 btrfs_release_path(&path);
9927                 /*
9928                  * For ret < 0, it's OK since the fs-tree may be corrupted,
9929                  * we need to record it for inode/file extent rebuild.
9930                  * For ret > 0, we record it only for file extent rebuild.
9931                  * For ret == 0, the file extent exists but only bytenr
9932                  * mismatch, let the original bytenr fix routine to handle,
9933                  * don't record it.
9934                  */
9935                 if (ret == 0)
9936                         continue;
9937                 ret = 0;
9938                 orphan = malloc(sizeof(*orphan));
9939                 if (!orphan) {
9940                         ret = -ENOMEM;
9941                         goto out;
9942                 }
9943                 INIT_LIST_HEAD(&orphan->list);
9944                 orphan->root = dback->root;
9945                 orphan->objectid = dback->owner;
9946                 orphan->offset = dback->offset;
9947                 orphan->disk_bytenr = rec->cache.start;
9948                 orphan->disk_len = rec->cache.size;
9949                 list_add(&dest_root->orphan_data_extents, &orphan->list);
9950                 recorded_data_ref = 1;
9951         }
9952 out:
9953         btrfs_release_path(&path);
9954         if (!ret)
9955                 return !recorded_data_ref;
9956         else
9957                 return ret;
9958 }
9959
9960 /*
9961  * when an incorrect extent item is found, this will delete
9962  * all of the existing entries for it and recreate them
9963  * based on what the tree scan found.
9964  */
9965 static int fixup_extent_refs(struct btrfs_fs_info *info,
9966                              struct cache_tree *extent_cache,
9967                              struct extent_record *rec)
9968 {
9969         struct btrfs_trans_handle *trans = NULL;
9970         int ret;
9971         struct btrfs_path path;
9972         struct cache_extent *cache;
9973         struct extent_backref *back, *tmp;
9974         int allocated = 0;
9975         u64 flags = 0;
9976
9977         if (rec->flag_block_full_backref)
9978                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9979
9980         btrfs_init_path(&path);
9981         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9982                 /*
9983                  * Sometimes the backrefs themselves are so broken they don't
9984                  * get attached to any meaningful rec, so first go back and
9985                  * check any of our backrefs that we couldn't find and throw
9986                  * them into the list if we find the backref so that
9987                  * verify_backrefs can figure out what to do.
9988                  */
9989                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9990                 if (ret < 0)
9991                         goto out;
9992         }
9993
9994         /* step one, make sure all of the backrefs agree */
9995         ret = verify_backrefs(info, &path, rec);
9996         if (ret < 0)
9997                 goto out;
9998
9999         trans = btrfs_start_transaction(info->extent_root, 1);
10000         if (IS_ERR(trans)) {
10001                 ret = PTR_ERR(trans);
10002                 goto out;
10003         }
10004
10005         /* step two, delete all the existing records */
10006         ret = delete_extent_records(trans, info->extent_root, &path,
10007                                     rec->start);
10008
10009         if (ret < 0)
10010                 goto out;
10011
10012         /* was this block corrupt?  If so, don't add references to it */
10013         cache = lookup_cache_extent(info->corrupt_blocks,
10014                                     rec->start, rec->max_size);
10015         if (cache) {
10016                 ret = 0;
10017                 goto out;
10018         }
10019
10020         /* step three, recreate all the refs we did find */
10021         rbtree_postorder_for_each_entry_safe(back, tmp,
10022                                              &rec->backref_tree, node) {
10023                 /*
10024                  * if we didn't find any references, don't create a
10025                  * new extent record
10026                  */
10027                 if (!back->found_ref)
10028                         continue;
10029
10030                 rec->bad_full_backref = 0;
10031                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10032                 allocated = 1;
10033
10034                 if (ret)
10035                         goto out;
10036         }
10037 out:
10038         if (trans) {
10039                 int err = btrfs_commit_transaction(trans, info->extent_root);
10040                 if (!ret)
10041                         ret = err;
10042         }
10043
10044         if (!ret)
10045                 fprintf(stderr, "Repaired extent references for %llu\n",
10046                                 (unsigned long long)rec->start);
10047
10048         btrfs_release_path(&path);
10049         return ret;
10050 }
10051
10052 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10053                               struct extent_record *rec)
10054 {
10055         struct btrfs_trans_handle *trans;
10056         struct btrfs_root *root = fs_info->extent_root;
10057         struct btrfs_path path;
10058         struct btrfs_extent_item *ei;
10059         struct btrfs_key key;
10060         u64 flags;
10061         int ret = 0;
10062
10063         key.objectid = rec->start;
10064         if (rec->metadata) {
10065                 key.type = BTRFS_METADATA_ITEM_KEY;
10066                 key.offset = rec->info_level;
10067         } else {
10068                 key.type = BTRFS_EXTENT_ITEM_KEY;
10069                 key.offset = rec->max_size;
10070         }
10071
10072         trans = btrfs_start_transaction(root, 0);
10073         if (IS_ERR(trans))
10074                 return PTR_ERR(trans);
10075
10076         btrfs_init_path(&path);
10077         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10078         if (ret < 0) {
10079                 btrfs_release_path(&path);
10080                 btrfs_commit_transaction(trans, root);
10081                 return ret;
10082         } else if (ret) {
10083                 fprintf(stderr, "Didn't find extent for %llu\n",
10084                         (unsigned long long)rec->start);
10085                 btrfs_release_path(&path);
10086                 btrfs_commit_transaction(trans, root);
10087                 return -ENOENT;
10088         }
10089
10090         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10091                             struct btrfs_extent_item);
10092         flags = btrfs_extent_flags(path.nodes[0], ei);
10093         if (rec->flag_block_full_backref) {
10094                 fprintf(stderr, "setting full backref on %llu\n",
10095                         (unsigned long long)key.objectid);
10096                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10097         } else {
10098                 fprintf(stderr, "clearing full backref on %llu\n",
10099                         (unsigned long long)key.objectid);
10100                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10101         }
10102         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10103         btrfs_mark_buffer_dirty(path.nodes[0]);
10104         btrfs_release_path(&path);
10105         ret = btrfs_commit_transaction(trans, root);
10106         if (!ret)
10107                 fprintf(stderr, "Repaired extent flags for %llu\n",
10108                                 (unsigned long long)rec->start);
10109
10110         return ret;
10111 }
10112
10113 /* right now we only prune from the extent allocation tree */
10114 static int prune_one_block(struct btrfs_trans_handle *trans,
10115                            struct btrfs_fs_info *info,
10116                            struct btrfs_corrupt_block *corrupt)
10117 {
10118         int ret;
10119         struct btrfs_path path;
10120         struct extent_buffer *eb;
10121         u64 found;
10122         int slot;
10123         int nritems;
10124         int level = corrupt->level + 1;
10125
10126         btrfs_init_path(&path);
10127 again:
10128         /* we want to stop at the parent to our busted block */
10129         path.lowest_level = level;
10130
10131         ret = btrfs_search_slot(trans, info->extent_root,
10132                                 &corrupt->key, &path, -1, 1);
10133
10134         if (ret < 0)
10135                 goto out;
10136
10137         eb = path.nodes[level];
10138         if (!eb) {
10139                 ret = -ENOENT;
10140                 goto out;
10141         }
10142
10143         /*
10144          * hopefully the search gave us the block we want to prune,
10145          * lets try that first
10146          */
10147         slot = path.slots[level];
10148         found =  btrfs_node_blockptr(eb, slot);
10149         if (found == corrupt->cache.start)
10150                 goto del_ptr;
10151
10152         nritems = btrfs_header_nritems(eb);
10153
10154         /* the search failed, lets scan this node and hope we find it */
10155         for (slot = 0; slot < nritems; slot++) {
10156                 found =  btrfs_node_blockptr(eb, slot);
10157                 if (found == corrupt->cache.start)
10158                         goto del_ptr;
10159         }
10160         /*
10161          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10162          * to this block
10163          */
10164         if (eb == info->extent_root->node) {
10165                 ret = -ENOENT;
10166                 goto out;
10167         } else {
10168                 level++;
10169                 btrfs_release_path(&path);
10170                 goto again;
10171         }
10172
10173 del_ptr:
10174         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10175         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10176
10177 out:
10178         btrfs_release_path(&path);
10179         return ret;
10180 }
10181
10182 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10183 {
10184         struct btrfs_trans_handle *trans = NULL;
10185         struct cache_extent *cache;
10186         struct btrfs_corrupt_block *corrupt;
10187
10188         while (1) {
10189                 cache = search_cache_extent(info->corrupt_blocks, 0);
10190                 if (!cache)
10191                         break;
10192                 if (!trans) {
10193                         trans = btrfs_start_transaction(info->extent_root, 1);
10194                         if (IS_ERR(trans))
10195                                 return PTR_ERR(trans);
10196                 }
10197                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10198                 prune_one_block(trans, info, corrupt);
10199                 remove_cache_extent(info->corrupt_blocks, cache);
10200         }
10201         if (trans)
10202                 return btrfs_commit_transaction(trans, info->extent_root);
10203         return 0;
10204 }
10205
10206 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10207 {
10208         struct btrfs_block_group_cache *cache;
10209         u64 start, end;
10210         int ret;
10211
10212         while (1) {
10213                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10214                                             &start, &end, EXTENT_DIRTY);
10215                 if (ret)
10216                         break;
10217                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10218         }
10219
10220         start = 0;
10221         while (1) {
10222                 cache = btrfs_lookup_first_block_group(fs_info, start);
10223                 if (!cache)
10224                         break;
10225                 if (cache->cached)
10226                         cache->cached = 0;
10227                 start = cache->key.objectid + cache->key.offset;
10228         }
10229 }
10230
10231 static int check_extent_refs(struct btrfs_root *root,
10232                              struct cache_tree *extent_cache)
10233 {
10234         struct extent_record *rec;
10235         struct cache_extent *cache;
10236         int ret = 0;
10237         int had_dups = 0;
10238
10239         if (repair) {
10240                 /*
10241                  * if we're doing a repair, we have to make sure
10242                  * we don't allocate from the problem extents.
10243                  * In the worst case, this will be all the
10244                  * extents in the FS
10245                  */
10246                 cache = search_cache_extent(extent_cache, 0);
10247                 while(cache) {
10248                         rec = container_of(cache, struct extent_record, cache);
10249                         set_extent_dirty(root->fs_info->excluded_extents,
10250                                          rec->start,
10251                                          rec->start + rec->max_size - 1);
10252                         cache = next_cache_extent(cache);
10253                 }
10254
10255                 /* pin down all the corrupted blocks too */
10256                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10257                 while(cache) {
10258                         set_extent_dirty(root->fs_info->excluded_extents,
10259                                          cache->start,
10260                                          cache->start + cache->size - 1);
10261                         cache = next_cache_extent(cache);
10262                 }
10263                 prune_corrupt_blocks(root->fs_info);
10264                 reset_cached_block_groups(root->fs_info);
10265         }
10266
10267         reset_cached_block_groups(root->fs_info);
10268
10269         /*
10270          * We need to delete any duplicate entries we find first otherwise we
10271          * could mess up the extent tree when we have backrefs that actually
10272          * belong to a different extent item and not the weird duplicate one.
10273          */
10274         while (repair && !list_empty(&duplicate_extents)) {
10275                 rec = to_extent_record(duplicate_extents.next);
10276                 list_del_init(&rec->list);
10277
10278                 /* Sometimes we can find a backref before we find an actual
10279                  * extent, so we need to process it a little bit to see if there
10280                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10281                  * if this is a backref screwup.  If we need to delete stuff
10282                  * process_duplicates() will return 0, otherwise it will return
10283                  * 1 and we
10284                  */
10285                 if (process_duplicates(extent_cache, rec))
10286                         continue;
10287                 ret = delete_duplicate_records(root, rec);
10288                 if (ret < 0)
10289                         return ret;
10290                 /*
10291                  * delete_duplicate_records will return the number of entries
10292                  * deleted, so if it's greater than 0 then we know we actually
10293                  * did something and we need to remove.
10294                  */
10295                 if (ret)
10296                         had_dups = 1;
10297         }
10298
10299         if (had_dups)
10300                 return -EAGAIN;
10301
10302         while(1) {
10303                 int cur_err = 0;
10304                 int fix = 0;
10305
10306                 cache = search_cache_extent(extent_cache, 0);
10307                 if (!cache)
10308                         break;
10309                 rec = container_of(cache, struct extent_record, cache);
10310                 if (rec->num_duplicates) {
10311                         fprintf(stderr, "extent item %llu has multiple extent "
10312                                 "items\n", (unsigned long long)rec->start);
10313                         cur_err = 1;
10314                 }
10315
10316                 if (rec->refs != rec->extent_item_refs) {
10317                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10318                                 (unsigned long long)rec->start,
10319                                 (unsigned long long)rec->nr);
10320                         fprintf(stderr, "extent item %llu, found %llu\n",
10321                                 (unsigned long long)rec->extent_item_refs,
10322                                 (unsigned long long)rec->refs);
10323                         ret = record_orphan_data_extents(root->fs_info, rec);
10324                         if (ret < 0)
10325                                 goto repair_abort;
10326                         fix = ret;
10327                         cur_err = 1;
10328                 }
10329                 if (all_backpointers_checked(rec, 1)) {
10330                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10331                                 (unsigned long long)rec->start,
10332                                 (unsigned long long)rec->nr);
10333                         fix = 1;
10334                         cur_err = 1;
10335                 }
10336                 if (!rec->owner_ref_checked) {
10337                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10338                                 (unsigned long long)rec->start,
10339                                 (unsigned long long)rec->nr);
10340                         fix = 1;
10341                         cur_err = 1;
10342                 }
10343
10344                 if (repair && fix) {
10345                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10346                         if (ret)
10347                                 goto repair_abort;
10348                 }
10349
10350
10351                 if (rec->bad_full_backref) {
10352                         fprintf(stderr, "bad full backref, on [%llu]\n",
10353                                 (unsigned long long)rec->start);
10354                         if (repair) {
10355                                 ret = fixup_extent_flags(root->fs_info, rec);
10356                                 if (ret)
10357                                         goto repair_abort;
10358                                 fix = 1;
10359                         }
10360                         cur_err = 1;
10361                 }
10362                 /*
10363                  * Although it's not a extent ref's problem, we reuse this
10364                  * routine for error reporting.
10365                  * No repair function yet.
10366                  */
10367                 if (rec->crossing_stripes) {
10368                         fprintf(stderr,
10369                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10370                                 rec->start, rec->start + rec->max_size);
10371                         cur_err = 1;
10372                 }
10373
10374                 if (rec->wrong_chunk_type) {
10375                         fprintf(stderr,
10376                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10377                                 rec->start, rec->start + rec->max_size);
10378                         cur_err = 1;
10379                 }
10380
10381                 remove_cache_extent(extent_cache, cache);
10382                 free_all_extent_backrefs(rec);
10383                 if (!init_extent_tree && repair && (!cur_err || fix))
10384                         clear_extent_dirty(root->fs_info->excluded_extents,
10385                                            rec->start,
10386                                            rec->start + rec->max_size - 1);
10387                 free(rec);
10388         }
10389 repair_abort:
10390         if (repair) {
10391                 if (ret && ret != -EAGAIN) {
10392                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10393                         exit(1);
10394                 } else if (!ret) {
10395                         struct btrfs_trans_handle *trans;
10396
10397                         root = root->fs_info->extent_root;
10398                         trans = btrfs_start_transaction(root, 1);
10399                         if (IS_ERR(trans)) {
10400                                 ret = PTR_ERR(trans);
10401                                 goto repair_abort;
10402                         }
10403
10404                         ret = btrfs_fix_block_accounting(trans, root);
10405                         if (ret)
10406                                 goto repair_abort;
10407                         ret = btrfs_commit_transaction(trans, root);
10408                         if (ret)
10409                                 goto repair_abort;
10410                 }
10411                 return ret;
10412         }
10413         return 0;
10414 }
10415
10416 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10417 {
10418         u64 stripe_size;
10419
10420         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10421                 stripe_size = length;
10422                 stripe_size /= num_stripes;
10423         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10424                 stripe_size = length * 2;
10425                 stripe_size /= num_stripes;
10426         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10427                 stripe_size = length;
10428                 stripe_size /= (num_stripes - 1);
10429         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10430                 stripe_size = length;
10431                 stripe_size /= (num_stripes - 2);
10432         } else {
10433                 stripe_size = length;
10434         }
10435         return stripe_size;
10436 }
10437
10438 /*
10439  * Check the chunk with its block group/dev list ref:
10440  * Return 0 if all refs seems valid.
10441  * Return 1 if part of refs seems valid, need later check for rebuild ref
10442  * like missing block group and needs to search extent tree to rebuild them.
10443  * Return -1 if essential refs are missing and unable to rebuild.
10444  */
10445 static int check_chunk_refs(struct chunk_record *chunk_rec,
10446                             struct block_group_tree *block_group_cache,
10447                             struct device_extent_tree *dev_extent_cache,
10448                             int silent)
10449 {
10450         struct cache_extent *block_group_item;
10451         struct block_group_record *block_group_rec;
10452         struct cache_extent *dev_extent_item;
10453         struct device_extent_record *dev_extent_rec;
10454         u64 devid;
10455         u64 offset;
10456         u64 length;
10457         int metadump_v2 = 0;
10458         int i;
10459         int ret = 0;
10460
10461         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10462                                                chunk_rec->offset,
10463                                                chunk_rec->length);
10464         if (block_group_item) {
10465                 block_group_rec = container_of(block_group_item,
10466                                                struct block_group_record,
10467                                                cache);
10468                 if (chunk_rec->length != block_group_rec->offset ||
10469                     chunk_rec->offset != block_group_rec->objectid ||
10470                     (!metadump_v2 &&
10471                      chunk_rec->type_flags != block_group_rec->flags)) {
10472                         if (!silent)
10473                                 fprintf(stderr,
10474                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10475                                         chunk_rec->objectid,
10476                                         chunk_rec->type,
10477                                         chunk_rec->offset,
10478                                         chunk_rec->length,
10479                                         chunk_rec->offset,
10480                                         chunk_rec->type_flags,
10481                                         block_group_rec->objectid,
10482                                         block_group_rec->type,
10483                                         block_group_rec->offset,
10484                                         block_group_rec->offset,
10485                                         block_group_rec->objectid,
10486                                         block_group_rec->flags);
10487                         ret = -1;
10488                 } else {
10489                         list_del_init(&block_group_rec->list);
10490                         chunk_rec->bg_rec = block_group_rec;
10491                 }
10492         } else {
10493                 if (!silent)
10494                         fprintf(stderr,
10495                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10496                                 chunk_rec->objectid,
10497                                 chunk_rec->type,
10498                                 chunk_rec->offset,
10499                                 chunk_rec->length,
10500                                 chunk_rec->offset,
10501                                 chunk_rec->type_flags);
10502                 ret = 1;
10503         }
10504
10505         if (metadump_v2)
10506                 return ret;
10507
10508         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10509                                     chunk_rec->num_stripes);
10510         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10511                 devid = chunk_rec->stripes[i].devid;
10512                 offset = chunk_rec->stripes[i].offset;
10513                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10514                                                        devid, offset, length);
10515                 if (dev_extent_item) {
10516                         dev_extent_rec = container_of(dev_extent_item,
10517                                                 struct device_extent_record,
10518                                                 cache);
10519                         if (dev_extent_rec->objectid != devid ||
10520                             dev_extent_rec->offset != offset ||
10521                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10522                             dev_extent_rec->length != length) {
10523                                 if (!silent)
10524                                         fprintf(stderr,
10525                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10526                                                 chunk_rec->objectid,
10527                                                 chunk_rec->type,
10528                                                 chunk_rec->offset,
10529                                                 chunk_rec->stripes[i].devid,
10530                                                 chunk_rec->stripes[i].offset,
10531                                                 dev_extent_rec->objectid,
10532                                                 dev_extent_rec->offset,
10533                                                 dev_extent_rec->length);
10534                                 ret = -1;
10535                         } else {
10536                                 list_move(&dev_extent_rec->chunk_list,
10537                                           &chunk_rec->dextents);
10538                         }
10539                 } else {
10540                         if (!silent)
10541                                 fprintf(stderr,
10542                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10543                                         chunk_rec->objectid,
10544                                         chunk_rec->type,
10545                                         chunk_rec->offset,
10546                                         chunk_rec->stripes[i].devid,
10547                                         chunk_rec->stripes[i].offset);
10548                         ret = -1;
10549                 }
10550         }
10551         return ret;
10552 }
10553
10554 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10555 int check_chunks(struct cache_tree *chunk_cache,
10556                  struct block_group_tree *block_group_cache,
10557                  struct device_extent_tree *dev_extent_cache,
10558                  struct list_head *good, struct list_head *bad,
10559                  struct list_head *rebuild, int silent)
10560 {
10561         struct cache_extent *chunk_item;
10562         struct chunk_record *chunk_rec;
10563         struct block_group_record *bg_rec;
10564         struct device_extent_record *dext_rec;
10565         int err;
10566         int ret = 0;
10567
10568         chunk_item = first_cache_extent(chunk_cache);
10569         while (chunk_item) {
10570                 chunk_rec = container_of(chunk_item, struct chunk_record,
10571                                          cache);
10572                 err = check_chunk_refs(chunk_rec, block_group_cache,
10573                                        dev_extent_cache, silent);
10574                 if (err < 0)
10575                         ret = err;
10576                 if (err == 0 && good)
10577                         list_add_tail(&chunk_rec->list, good);
10578                 if (err > 0 && rebuild)
10579                         list_add_tail(&chunk_rec->list, rebuild);
10580                 if (err < 0 && bad)
10581                         list_add_tail(&chunk_rec->list, bad);
10582                 chunk_item = next_cache_extent(chunk_item);
10583         }
10584
10585         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10586                 if (!silent)
10587                         fprintf(stderr,
10588                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10589                                 bg_rec->objectid,
10590                                 bg_rec->offset,
10591                                 bg_rec->flags);
10592                 if (!ret)
10593                         ret = 1;
10594         }
10595
10596         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10597                             chunk_list) {
10598                 if (!silent)
10599                         fprintf(stderr,
10600                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10601                                 dext_rec->objectid,
10602                                 dext_rec->offset,
10603                                 dext_rec->length);
10604                 if (!ret)
10605                         ret = 1;
10606         }
10607         return ret;
10608 }
10609
10610
10611 static int check_device_used(struct device_record *dev_rec,
10612                              struct device_extent_tree *dext_cache)
10613 {
10614         struct cache_extent *cache;
10615         struct device_extent_record *dev_extent_rec;
10616         u64 total_byte = 0;
10617
10618         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10619         while (cache) {
10620                 dev_extent_rec = container_of(cache,
10621                                               struct device_extent_record,
10622                                               cache);
10623                 if (dev_extent_rec->objectid != dev_rec->devid)
10624                         break;
10625
10626                 list_del_init(&dev_extent_rec->device_list);
10627                 total_byte += dev_extent_rec->length;
10628                 cache = next_cache_extent(cache);
10629         }
10630
10631         if (total_byte != dev_rec->byte_used) {
10632                 fprintf(stderr,
10633                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10634                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10635                         dev_rec->type, dev_rec->offset);
10636                 return -1;
10637         } else {
10638                 return 0;
10639         }
10640 }
10641
10642 /* check btrfs_dev_item -> btrfs_dev_extent */
10643 static int check_devices(struct rb_root *dev_cache,
10644                          struct device_extent_tree *dev_extent_cache)
10645 {
10646         struct rb_node *dev_node;
10647         struct device_record *dev_rec;
10648         struct device_extent_record *dext_rec;
10649         int err;
10650         int ret = 0;
10651
10652         dev_node = rb_first(dev_cache);
10653         while (dev_node) {
10654                 dev_rec = container_of(dev_node, struct device_record, node);
10655                 err = check_device_used(dev_rec, dev_extent_cache);
10656                 if (err)
10657                         ret = err;
10658
10659                 dev_node = rb_next(dev_node);
10660         }
10661         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10662                             device_list) {
10663                 fprintf(stderr,
10664                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10665                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10666                 if (!ret)
10667                         ret = 1;
10668         }
10669         return ret;
10670 }
10671
10672 static int add_root_item_to_list(struct list_head *head,
10673                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10674                                   u8 level, u8 drop_level,
10675                                   struct btrfs_key *drop_key)
10676 {
10677
10678         struct root_item_record *ri_rec;
10679         ri_rec = malloc(sizeof(*ri_rec));
10680         if (!ri_rec)
10681                 return -ENOMEM;
10682         ri_rec->bytenr = bytenr;
10683         ri_rec->objectid = objectid;
10684         ri_rec->level = level;
10685         ri_rec->drop_level = drop_level;
10686         ri_rec->last_snapshot = last_snapshot;
10687         if (drop_key)
10688                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10689         list_add_tail(&ri_rec->list, head);
10690
10691         return 0;
10692 }
10693
10694 static void free_root_item_list(struct list_head *list)
10695 {
10696         struct root_item_record *ri_rec;
10697
10698         while (!list_empty(list)) {
10699                 ri_rec = list_first_entry(list, struct root_item_record,
10700                                           list);
10701                 list_del_init(&ri_rec->list);
10702                 free(ri_rec);
10703         }
10704 }
10705
10706 static int deal_root_from_list(struct list_head *list,
10707                                struct btrfs_root *root,
10708                                struct block_info *bits,
10709                                int bits_nr,
10710                                struct cache_tree *pending,
10711                                struct cache_tree *seen,
10712                                struct cache_tree *reada,
10713                                struct cache_tree *nodes,
10714                                struct cache_tree *extent_cache,
10715                                struct cache_tree *chunk_cache,
10716                                struct rb_root *dev_cache,
10717                                struct block_group_tree *block_group_cache,
10718                                struct device_extent_tree *dev_extent_cache)
10719 {
10720         int ret = 0;
10721         u64 last;
10722
10723         while (!list_empty(list)) {
10724                 struct root_item_record *rec;
10725                 struct extent_buffer *buf;
10726                 rec = list_entry(list->next,
10727                                  struct root_item_record, list);
10728                 last = 0;
10729                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10730                 if (!extent_buffer_uptodate(buf)) {
10731                         free_extent_buffer(buf);
10732                         ret = -EIO;
10733                         break;
10734                 }
10735                 ret = add_root_to_pending(buf, extent_cache, pending,
10736                                     seen, nodes, rec->objectid);
10737                 if (ret < 0)
10738                         break;
10739                 /*
10740                  * To rebuild extent tree, we need deal with snapshot
10741                  * one by one, otherwise we deal with node firstly which
10742                  * can maximize readahead.
10743                  */
10744                 while (1) {
10745                         ret = run_next_block(root, bits, bits_nr, &last,
10746                                              pending, seen, reada, nodes,
10747                                              extent_cache, chunk_cache,
10748                                              dev_cache, block_group_cache,
10749                                              dev_extent_cache, rec);
10750                         if (ret != 0)
10751                                 break;
10752                 }
10753                 free_extent_buffer(buf);
10754                 list_del(&rec->list);
10755                 free(rec);
10756                 if (ret < 0)
10757                         break;
10758         }
10759         while (ret >= 0) {
10760                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10761                                      reada, nodes, extent_cache, chunk_cache,
10762                                      dev_cache, block_group_cache,
10763                                      dev_extent_cache, NULL);
10764                 if (ret != 0) {
10765                         if (ret > 0)
10766                                 ret = 0;
10767                         break;
10768                 }
10769         }
10770         return ret;
10771 }
10772
10773 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10774 {
10775         struct rb_root dev_cache;
10776         struct cache_tree chunk_cache;
10777         struct block_group_tree block_group_cache;
10778         struct device_extent_tree dev_extent_cache;
10779         struct cache_tree extent_cache;
10780         struct cache_tree seen;
10781         struct cache_tree pending;
10782         struct cache_tree reada;
10783         struct cache_tree nodes;
10784         struct extent_io_tree excluded_extents;
10785         struct cache_tree corrupt_blocks;
10786         struct btrfs_path path;
10787         struct btrfs_key key;
10788         struct btrfs_key found_key;
10789         int ret, err = 0;
10790         struct block_info *bits;
10791         int bits_nr;
10792         struct extent_buffer *leaf;
10793         int slot;
10794         struct btrfs_root_item ri;
10795         struct list_head dropping_trees;
10796         struct list_head normal_trees;
10797         struct btrfs_root *root1;
10798         struct btrfs_root *root;
10799         u64 objectid;
10800         u8 level;
10801
10802         root = fs_info->fs_root;
10803         dev_cache = RB_ROOT;
10804         cache_tree_init(&chunk_cache);
10805         block_group_tree_init(&block_group_cache);
10806         device_extent_tree_init(&dev_extent_cache);
10807
10808         cache_tree_init(&extent_cache);
10809         cache_tree_init(&seen);
10810         cache_tree_init(&pending);
10811         cache_tree_init(&nodes);
10812         cache_tree_init(&reada);
10813         cache_tree_init(&corrupt_blocks);
10814         extent_io_tree_init(&excluded_extents);
10815         INIT_LIST_HEAD(&dropping_trees);
10816         INIT_LIST_HEAD(&normal_trees);
10817
10818         if (repair) {
10819                 fs_info->excluded_extents = &excluded_extents;
10820                 fs_info->fsck_extent_cache = &extent_cache;
10821                 fs_info->free_extent_hook = free_extent_hook;
10822                 fs_info->corrupt_blocks = &corrupt_blocks;
10823         }
10824
10825         bits_nr = 1024;
10826         bits = malloc(bits_nr * sizeof(struct block_info));
10827         if (!bits) {
10828                 perror("malloc");
10829                 exit(1);
10830         }
10831
10832         if (ctx.progress_enabled) {
10833                 ctx.tp = TASK_EXTENTS;
10834                 task_start(ctx.info);
10835         }
10836
10837 again:
10838         root1 = fs_info->tree_root;
10839         level = btrfs_header_level(root1->node);
10840         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10841                                     root1->node->start, 0, level, 0, NULL);
10842         if (ret < 0)
10843                 goto out;
10844         root1 = fs_info->chunk_root;
10845         level = btrfs_header_level(root1->node);
10846         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10847                                     root1->node->start, 0, level, 0, NULL);
10848         if (ret < 0)
10849                 goto out;
10850         btrfs_init_path(&path);
10851         key.offset = 0;
10852         key.objectid = 0;
10853         key.type = BTRFS_ROOT_ITEM_KEY;
10854         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10855         if (ret < 0)
10856                 goto out;
10857         while(1) {
10858                 leaf = path.nodes[0];
10859                 slot = path.slots[0];
10860                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10861                         ret = btrfs_next_leaf(root, &path);
10862                         if (ret != 0)
10863                                 break;
10864                         leaf = path.nodes[0];
10865                         slot = path.slots[0];
10866                 }
10867                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10868                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10869                         unsigned long offset;
10870                         u64 last_snapshot;
10871
10872                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10873                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10874                         last_snapshot = btrfs_root_last_snapshot(&ri);
10875                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10876                                 level = btrfs_root_level(&ri);
10877                                 ret = add_root_item_to_list(&normal_trees,
10878                                                 found_key.objectid,
10879                                                 btrfs_root_bytenr(&ri),
10880                                                 last_snapshot, level,
10881                                                 0, NULL);
10882                                 if (ret < 0)
10883                                         goto out;
10884                         } else {
10885                                 level = btrfs_root_level(&ri);
10886                                 objectid = found_key.objectid;
10887                                 btrfs_disk_key_to_cpu(&found_key,
10888                                                       &ri.drop_progress);
10889                                 ret = add_root_item_to_list(&dropping_trees,
10890                                                 objectid,
10891                                                 btrfs_root_bytenr(&ri),
10892                                                 last_snapshot, level,
10893                                                 ri.drop_level, &found_key);
10894                                 if (ret < 0)
10895                                         goto out;
10896                         }
10897                 }
10898                 path.slots[0]++;
10899         }
10900         btrfs_release_path(&path);
10901
10902         /*
10903          * check_block can return -EAGAIN if it fixes something, please keep
10904          * this in mind when dealing with return values from these functions, if
10905          * we get -EAGAIN we want to fall through and restart the loop.
10906          */
10907         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10908                                   &seen, &reada, &nodes, &extent_cache,
10909                                   &chunk_cache, &dev_cache, &block_group_cache,
10910                                   &dev_extent_cache);
10911         if (ret < 0) {
10912                 if (ret == -EAGAIN)
10913                         goto loop;
10914                 goto out;
10915         }
10916         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10917                                   &pending, &seen, &reada, &nodes,
10918                                   &extent_cache, &chunk_cache, &dev_cache,
10919                                   &block_group_cache, &dev_extent_cache);
10920         if (ret < 0) {
10921                 if (ret == -EAGAIN)
10922                         goto loop;
10923                 goto out;
10924         }
10925
10926         ret = check_chunks(&chunk_cache, &block_group_cache,
10927                            &dev_extent_cache, NULL, NULL, NULL, 0);
10928         if (ret) {
10929                 if (ret == -EAGAIN)
10930                         goto loop;
10931                 err = ret;
10932         }
10933
10934         ret = check_extent_refs(root, &extent_cache);
10935         if (ret < 0) {
10936                 if (ret == -EAGAIN)
10937                         goto loop;
10938                 goto out;
10939         }
10940
10941         ret = check_devices(&dev_cache, &dev_extent_cache);
10942         if (ret && err)
10943                 ret = err;
10944
10945 out:
10946         task_stop(ctx.info);
10947         if (repair) {
10948                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10949                 extent_io_tree_cleanup(&excluded_extents);
10950                 fs_info->fsck_extent_cache = NULL;
10951                 fs_info->free_extent_hook = NULL;
10952                 fs_info->corrupt_blocks = NULL;
10953                 fs_info->excluded_extents = NULL;
10954         }
10955         free(bits);
10956         free_chunk_cache_tree(&chunk_cache);
10957         free_device_cache_tree(&dev_cache);
10958         free_block_group_tree(&block_group_cache);
10959         free_device_extent_tree(&dev_extent_cache);
10960         free_extent_cache_tree(&seen);
10961         free_extent_cache_tree(&pending);
10962         free_extent_cache_tree(&reada);
10963         free_extent_cache_tree(&nodes);
10964         free_root_item_list(&normal_trees);
10965         free_root_item_list(&dropping_trees);
10966         return ret;
10967 loop:
10968         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10969         free_extent_cache_tree(&seen);
10970         free_extent_cache_tree(&pending);
10971         free_extent_cache_tree(&reada);
10972         free_extent_cache_tree(&nodes);
10973         free_chunk_cache_tree(&chunk_cache);
10974         free_block_group_tree(&block_group_cache);
10975         free_device_cache_tree(&dev_cache);
10976         free_device_extent_tree(&dev_extent_cache);
10977         free_extent_record_cache(&extent_cache);
10978         free_root_item_list(&normal_trees);
10979         free_root_item_list(&dropping_trees);
10980         extent_io_tree_cleanup(&excluded_extents);
10981         goto again;
10982 }
10983
10984 /*
10985  * Check backrefs of a tree block given by @bytenr or @eb.
10986  *
10987  * @root:       the root containing the @bytenr or @eb
10988  * @eb:         tree block extent buffer, can be NULL
10989  * @bytenr:     bytenr of the tree block to search
10990  * @level:      tree level of the tree block
10991  * @owner:      owner of the tree block
10992  *
10993  * Return >0 for any error found and output error message
10994  * Return 0 for no error found
10995  */
10996 static int check_tree_block_ref(struct btrfs_root *root,
10997                                 struct extent_buffer *eb, u64 bytenr,
10998                                 int level, u64 owner)
10999 {
11000         struct btrfs_key key;
11001         struct btrfs_root *extent_root = root->fs_info->extent_root;
11002         struct btrfs_path path;
11003         struct btrfs_extent_item *ei;
11004         struct btrfs_extent_inline_ref *iref;
11005         struct extent_buffer *leaf;
11006         unsigned long end;
11007         unsigned long ptr;
11008         int slot;
11009         int skinny_level;
11010         int type;
11011         u32 nodesize = root->fs_info->nodesize;
11012         u32 item_size;
11013         u64 offset;
11014         int tree_reloc_root = 0;
11015         int found_ref = 0;
11016         int err = 0;
11017         int ret;
11018
11019         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11020             btrfs_header_bytenr(root->node) == bytenr)
11021                 tree_reloc_root = 1;
11022
11023         btrfs_init_path(&path);
11024         key.objectid = bytenr;
11025         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11026                 key.type = BTRFS_METADATA_ITEM_KEY;
11027         else
11028                 key.type = BTRFS_EXTENT_ITEM_KEY;
11029         key.offset = (u64)-1;
11030
11031         /* Search for the backref in extent tree */
11032         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11033         if (ret < 0) {
11034                 err |= BACKREF_MISSING;
11035                 goto out;
11036         }
11037         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11038         if (ret) {
11039                 err |= BACKREF_MISSING;
11040                 goto out;
11041         }
11042
11043         leaf = path.nodes[0];
11044         slot = path.slots[0];
11045         btrfs_item_key_to_cpu(leaf, &key, slot);
11046
11047         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11048
11049         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11050                 skinny_level = (int)key.offset;
11051                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11052         } else {
11053                 struct btrfs_tree_block_info *info;
11054
11055                 info = (struct btrfs_tree_block_info *)(ei + 1);
11056                 skinny_level = btrfs_tree_block_level(leaf, info);
11057                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11058         }
11059
11060         if (eb) {
11061                 u64 header_gen;
11062                 u64 extent_gen;
11063
11064                 if (!(btrfs_extent_flags(leaf, ei) &
11065                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11066                         error(
11067                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11068                                 key.objectid, nodesize,
11069                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11070                         err = BACKREF_MISMATCH;
11071                 }
11072                 header_gen = btrfs_header_generation(eb);
11073                 extent_gen = btrfs_extent_generation(leaf, ei);
11074                 if (header_gen != extent_gen) {
11075                         error(
11076         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11077                                 key.objectid, nodesize, header_gen,
11078                                 extent_gen);
11079                         err = BACKREF_MISMATCH;
11080                 }
11081                 if (level != skinny_level) {
11082                         error(
11083                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11084                                 key.objectid, nodesize, level, skinny_level);
11085                         err = BACKREF_MISMATCH;
11086                 }
11087                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11088                         error(
11089                         "extent[%llu %u] is referred by other roots than %llu",
11090                                 key.objectid, nodesize, root->objectid);
11091                         err = BACKREF_MISMATCH;
11092                 }
11093         }
11094
11095         /*
11096          * Iterate the extent/metadata item to find the exact backref
11097          */
11098         item_size = btrfs_item_size_nr(leaf, slot);
11099         ptr = (unsigned long)iref;
11100         end = (unsigned long)ei + item_size;
11101         while (ptr < end) {
11102                 iref = (struct btrfs_extent_inline_ref *)ptr;
11103                 type = btrfs_extent_inline_ref_type(leaf, iref);
11104                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11105
11106                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11107                         (offset == root->objectid || offset == owner)) {
11108                         found_ref = 1;
11109                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11110                         /*
11111                          * Backref of tree reloc root points to itself, no need
11112                          * to check backref any more.
11113                          */
11114                         if (tree_reloc_root)
11115                                 found_ref = 1;
11116                         else
11117                         /* Check if the backref points to valid referencer */
11118                                 found_ref = !check_tree_block_ref(root, NULL,
11119                                                 offset, level + 1, owner);
11120                 }
11121
11122                 if (found_ref)
11123                         break;
11124                 ptr += btrfs_extent_inline_ref_size(type);
11125         }
11126
11127         /*
11128          * Inlined extent item doesn't have what we need, check
11129          * TREE_BLOCK_REF_KEY
11130          */
11131         if (!found_ref) {
11132                 btrfs_release_path(&path);
11133                 key.objectid = bytenr;
11134                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11135                 key.offset = root->objectid;
11136
11137                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11138                 if (!ret)
11139                         found_ref = 1;
11140         }
11141         if (!found_ref)
11142                 err |= BACKREF_MISSING;
11143 out:
11144         btrfs_release_path(&path);
11145         if (eb && (err & BACKREF_MISSING))
11146                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
11147                         bytenr, nodesize, owner, level);
11148         return err;
11149 }
11150
11151 /*
11152  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11153  *
11154  * Return >0 any error found and output error message
11155  * Return 0 for no error found
11156  */
11157 static int check_extent_data_item(struct btrfs_root *root,
11158                                   struct extent_buffer *eb, int slot)
11159 {
11160         struct btrfs_file_extent_item *fi;
11161         struct btrfs_path path;
11162         struct btrfs_root *extent_root = root->fs_info->extent_root;
11163         struct btrfs_key fi_key;
11164         struct btrfs_key dbref_key;
11165         struct extent_buffer *leaf;
11166         struct btrfs_extent_item *ei;
11167         struct btrfs_extent_inline_ref *iref;
11168         struct btrfs_extent_data_ref *dref;
11169         u64 owner;
11170         u64 disk_bytenr;
11171         u64 disk_num_bytes;
11172         u64 extent_num_bytes;
11173         u64 extent_flags;
11174         u32 item_size;
11175         unsigned long end;
11176         unsigned long ptr;
11177         int type;
11178         u64 ref_root;
11179         int found_dbackref = 0;
11180         int err = 0;
11181         int ret;
11182
11183         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11184         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11185
11186         /* Nothing to check for hole and inline data extents */
11187         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11188             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11189                 return 0;
11190
11191         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11192         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11193         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11194
11195         /* Check unaligned disk_num_bytes and num_bytes */
11196         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11197                 error(
11198 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11199                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11200                         root->fs_info->sectorsize);
11201                 err |= BYTES_UNALIGNED;
11202         } else {
11203                 data_bytes_allocated += disk_num_bytes;
11204         }
11205         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11206                 error(
11207 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11208                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11209                         root->fs_info->sectorsize);
11210                 err |= BYTES_UNALIGNED;
11211         } else {
11212                 data_bytes_referenced += extent_num_bytes;
11213         }
11214         owner = btrfs_header_owner(eb);
11215
11216         /* Check the extent item of the file extent in extent tree */
11217         btrfs_init_path(&path);
11218         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11219         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11220         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11221
11222         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11223         if (ret)
11224                 goto out;
11225
11226         leaf = path.nodes[0];
11227         slot = path.slots[0];
11228         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11229
11230         extent_flags = btrfs_extent_flags(leaf, ei);
11231
11232         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11233                 error(
11234                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11235                     disk_bytenr, disk_num_bytes,
11236                     BTRFS_EXTENT_FLAG_DATA);
11237                 err |= BACKREF_MISMATCH;
11238         }
11239
11240         /* Check data backref inside that extent item */
11241         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11242         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11243         ptr = (unsigned long)iref;
11244         end = (unsigned long)ei + item_size;
11245         while (ptr < end) {
11246                 iref = (struct btrfs_extent_inline_ref *)ptr;
11247                 type = btrfs_extent_inline_ref_type(leaf, iref);
11248                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11249
11250                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11251                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11252                         if (ref_root == owner || ref_root == root->objectid)
11253                                 found_dbackref = 1;
11254                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11255                         found_dbackref = !check_tree_block_ref(root, NULL,
11256                                 btrfs_extent_inline_ref_offset(leaf, iref),
11257                                 0, owner);
11258                 }
11259
11260                 if (found_dbackref)
11261                         break;
11262                 ptr += btrfs_extent_inline_ref_size(type);
11263         }
11264
11265         if (!found_dbackref) {
11266                 btrfs_release_path(&path);
11267
11268                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11269                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11270                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11271                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11272                                 fi_key.objectid, fi_key.offset);
11273
11274                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11275                                         &dbref_key, &path, 0, 0);
11276                 if (!ret) {
11277                         found_dbackref = 1;
11278                         goto out;
11279                 }
11280
11281                 btrfs_release_path(&path);
11282
11283                 /*
11284                  * Neither inlined nor EXTENT_DATA_REF found, try
11285                  * SHARED_DATA_REF as last chance.
11286                  */
11287                 dbref_key.objectid = disk_bytenr;
11288                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11289                 dbref_key.offset = eb->start;
11290
11291                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11292                                         &dbref_key, &path, 0, 0);
11293                 if (!ret) {
11294                         found_dbackref = 1;
11295                         goto out;
11296                 }
11297         }
11298
11299 out:
11300         if (!found_dbackref)
11301                 err |= BACKREF_MISSING;
11302         btrfs_release_path(&path);
11303         if (err & BACKREF_MISSING) {
11304                 error("data extent[%llu %llu] backref lost",
11305                       disk_bytenr, disk_num_bytes);
11306         }
11307         return err;
11308 }
11309
11310 /*
11311  * Get real tree block level for the case like shared block
11312  * Return >= 0 as tree level
11313  * Return <0 for error
11314  */
11315 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11316 {
11317         struct extent_buffer *eb;
11318         struct btrfs_path path;
11319         struct btrfs_key key;
11320         struct btrfs_extent_item *ei;
11321         u64 flags;
11322         u64 transid;
11323         u8 backref_level;
11324         u8 header_level;
11325         int ret;
11326
11327         /* Search extent tree for extent generation and level */
11328         key.objectid = bytenr;
11329         key.type = BTRFS_METADATA_ITEM_KEY;
11330         key.offset = (u64)-1;
11331
11332         btrfs_init_path(&path);
11333         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11334         if (ret < 0)
11335                 goto release_out;
11336         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11337         if (ret < 0)
11338                 goto release_out;
11339         if (ret > 0) {
11340                 ret = -ENOENT;
11341                 goto release_out;
11342         }
11343
11344         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11345         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11346                             struct btrfs_extent_item);
11347         flags = btrfs_extent_flags(path.nodes[0], ei);
11348         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11349                 ret = -ENOENT;
11350                 goto release_out;
11351         }
11352
11353         /* Get transid for later read_tree_block() check */
11354         transid = btrfs_extent_generation(path.nodes[0], ei);
11355
11356         /* Get backref level as one source */
11357         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11358                 backref_level = key.offset;
11359         } else {
11360                 struct btrfs_tree_block_info *info;
11361
11362                 info = (struct btrfs_tree_block_info *)(ei + 1);
11363                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11364         }
11365         btrfs_release_path(&path);
11366
11367         /* Get level from tree block as an alternative source */
11368         eb = read_tree_block(fs_info, bytenr, transid);
11369         if (!extent_buffer_uptodate(eb)) {
11370                 free_extent_buffer(eb);
11371                 return -EIO;
11372         }
11373         header_level = btrfs_header_level(eb);
11374         free_extent_buffer(eb);
11375
11376         if (header_level != backref_level)
11377                 return -EIO;
11378         return header_level;
11379
11380 release_out:
11381         btrfs_release_path(&path);
11382         return ret;
11383 }
11384
11385 /*
11386  * Check if a tree block backref is valid (points to a valid tree block)
11387  * if level == -1, level will be resolved
11388  * Return >0 for any error found and print error message
11389  */
11390 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11391                                     u64 bytenr, int level)
11392 {
11393         struct btrfs_root *root;
11394         struct btrfs_key key;
11395         struct btrfs_path path;
11396         struct extent_buffer *eb;
11397         struct extent_buffer *node;
11398         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11399         int err = 0;
11400         int ret;
11401
11402         /* Query level for level == -1 special case */
11403         if (level == -1)
11404                 level = query_tree_block_level(fs_info, bytenr);
11405         if (level < 0) {
11406                 err |= REFERENCER_MISSING;
11407                 goto out;
11408         }
11409
11410         key.objectid = root_id;
11411         key.type = BTRFS_ROOT_ITEM_KEY;
11412         key.offset = (u64)-1;
11413
11414         root = btrfs_read_fs_root(fs_info, &key);
11415         if (IS_ERR(root)) {
11416                 err |= REFERENCER_MISSING;
11417                 goto out;
11418         }
11419
11420         /* Read out the tree block to get item/node key */
11421         eb = read_tree_block(fs_info, bytenr, 0);
11422         if (!extent_buffer_uptodate(eb)) {
11423                 err |= REFERENCER_MISSING;
11424                 free_extent_buffer(eb);
11425                 goto out;
11426         }
11427
11428         /* Empty tree, no need to check key */
11429         if (!btrfs_header_nritems(eb) && !level) {
11430                 free_extent_buffer(eb);
11431                 goto out;
11432         }
11433
11434         if (level)
11435                 btrfs_node_key_to_cpu(eb, &key, 0);
11436         else
11437                 btrfs_item_key_to_cpu(eb, &key, 0);
11438
11439         free_extent_buffer(eb);
11440
11441         btrfs_init_path(&path);
11442         path.lowest_level = level;
11443         /* Search with the first key, to ensure we can reach it */
11444         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11445         if (ret < 0) {
11446                 err |= REFERENCER_MISSING;
11447                 goto release_out;
11448         }
11449
11450         node = path.nodes[level];
11451         if (btrfs_header_bytenr(node) != bytenr) {
11452                 error(
11453         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11454                         bytenr, nodesize, bytenr,
11455                         btrfs_header_bytenr(node));
11456                 err |= REFERENCER_MISMATCH;
11457         }
11458         if (btrfs_header_level(node) != level) {
11459                 error(
11460         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11461                         bytenr, nodesize, level,
11462                         btrfs_header_level(node));
11463                 err |= REFERENCER_MISMATCH;
11464         }
11465
11466 release_out:
11467         btrfs_release_path(&path);
11468 out:
11469         if (err & REFERENCER_MISSING) {
11470                 if (level < 0)
11471                         error("extent [%llu %d] lost referencer (owner: %llu)",
11472                                 bytenr, nodesize, root_id);
11473                 else
11474                         error(
11475                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11476                                 bytenr, nodesize, root_id, level);
11477         }
11478
11479         return err;
11480 }
11481
11482 /*
11483  * Check if tree block @eb is tree reloc root.
11484  * Return 0 if it's not or any problem happens
11485  * Return 1 if it's a tree reloc root
11486  */
11487 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11488                                  struct extent_buffer *eb)
11489 {
11490         struct btrfs_root *tree_reloc_root;
11491         struct btrfs_key key;
11492         u64 bytenr = btrfs_header_bytenr(eb);
11493         u64 owner = btrfs_header_owner(eb);
11494         int ret = 0;
11495
11496         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11497         key.offset = owner;
11498         key.type = BTRFS_ROOT_ITEM_KEY;
11499
11500         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11501         if (IS_ERR(tree_reloc_root))
11502                 return 0;
11503
11504         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11505                 ret = 1;
11506         btrfs_free_fs_root(tree_reloc_root);
11507         return ret;
11508 }
11509
11510 /*
11511  * Check referencer for shared block backref
11512  * If level == -1, this function will resolve the level.
11513  */
11514 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11515                                      u64 parent, u64 bytenr, int level)
11516 {
11517         struct extent_buffer *eb;
11518         u32 nr;
11519         int found_parent = 0;
11520         int i;
11521
11522         eb = read_tree_block(fs_info, parent, 0);
11523         if (!extent_buffer_uptodate(eb))
11524                 goto out;
11525
11526         if (level == -1)
11527                 level = query_tree_block_level(fs_info, bytenr);
11528         if (level < 0)
11529                 goto out;
11530
11531         /* It's possible it's a tree reloc root */
11532         if (parent == bytenr) {
11533                 if (is_tree_reloc_root(fs_info, eb))
11534                         found_parent = 1;
11535                 goto out;
11536         }
11537
11538         if (level + 1 != btrfs_header_level(eb))
11539                 goto out;
11540
11541         nr = btrfs_header_nritems(eb);
11542         for (i = 0; i < nr; i++) {
11543                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11544                         found_parent = 1;
11545                         break;
11546                 }
11547         }
11548 out:
11549         free_extent_buffer(eb);
11550         if (!found_parent) {
11551                 error(
11552         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11553                         bytenr, fs_info->nodesize, parent, level);
11554                 return REFERENCER_MISSING;
11555         }
11556         return 0;
11557 }
11558
11559 /*
11560  * Check referencer for normal (inlined) data ref
11561  * If len == 0, it will be resolved by searching in extent tree
11562  */
11563 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11564                                      u64 root_id, u64 objectid, u64 offset,
11565                                      u64 bytenr, u64 len, u32 count)
11566 {
11567         struct btrfs_root *root;
11568         struct btrfs_root *extent_root = fs_info->extent_root;
11569         struct btrfs_key key;
11570         struct btrfs_path path;
11571         struct extent_buffer *leaf;
11572         struct btrfs_file_extent_item *fi;
11573         u32 found_count = 0;
11574         int slot;
11575         int ret = 0;
11576
11577         if (!len) {
11578                 key.objectid = bytenr;
11579                 key.type = BTRFS_EXTENT_ITEM_KEY;
11580                 key.offset = (u64)-1;
11581
11582                 btrfs_init_path(&path);
11583                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11584                 if (ret < 0)
11585                         goto out;
11586                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11587                 if (ret)
11588                         goto out;
11589                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11590                 if (key.objectid != bytenr ||
11591                     key.type != BTRFS_EXTENT_ITEM_KEY)
11592                         goto out;
11593                 len = key.offset;
11594                 btrfs_release_path(&path);
11595         }
11596         key.objectid = root_id;
11597         key.type = BTRFS_ROOT_ITEM_KEY;
11598         key.offset = (u64)-1;
11599         btrfs_init_path(&path);
11600
11601         root = btrfs_read_fs_root(fs_info, &key);
11602         if (IS_ERR(root))
11603                 goto out;
11604
11605         key.objectid = objectid;
11606         key.type = BTRFS_EXTENT_DATA_KEY;
11607         /*
11608          * It can be nasty as data backref offset is
11609          * file offset - file extent offset, which is smaller or
11610          * equal to original backref offset.  The only special case is
11611          * overflow.  So we need to special check and do further search.
11612          */
11613         key.offset = offset & (1ULL << 63) ? 0 : offset;
11614
11615         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11616         if (ret < 0)
11617                 goto out;
11618
11619         /*
11620          * Search afterwards to get correct one
11621          * NOTE: As we must do a comprehensive check on the data backref to
11622          * make sure the dref count also matches, we must iterate all file
11623          * extents for that inode.
11624          */
11625         while (1) {
11626                 leaf = path.nodes[0];
11627                 slot = path.slots[0];
11628
11629                 if (slot >= btrfs_header_nritems(leaf))
11630                         goto next;
11631                 btrfs_item_key_to_cpu(leaf, &key, slot);
11632                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11633                         break;
11634                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11635                 /*
11636                  * Except normal disk bytenr and disk num bytes, we still
11637                  * need to do extra check on dbackref offset as
11638                  * dbackref offset = file_offset - file_extent_offset
11639                  */
11640                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11641                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11642                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11643                     offset)
11644                         found_count++;
11645
11646 next:
11647                 ret = btrfs_next_item(root, &path);
11648                 if (ret)
11649                         break;
11650         }
11651 out:
11652         btrfs_release_path(&path);
11653         if (found_count != count) {
11654                 error(
11655 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11656                         bytenr, len, root_id, objectid, offset, count, found_count);
11657                 return REFERENCER_MISSING;
11658         }
11659         return 0;
11660 }
11661
11662 /*
11663  * Check if the referencer of a shared data backref exists
11664  */
11665 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11666                                      u64 parent, u64 bytenr)
11667 {
11668         struct extent_buffer *eb;
11669         struct btrfs_key key;
11670         struct btrfs_file_extent_item *fi;
11671         u32 nr;
11672         int found_parent = 0;
11673         int i;
11674
11675         eb = read_tree_block(fs_info, parent, 0);
11676         if (!extent_buffer_uptodate(eb))
11677                 goto out;
11678
11679         nr = btrfs_header_nritems(eb);
11680         for (i = 0; i < nr; i++) {
11681                 btrfs_item_key_to_cpu(eb, &key, i);
11682                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11683                         continue;
11684
11685                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11686                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11687                         continue;
11688
11689                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11690                         found_parent = 1;
11691                         break;
11692                 }
11693         }
11694
11695 out:
11696         free_extent_buffer(eb);
11697         if (!found_parent) {
11698                 error("shared extent %llu referencer lost (parent: %llu)",
11699                         bytenr, parent);
11700                 return REFERENCER_MISSING;
11701         }
11702         return 0;
11703 }
11704
11705 /*
11706  * This function will check a given extent item, including its backref and
11707  * itself (like crossing stripe boundary and type)
11708  *
11709  * Since we don't use extent_record anymore, introduce new error bit
11710  */
11711 static int check_extent_item(struct btrfs_fs_info *fs_info,
11712                              struct extent_buffer *eb, int slot)
11713 {
11714         struct btrfs_extent_item *ei;
11715         struct btrfs_extent_inline_ref *iref;
11716         struct btrfs_extent_data_ref *dref;
11717         unsigned long end;
11718         unsigned long ptr;
11719         int type;
11720         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11721         u32 item_size = btrfs_item_size_nr(eb, slot);
11722         u64 flags;
11723         u64 offset;
11724         int metadata = 0;
11725         int level;
11726         struct btrfs_key key;
11727         int ret;
11728         int err = 0;
11729
11730         btrfs_item_key_to_cpu(eb, &key, slot);
11731         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11732                 bytes_used += key.offset;
11733         else
11734                 bytes_used += nodesize;
11735
11736         if (item_size < sizeof(*ei)) {
11737                 /*
11738                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11739                  * old thing when on disk format is still un-determined.
11740                  * No need to care about it anymore
11741                  */
11742                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11743                 return -ENOTTY;
11744         }
11745
11746         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11747         flags = btrfs_extent_flags(eb, ei);
11748
11749         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11750                 metadata = 1;
11751         if (metadata && check_crossing_stripes(global_info, key.objectid,
11752                                                eb->len)) {
11753                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11754                       key.objectid, key.objectid + nodesize);
11755                 err |= CROSSING_STRIPE_BOUNDARY;
11756         }
11757
11758         ptr = (unsigned long)(ei + 1);
11759
11760         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11761                 /* Old EXTENT_ITEM metadata */
11762                 struct btrfs_tree_block_info *info;
11763
11764                 info = (struct btrfs_tree_block_info *)ptr;
11765                 level = btrfs_tree_block_level(eb, info);
11766                 ptr += sizeof(struct btrfs_tree_block_info);
11767         } else {
11768                 /* New METADATA_ITEM */
11769                 level = key.offset;
11770         }
11771         end = (unsigned long)ei + item_size;
11772
11773 next:
11774         /* Reached extent item end normally */
11775         if (ptr == end)
11776                 goto out;
11777
11778         /* Beyond extent item end, wrong item size */
11779         if (ptr > end) {
11780                 err |= ITEM_SIZE_MISMATCH;
11781                 error("extent item at bytenr %llu slot %d has wrong size",
11782                         eb->start, slot);
11783                 goto out;
11784         }
11785
11786         /* Now check every backref in this extent item */
11787         iref = (struct btrfs_extent_inline_ref *)ptr;
11788         type = btrfs_extent_inline_ref_type(eb, iref);
11789         offset = btrfs_extent_inline_ref_offset(eb, iref);
11790         switch (type) {
11791         case BTRFS_TREE_BLOCK_REF_KEY:
11792                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11793                                                level);
11794                 err |= ret;
11795                 break;
11796         case BTRFS_SHARED_BLOCK_REF_KEY:
11797                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11798                                                  level);
11799                 err |= ret;
11800                 break;
11801         case BTRFS_EXTENT_DATA_REF_KEY:
11802                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11803                 ret = check_extent_data_backref(fs_info,
11804                                 btrfs_extent_data_ref_root(eb, dref),
11805                                 btrfs_extent_data_ref_objectid(eb, dref),
11806                                 btrfs_extent_data_ref_offset(eb, dref),
11807                                 key.objectid, key.offset,
11808                                 btrfs_extent_data_ref_count(eb, dref));
11809                 err |= ret;
11810                 break;
11811         case BTRFS_SHARED_DATA_REF_KEY:
11812                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11813                 err |= ret;
11814                 break;
11815         default:
11816                 error("extent[%llu %d %llu] has unknown ref type: %d",
11817                         key.objectid, key.type, key.offset, type);
11818                 err |= UNKNOWN_TYPE;
11819                 goto out;
11820         }
11821
11822         ptr += btrfs_extent_inline_ref_size(type);
11823         goto next;
11824
11825 out:
11826         return err;
11827 }
11828
11829 /*
11830  * Check if a dev extent item is referred correctly by its chunk
11831  */
11832 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11833                                  struct extent_buffer *eb, int slot)
11834 {
11835         struct btrfs_root *chunk_root = fs_info->chunk_root;
11836         struct btrfs_dev_extent *ptr;
11837         struct btrfs_path path;
11838         struct btrfs_key chunk_key;
11839         struct btrfs_key devext_key;
11840         struct btrfs_chunk *chunk;
11841         struct extent_buffer *l;
11842         int num_stripes;
11843         u64 length;
11844         int i;
11845         int found_chunk = 0;
11846         int ret;
11847
11848         btrfs_item_key_to_cpu(eb, &devext_key, slot);
11849         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11850         length = btrfs_dev_extent_length(eb, ptr);
11851
11852         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11853         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11854         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11855
11856         btrfs_init_path(&path);
11857         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11858         if (ret)
11859                 goto out;
11860
11861         l = path.nodes[0];
11862         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11863         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11864                                       chunk_key.offset);
11865         if (ret < 0)
11866                 goto out;
11867
11868         if (btrfs_stripe_length(fs_info, l, chunk) != length)
11869                 goto out;
11870
11871         num_stripes = btrfs_chunk_num_stripes(l, chunk);
11872         for (i = 0; i < num_stripes; i++) {
11873                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11874                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11875
11876                 if (devid == devext_key.objectid &&
11877                     offset == devext_key.offset) {
11878                         found_chunk = 1;
11879                         break;
11880                 }
11881         }
11882 out:
11883         btrfs_release_path(&path);
11884         if (!found_chunk) {
11885                 error(
11886                 "device extent[%llu, %llu, %llu] did not find the related chunk",
11887                         devext_key.objectid, devext_key.offset, length);
11888                 return REFERENCER_MISSING;
11889         }
11890         return 0;
11891 }
11892
11893 /*
11894  * Check if the used space is correct with the dev item
11895  */
11896 static int check_dev_item(struct btrfs_fs_info *fs_info,
11897                           struct extent_buffer *eb, int slot)
11898 {
11899         struct btrfs_root *dev_root = fs_info->dev_root;
11900         struct btrfs_dev_item *dev_item;
11901         struct btrfs_path path;
11902         struct btrfs_key key;
11903         struct btrfs_dev_extent *ptr;
11904         u64 dev_id;
11905         u64 used;
11906         u64 total = 0;
11907         int ret;
11908
11909         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11910         dev_id = btrfs_device_id(eb, dev_item);
11911         used = btrfs_device_bytes_used(eb, dev_item);
11912
11913         key.objectid = dev_id;
11914         key.type = BTRFS_DEV_EXTENT_KEY;
11915         key.offset = 0;
11916
11917         btrfs_init_path(&path);
11918         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11919         if (ret < 0) {
11920                 btrfs_item_key_to_cpu(eb, &key, slot);
11921                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11922                         key.objectid, key.type, key.offset);
11923                 btrfs_release_path(&path);
11924                 return REFERENCER_MISSING;
11925         }
11926
11927         /* Iterate dev_extents to calculate the used space of a device */
11928         while (1) {
11929                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11930                         goto next;
11931
11932                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11933                 if (key.objectid > dev_id)
11934                         break;
11935                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11936                         goto next;
11937
11938                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11939                                      struct btrfs_dev_extent);
11940                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11941 next:
11942                 ret = btrfs_next_item(dev_root, &path);
11943                 if (ret)
11944                         break;
11945         }
11946         btrfs_release_path(&path);
11947
11948         if (used != total) {
11949                 btrfs_item_key_to_cpu(eb, &key, slot);
11950                 error(
11951 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11952                         total, used, BTRFS_ROOT_TREE_OBJECTID,
11953                         BTRFS_DEV_EXTENT_KEY, dev_id);
11954                 return ACCOUNTING_MISMATCH;
11955         }
11956         return 0;
11957 }
11958
11959 /*
11960  * Check a block group item with its referener (chunk) and its used space
11961  * with extent/metadata item
11962  */
11963 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11964                                   struct extent_buffer *eb, int slot)
11965 {
11966         struct btrfs_root *extent_root = fs_info->extent_root;
11967         struct btrfs_root *chunk_root = fs_info->chunk_root;
11968         struct btrfs_block_group_item *bi;
11969         struct btrfs_block_group_item bg_item;
11970         struct btrfs_path path;
11971         struct btrfs_key bg_key;
11972         struct btrfs_key chunk_key;
11973         struct btrfs_key extent_key;
11974         struct btrfs_chunk *chunk;
11975         struct extent_buffer *leaf;
11976         struct btrfs_extent_item *ei;
11977         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11978         u64 flags;
11979         u64 bg_flags;
11980         u64 used;
11981         u64 total = 0;
11982         int ret;
11983         int err = 0;
11984
11985         btrfs_item_key_to_cpu(eb, &bg_key, slot);
11986         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11987         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11988         used = btrfs_block_group_used(&bg_item);
11989         bg_flags = btrfs_block_group_flags(&bg_item);
11990
11991         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11992         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11993         chunk_key.offset = bg_key.objectid;
11994
11995         btrfs_init_path(&path);
11996         /* Search for the referencer chunk */
11997         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11998         if (ret) {
11999                 error(
12000                 "block group[%llu %llu] did not find the related chunk item",
12001                         bg_key.objectid, bg_key.offset);
12002                 err |= REFERENCER_MISSING;
12003         } else {
12004                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12005                                         struct btrfs_chunk);
12006                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12007                                                 bg_key.offset) {
12008                         error(
12009         "block group[%llu %llu] related chunk item length does not match",
12010                                 bg_key.objectid, bg_key.offset);
12011                         err |= REFERENCER_MISMATCH;
12012                 }
12013         }
12014         btrfs_release_path(&path);
12015
12016         /* Search from the block group bytenr */
12017         extent_key.objectid = bg_key.objectid;
12018         extent_key.type = 0;
12019         extent_key.offset = 0;
12020
12021         btrfs_init_path(&path);
12022         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12023         if (ret < 0)
12024                 goto out;
12025
12026         /* Iterate extent tree to account used space */
12027         while (1) {
12028                 leaf = path.nodes[0];
12029
12030                 /* Search slot can point to the last item beyond leaf nritems */
12031                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12032                         goto next;
12033
12034                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12035                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12036                         break;
12037
12038                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12039                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12040                         goto next;
12041                 if (extent_key.objectid < bg_key.objectid)
12042                         goto next;
12043
12044                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12045                         total += nodesize;
12046                 else
12047                         total += extent_key.offset;
12048
12049                 ei = btrfs_item_ptr(leaf, path.slots[0],
12050                                     struct btrfs_extent_item);
12051                 flags = btrfs_extent_flags(leaf, ei);
12052                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12053                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12054                                 error(
12055                         "bad extent[%llu, %llu) type mismatch with chunk",
12056                                         extent_key.objectid,
12057                                         extent_key.objectid + extent_key.offset);
12058                                 err |= CHUNK_TYPE_MISMATCH;
12059                         }
12060                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12061                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12062                                     BTRFS_BLOCK_GROUP_METADATA))) {
12063                                 error(
12064                         "bad extent[%llu, %llu) type mismatch with chunk",
12065                                         extent_key.objectid,
12066                                         extent_key.objectid + nodesize);
12067                                 err |= CHUNK_TYPE_MISMATCH;
12068                         }
12069                 }
12070 next:
12071                 ret = btrfs_next_item(extent_root, &path);
12072                 if (ret)
12073                         break;
12074         }
12075
12076 out:
12077         btrfs_release_path(&path);
12078
12079         if (total != used) {
12080                 error(
12081                 "block group[%llu %llu] used %llu but extent items used %llu",
12082                         bg_key.objectid, bg_key.offset, used, total);
12083                 err |= ACCOUNTING_MISMATCH;
12084         }
12085         return err;
12086 }
12087
12088 /*
12089  * Check a chunk item.
12090  * Including checking all referred dev_extents and block group
12091  */
12092 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12093                             struct extent_buffer *eb, int slot)
12094 {
12095         struct btrfs_root *extent_root = fs_info->extent_root;
12096         struct btrfs_root *dev_root = fs_info->dev_root;
12097         struct btrfs_path path;
12098         struct btrfs_key chunk_key;
12099         struct btrfs_key bg_key;
12100         struct btrfs_key devext_key;
12101         struct btrfs_chunk *chunk;
12102         struct extent_buffer *leaf;
12103         struct btrfs_block_group_item *bi;
12104         struct btrfs_block_group_item bg_item;
12105         struct btrfs_dev_extent *ptr;
12106         u64 length;
12107         u64 chunk_end;
12108         u64 stripe_len;
12109         u64 type;
12110         int num_stripes;
12111         u64 offset;
12112         u64 objectid;
12113         int i;
12114         int ret;
12115         int err = 0;
12116
12117         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12118         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12119         length = btrfs_chunk_length(eb, chunk);
12120         chunk_end = chunk_key.offset + length;
12121         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12122                                       chunk_key.offset);
12123         if (ret < 0) {
12124                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12125                         chunk_end);
12126                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12127                 goto out;
12128         }
12129         type = btrfs_chunk_type(eb, chunk);
12130
12131         bg_key.objectid = chunk_key.offset;
12132         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12133         bg_key.offset = length;
12134
12135         btrfs_init_path(&path);
12136         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12137         if (ret) {
12138                 error(
12139                 "chunk[%llu %llu) did not find the related block group item",
12140                         chunk_key.offset, chunk_end);
12141                 err |= REFERENCER_MISSING;
12142         } else{
12143                 leaf = path.nodes[0];
12144                 bi = btrfs_item_ptr(leaf, path.slots[0],
12145                                     struct btrfs_block_group_item);
12146                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12147                                    sizeof(bg_item));
12148                 if (btrfs_block_group_flags(&bg_item) != type) {
12149                         error(
12150 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12151                                 chunk_key.offset, chunk_end, type,
12152                                 btrfs_block_group_flags(&bg_item));
12153                         err |= REFERENCER_MISSING;
12154                 }
12155         }
12156
12157         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12158         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12159         for (i = 0; i < num_stripes; i++) {
12160                 btrfs_release_path(&path);
12161                 btrfs_init_path(&path);
12162                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12163                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12164                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12165
12166                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12167                                         0, 0);
12168                 if (ret)
12169                         goto not_match_dev;
12170
12171                 leaf = path.nodes[0];
12172                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12173                                      struct btrfs_dev_extent);
12174                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12175                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12176                 if (objectid != chunk_key.objectid ||
12177                     offset != chunk_key.offset ||
12178                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12179                         goto not_match_dev;
12180                 continue;
12181 not_match_dev:
12182                 err |= BACKREF_MISSING;
12183                 error(
12184                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12185                         chunk_key.objectid, chunk_end, i);
12186                 continue;
12187         }
12188         btrfs_release_path(&path);
12189 out:
12190         return err;
12191 }
12192
12193 /*
12194  * Main entry function to check known items and update related accounting info
12195  */
12196 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
12197 {
12198         struct btrfs_fs_info *fs_info = root->fs_info;
12199         struct btrfs_key key;
12200         int slot = 0;
12201         int type;
12202         struct btrfs_extent_data_ref *dref;
12203         int ret;
12204         int err = 0;
12205
12206 next:
12207         btrfs_item_key_to_cpu(eb, &key, slot);
12208         type = key.type;
12209
12210         switch (type) {
12211         case BTRFS_EXTENT_DATA_KEY:
12212                 ret = check_extent_data_item(root, eb, slot);
12213                 err |= ret;
12214                 break;
12215         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12216                 ret = check_block_group_item(fs_info, eb, slot);
12217                 err |= ret;
12218                 break;
12219         case BTRFS_DEV_ITEM_KEY:
12220                 ret = check_dev_item(fs_info, eb, slot);
12221                 err |= ret;
12222                 break;
12223         case BTRFS_CHUNK_ITEM_KEY:
12224                 ret = check_chunk_item(fs_info, eb, slot);
12225                 err |= ret;
12226                 break;
12227         case BTRFS_DEV_EXTENT_KEY:
12228                 ret = check_dev_extent_item(fs_info, eb, slot);
12229                 err |= ret;
12230                 break;
12231         case BTRFS_EXTENT_ITEM_KEY:
12232         case BTRFS_METADATA_ITEM_KEY:
12233                 ret = check_extent_item(fs_info, eb, slot);
12234                 err |= ret;
12235                 break;
12236         case BTRFS_EXTENT_CSUM_KEY:
12237                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12238                 break;
12239         case BTRFS_TREE_BLOCK_REF_KEY:
12240                 ret = check_tree_block_backref(fs_info, key.offset,
12241                                                key.objectid, -1);
12242                 err |= ret;
12243                 break;
12244         case BTRFS_EXTENT_DATA_REF_KEY:
12245                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12246                 ret = check_extent_data_backref(fs_info,
12247                                 btrfs_extent_data_ref_root(eb, dref),
12248                                 btrfs_extent_data_ref_objectid(eb, dref),
12249                                 btrfs_extent_data_ref_offset(eb, dref),
12250                                 key.objectid, 0,
12251                                 btrfs_extent_data_ref_count(eb, dref));
12252                 err |= ret;
12253                 break;
12254         case BTRFS_SHARED_BLOCK_REF_KEY:
12255                 ret = check_shared_block_backref(fs_info, key.offset,
12256                                                  key.objectid, -1);
12257                 err |= ret;
12258                 break;
12259         case BTRFS_SHARED_DATA_REF_KEY:
12260                 ret = check_shared_data_backref(fs_info, key.offset,
12261                                                 key.objectid);
12262                 err |= ret;
12263                 break;
12264         default:
12265                 break;
12266         }
12267
12268         if (++slot < btrfs_header_nritems(eb))
12269                 goto next;
12270
12271         return err;
12272 }
12273
12274 /*
12275  * Helper function for later fs/subvol tree check.  To determine if a tree
12276  * block should be checked.
12277  * This function will ensure only the direct referencer with lowest rootid to
12278  * check a fs/subvolume tree block.
12279  *
12280  * Backref check at extent tree would detect errors like missing subvolume
12281  * tree, so we can do aggressive check to reduce duplicated checks.
12282  */
12283 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
12284 {
12285         struct btrfs_root *extent_root = root->fs_info->extent_root;
12286         struct btrfs_key key;
12287         struct btrfs_path path;
12288         struct extent_buffer *leaf;
12289         int slot;
12290         struct btrfs_extent_item *ei;
12291         unsigned long ptr;
12292         unsigned long end;
12293         int type;
12294         u32 item_size;
12295         u64 offset;
12296         struct btrfs_extent_inline_ref *iref;
12297         int ret;
12298
12299         btrfs_init_path(&path);
12300         key.objectid = btrfs_header_bytenr(eb);
12301         key.type = BTRFS_METADATA_ITEM_KEY;
12302         key.offset = (u64)-1;
12303
12304         /*
12305          * Any failure in backref resolving means we can't determine
12306          * whom the tree block belongs to.
12307          * So in that case, we need to check that tree block
12308          */
12309         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12310         if (ret < 0)
12311                 goto need_check;
12312
12313         ret = btrfs_previous_extent_item(extent_root, &path,
12314                                          btrfs_header_bytenr(eb));
12315         if (ret)
12316                 goto need_check;
12317
12318         leaf = path.nodes[0];
12319         slot = path.slots[0];
12320         btrfs_item_key_to_cpu(leaf, &key, slot);
12321         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12322
12323         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12324                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12325         } else {
12326                 struct btrfs_tree_block_info *info;
12327
12328                 info = (struct btrfs_tree_block_info *)(ei + 1);
12329                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
12330         }
12331
12332         item_size = btrfs_item_size_nr(leaf, slot);
12333         ptr = (unsigned long)iref;
12334         end = (unsigned long)ei + item_size;
12335         while (ptr < end) {
12336                 iref = (struct btrfs_extent_inline_ref *)ptr;
12337                 type = btrfs_extent_inline_ref_type(leaf, iref);
12338                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12339
12340                 /*
12341                  * We only check the tree block if current root is
12342                  * the lowest referencer of it.
12343                  */
12344                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12345                     offset < root->objectid) {
12346                         btrfs_release_path(&path);
12347                         return 0;
12348                 }
12349
12350                 ptr += btrfs_extent_inline_ref_size(type);
12351         }
12352         /*
12353          * Normally we should also check keyed tree block ref, but that may be
12354          * very time consuming.  Inlined ref should already make us skip a lot
12355          * of refs now.  So skip search keyed tree block ref.
12356          */
12357
12358 need_check:
12359         btrfs_release_path(&path);
12360         return 1;
12361 }
12362
12363 /*
12364  * Traversal function for tree block. We will do:
12365  * 1) Skip shared fs/subvolume tree blocks
12366  * 2) Update related bytes accounting
12367  * 3) Pre-order traversal
12368  */
12369 static int traverse_tree_block(struct btrfs_root *root,
12370                                 struct extent_buffer *node)
12371 {
12372         struct extent_buffer *eb;
12373         struct btrfs_key key;
12374         struct btrfs_key drop_key;
12375         int level;
12376         u64 nr;
12377         int i;
12378         int err = 0;
12379         int ret;
12380
12381         /*
12382          * Skip shared fs/subvolume tree block, in that case they will
12383          * be checked by referencer with lowest rootid
12384          */
12385         if (is_fstree(root->objectid) && !should_check(root, node))
12386                 return 0;
12387
12388         /* Update bytes accounting */
12389         total_btree_bytes += node->len;
12390         if (fs_root_objectid(btrfs_header_owner(node)))
12391                 total_fs_tree_bytes += node->len;
12392         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12393                 total_extent_tree_bytes += node->len;
12394
12395         /* pre-order tranversal, check itself first */
12396         level = btrfs_header_level(node);
12397         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12398                                    btrfs_header_level(node),
12399                                    btrfs_header_owner(node));
12400         err |= ret;
12401         if (err)
12402                 error(
12403         "check %s failed root %llu bytenr %llu level %d, force continue check",
12404                         level ? "node":"leaf", root->objectid,
12405                         btrfs_header_bytenr(node), btrfs_header_level(node));
12406
12407         if (!level) {
12408                 btree_space_waste += btrfs_leaf_free_space(root, node);
12409                 ret = check_leaf_items(root, node);
12410                 err |= ret;
12411                 return err;
12412         }
12413
12414         nr = btrfs_header_nritems(node);
12415         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12416         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12417                 sizeof(struct btrfs_key_ptr);
12418
12419         /* Then check all its children */
12420         for (i = 0; i < nr; i++) {
12421                 u64 blocknr = btrfs_node_blockptr(node, i);
12422
12423                 btrfs_node_key_to_cpu(node, &key, i);
12424                 if (level == root->root_item.drop_level &&
12425                     is_dropped_key(&key, &drop_key))
12426                         continue;
12427
12428                 /*
12429                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12430                  * to call the function itself.
12431                  */
12432                 eb = read_tree_block(root->fs_info, blocknr, 0);
12433                 if (extent_buffer_uptodate(eb)) {
12434                         ret = traverse_tree_block(root, eb);
12435                         err |= ret;
12436                 }
12437                 free_extent_buffer(eb);
12438         }
12439
12440         return err;
12441 }
12442
12443 /*
12444  * Low memory usage version check_chunks_and_extents.
12445  */
12446 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12447 {
12448         struct btrfs_path path;
12449         struct btrfs_key key;
12450         struct btrfs_root *root1;
12451         struct btrfs_root *root;
12452         struct btrfs_root *cur_root;
12453         int err = 0;
12454         int ret;
12455
12456         root = fs_info->fs_root;
12457
12458         root1 = root->fs_info->chunk_root;
12459         ret = traverse_tree_block(root1, root1->node);
12460         err |= ret;
12461
12462         root1 = root->fs_info->tree_root;
12463         ret = traverse_tree_block(root1, root1->node);
12464         err |= ret;
12465
12466         btrfs_init_path(&path);
12467         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12468         key.offset = 0;
12469         key.type = BTRFS_ROOT_ITEM_KEY;
12470
12471         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12472         if (ret) {
12473                 error("cannot find extent treet in tree_root");
12474                 goto out;
12475         }
12476
12477         while (1) {
12478                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12479                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12480                         goto next;
12481                 key.offset = (u64)-1;
12482
12483                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12484                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12485                                         &key);
12486                 else
12487                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12488                 if (IS_ERR(cur_root) || !cur_root) {
12489                         error("failed to read tree: %lld", key.objectid);
12490                         goto next;
12491                 }
12492
12493                 ret = traverse_tree_block(cur_root, cur_root->node);
12494                 err |= ret;
12495
12496                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12497                         btrfs_free_fs_root(cur_root);
12498 next:
12499                 ret = btrfs_next_item(root1, &path);
12500                 if (ret)
12501                         goto out;
12502         }
12503
12504 out:
12505         btrfs_release_path(&path);
12506         return err;
12507 }
12508
12509 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12510 {
12511         int ret;
12512
12513         if (!ctx.progress_enabled)
12514                 fprintf(stderr, "checking extents\n");
12515         if (check_mode == CHECK_MODE_LOWMEM)
12516                 ret = check_chunks_and_extents_v2(fs_info);
12517         else
12518                 ret = check_chunks_and_extents(fs_info);
12519
12520         return ret;
12521 }
12522
12523 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12524                            struct btrfs_root *root, int overwrite)
12525 {
12526         struct extent_buffer *c;
12527         struct extent_buffer *old = root->node;
12528         int level;
12529         int ret;
12530         struct btrfs_disk_key disk_key = {0,0,0};
12531
12532         level = 0;
12533
12534         if (overwrite) {
12535                 c = old;
12536                 extent_buffer_get(c);
12537                 goto init;
12538         }
12539         c = btrfs_alloc_free_block(trans, root,
12540                                    root->fs_info->nodesize,
12541                                    root->root_key.objectid,
12542                                    &disk_key, level, 0, 0);
12543         if (IS_ERR(c)) {
12544                 c = old;
12545                 extent_buffer_get(c);
12546                 overwrite = 1;
12547         }
12548 init:
12549         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12550         btrfs_set_header_level(c, level);
12551         btrfs_set_header_bytenr(c, c->start);
12552         btrfs_set_header_generation(c, trans->transid);
12553         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12554         btrfs_set_header_owner(c, root->root_key.objectid);
12555
12556         write_extent_buffer(c, root->fs_info->fsid,
12557                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12558
12559         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12560                             btrfs_header_chunk_tree_uuid(c),
12561                             BTRFS_UUID_SIZE);
12562
12563         btrfs_mark_buffer_dirty(c);
12564         /*
12565          * this case can happen in the following case:
12566          *
12567          * 1.overwrite previous root.
12568          *
12569          * 2.reinit reloc data root, this is because we skip pin
12570          * down reloc data tree before which means we can allocate
12571          * same block bytenr here.
12572          */
12573         if (old->start == c->start) {
12574                 btrfs_set_root_generation(&root->root_item,
12575                                           trans->transid);
12576                 root->root_item.level = btrfs_header_level(root->node);
12577                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12578                                         &root->root_key, &root->root_item);
12579                 if (ret) {
12580                         free_extent_buffer(c);
12581                         return ret;
12582                 }
12583         }
12584         free_extent_buffer(old);
12585         root->node = c;
12586         add_root_to_dirty_list(root);
12587         return 0;
12588 }
12589
12590 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12591                                 struct extent_buffer *eb, int tree_root)
12592 {
12593         struct extent_buffer *tmp;
12594         struct btrfs_root_item *ri;
12595         struct btrfs_key key;
12596         u64 bytenr;
12597         int level = btrfs_header_level(eb);
12598         int nritems;
12599         int ret;
12600         int i;
12601
12602         /*
12603          * If we have pinned this block before, don't pin it again.
12604          * This can not only avoid forever loop with broken filesystem
12605          * but also give us some speedups.
12606          */
12607         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12608                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12609                 return 0;
12610
12611         btrfs_pin_extent(fs_info, eb->start, eb->len);
12612
12613         nritems = btrfs_header_nritems(eb);
12614         for (i = 0; i < nritems; i++) {
12615                 if (level == 0) {
12616                         btrfs_item_key_to_cpu(eb, &key, i);
12617                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12618                                 continue;
12619                         /* Skip the extent root and reloc roots */
12620                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12621                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12622                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12623                                 continue;
12624                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12625                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12626
12627                         /*
12628                          * If at any point we start needing the real root we
12629                          * will have to build a stump root for the root we are
12630                          * in, but for now this doesn't actually use the root so
12631                          * just pass in extent_root.
12632                          */
12633                         tmp = read_tree_block(fs_info, bytenr, 0);
12634                         if (!extent_buffer_uptodate(tmp)) {
12635                                 fprintf(stderr, "Error reading root block\n");
12636                                 return -EIO;
12637                         }
12638                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12639                         free_extent_buffer(tmp);
12640                         if (ret)
12641                                 return ret;
12642                 } else {
12643                         bytenr = btrfs_node_blockptr(eb, i);
12644
12645                         /* If we aren't the tree root don't read the block */
12646                         if (level == 1 && !tree_root) {
12647                                 btrfs_pin_extent(fs_info, bytenr,
12648                                                 fs_info->nodesize);
12649                                 continue;
12650                         }
12651
12652                         tmp = read_tree_block(fs_info, bytenr, 0);
12653                         if (!extent_buffer_uptodate(tmp)) {
12654                                 fprintf(stderr, "Error reading tree block\n");
12655                                 return -EIO;
12656                         }
12657                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12658                         free_extent_buffer(tmp);
12659                         if (ret)
12660                                 return ret;
12661                 }
12662         }
12663
12664         return 0;
12665 }
12666
12667 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12668 {
12669         int ret;
12670
12671         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12672         if (ret)
12673                 return ret;
12674
12675         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12676 }
12677
12678 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12679 {
12680         struct btrfs_block_group_cache *cache;
12681         struct btrfs_path path;
12682         struct extent_buffer *leaf;
12683         struct btrfs_chunk *chunk;
12684         struct btrfs_key key;
12685         int ret;
12686         u64 start;
12687
12688         btrfs_init_path(&path);
12689         key.objectid = 0;
12690         key.type = BTRFS_CHUNK_ITEM_KEY;
12691         key.offset = 0;
12692         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12693         if (ret < 0) {
12694                 btrfs_release_path(&path);
12695                 return ret;
12696         }
12697
12698         /*
12699          * We do this in case the block groups were screwed up and had alloc
12700          * bits that aren't actually set on the chunks.  This happens with
12701          * restored images every time and could happen in real life I guess.
12702          */
12703         fs_info->avail_data_alloc_bits = 0;
12704         fs_info->avail_metadata_alloc_bits = 0;
12705         fs_info->avail_system_alloc_bits = 0;
12706
12707         /* First we need to create the in-memory block groups */
12708         while (1) {
12709                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12710                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12711                         if (ret < 0) {
12712                                 btrfs_release_path(&path);
12713                                 return ret;
12714                         }
12715                         if (ret) {
12716                                 ret = 0;
12717                                 break;
12718                         }
12719                 }
12720                 leaf = path.nodes[0];
12721                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12722                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12723                         path.slots[0]++;
12724                         continue;
12725                 }
12726
12727                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12728                 btrfs_add_block_group(fs_info, 0,
12729                                       btrfs_chunk_type(leaf, chunk),
12730                                       key.objectid, key.offset,
12731                                       btrfs_chunk_length(leaf, chunk));
12732                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12733                                  key.offset + btrfs_chunk_length(leaf, chunk));
12734                 path.slots[0]++;
12735         }
12736         start = 0;
12737         while (1) {
12738                 cache = btrfs_lookup_first_block_group(fs_info, start);
12739                 if (!cache)
12740                         break;
12741                 cache->cached = 1;
12742                 start = cache->key.objectid + cache->key.offset;
12743         }
12744
12745         btrfs_release_path(&path);
12746         return 0;
12747 }
12748
12749 static int reset_balance(struct btrfs_trans_handle *trans,
12750                          struct btrfs_fs_info *fs_info)
12751 {
12752         struct btrfs_root *root = fs_info->tree_root;
12753         struct btrfs_path path;
12754         struct extent_buffer *leaf;
12755         struct btrfs_key key;
12756         int del_slot, del_nr = 0;
12757         int ret;
12758         int found = 0;
12759
12760         btrfs_init_path(&path);
12761         key.objectid = BTRFS_BALANCE_OBJECTID;
12762         key.type = BTRFS_BALANCE_ITEM_KEY;
12763         key.offset = 0;
12764         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12765         if (ret) {
12766                 if (ret > 0)
12767                         ret = 0;
12768                 if (!ret)
12769                         goto reinit_data_reloc;
12770                 else
12771                         goto out;
12772         }
12773
12774         ret = btrfs_del_item(trans, root, &path);
12775         if (ret)
12776                 goto out;
12777         btrfs_release_path(&path);
12778
12779         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12780         key.type = BTRFS_ROOT_ITEM_KEY;
12781         key.offset = 0;
12782         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12783         if (ret < 0)
12784                 goto out;
12785         while (1) {
12786                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12787                         if (!found)
12788                                 break;
12789
12790                         if (del_nr) {
12791                                 ret = btrfs_del_items(trans, root, &path,
12792                                                       del_slot, del_nr);
12793                                 del_nr = 0;
12794                                 if (ret)
12795                                         goto out;
12796                         }
12797                         key.offset++;
12798                         btrfs_release_path(&path);
12799
12800                         found = 0;
12801                         ret = btrfs_search_slot(trans, root, &key, &path,
12802                                                 -1, 1);
12803                         if (ret < 0)
12804                                 goto out;
12805                         continue;
12806                 }
12807                 found = 1;
12808                 leaf = path.nodes[0];
12809                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12810                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12811                         break;
12812                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12813                         path.slots[0]++;
12814                         continue;
12815                 }
12816                 if (!del_nr) {
12817                         del_slot = path.slots[0];
12818                         del_nr = 1;
12819                 } else {
12820                         del_nr++;
12821                 }
12822                 path.slots[0]++;
12823         }
12824
12825         if (del_nr) {
12826                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12827                 if (ret)
12828                         goto out;
12829         }
12830         btrfs_release_path(&path);
12831
12832 reinit_data_reloc:
12833         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12834         key.type = BTRFS_ROOT_ITEM_KEY;
12835         key.offset = (u64)-1;
12836         root = btrfs_read_fs_root(fs_info, &key);
12837         if (IS_ERR(root)) {
12838                 fprintf(stderr, "Error reading data reloc tree\n");
12839                 ret = PTR_ERR(root);
12840                 goto out;
12841         }
12842         record_root_in_trans(trans, root);
12843         ret = btrfs_fsck_reinit_root(trans, root, 0);
12844         if (ret)
12845                 goto out;
12846         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12847 out:
12848         btrfs_release_path(&path);
12849         return ret;
12850 }
12851
12852 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12853                               struct btrfs_fs_info *fs_info)
12854 {
12855         u64 start = 0;
12856         int ret;
12857
12858         /*
12859          * The only reason we don't do this is because right now we're just
12860          * walking the trees we find and pinning down their bytes, we don't look
12861          * at any of the leaves.  In order to do mixed groups we'd have to check
12862          * the leaves of any fs roots and pin down the bytes for any file
12863          * extents we find.  Not hard but why do it if we don't have to?
12864          */
12865         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12866                 fprintf(stderr, "We don't support re-initing the extent tree "
12867                         "for mixed block groups yet, please notify a btrfs "
12868                         "developer you want to do this so they can add this "
12869                         "functionality.\n");
12870                 return -EINVAL;
12871         }
12872
12873         /*
12874          * first we need to walk all of the trees except the extent tree and pin
12875          * down the bytes that are in use so we don't overwrite any existing
12876          * metadata.
12877          */
12878         ret = pin_metadata_blocks(fs_info);
12879         if (ret) {
12880                 fprintf(stderr, "error pinning down used bytes\n");
12881                 return ret;
12882         }
12883
12884         /*
12885          * Need to drop all the block groups since we're going to recreate all
12886          * of them again.
12887          */
12888         btrfs_free_block_groups(fs_info);
12889         ret = reset_block_groups(fs_info);
12890         if (ret) {
12891                 fprintf(stderr, "error resetting the block groups\n");
12892                 return ret;
12893         }
12894
12895         /* Ok we can allocate now, reinit the extent root */
12896         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12897         if (ret) {
12898                 fprintf(stderr, "extent root initialization failed\n");
12899                 /*
12900                  * When the transaction code is updated we should end the
12901                  * transaction, but for now progs only knows about commit so
12902                  * just return an error.
12903                  */
12904                 return ret;
12905         }
12906
12907         /*
12908          * Now we have all the in-memory block groups setup so we can make
12909          * allocations properly, and the metadata we care about is safe since we
12910          * pinned all of it above.
12911          */
12912         while (1) {
12913                 struct btrfs_block_group_cache *cache;
12914
12915                 cache = btrfs_lookup_first_block_group(fs_info, start);
12916                 if (!cache)
12917                         break;
12918                 start = cache->key.objectid + cache->key.offset;
12919                 ret = btrfs_insert_item(trans, fs_info->extent_root,
12920                                         &cache->key, &cache->item,
12921                                         sizeof(cache->item));
12922                 if (ret) {
12923                         fprintf(stderr, "Error adding block group\n");
12924                         return ret;
12925                 }
12926                 btrfs_extent_post_op(trans, fs_info->extent_root);
12927         }
12928
12929         ret = reset_balance(trans, fs_info);
12930         if (ret)
12931                 fprintf(stderr, "error resetting the pending balance\n");
12932
12933         return ret;
12934 }
12935
12936 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12937 {
12938         struct btrfs_path path;
12939         struct btrfs_trans_handle *trans;
12940         struct btrfs_key key;
12941         int ret;
12942
12943         printf("Recowing metadata block %llu\n", eb->start);
12944         key.objectid = btrfs_header_owner(eb);
12945         key.type = BTRFS_ROOT_ITEM_KEY;
12946         key.offset = (u64)-1;
12947
12948         root = btrfs_read_fs_root(root->fs_info, &key);
12949         if (IS_ERR(root)) {
12950                 fprintf(stderr, "Couldn't find owner root %llu\n",
12951                         key.objectid);
12952                 return PTR_ERR(root);
12953         }
12954
12955         trans = btrfs_start_transaction(root, 1);
12956         if (IS_ERR(trans))
12957                 return PTR_ERR(trans);
12958
12959         btrfs_init_path(&path);
12960         path.lowest_level = btrfs_header_level(eb);
12961         if (path.lowest_level)
12962                 btrfs_node_key_to_cpu(eb, &key, 0);
12963         else
12964                 btrfs_item_key_to_cpu(eb, &key, 0);
12965
12966         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12967         btrfs_commit_transaction(trans, root);
12968         btrfs_release_path(&path);
12969         return ret;
12970 }
12971
12972 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12973 {
12974         struct btrfs_path path;
12975         struct btrfs_trans_handle *trans;
12976         struct btrfs_key key;
12977         int ret;
12978
12979         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12980                bad->key.type, bad->key.offset);
12981         key.objectid = bad->root_id;
12982         key.type = BTRFS_ROOT_ITEM_KEY;
12983         key.offset = (u64)-1;
12984
12985         root = btrfs_read_fs_root(root->fs_info, &key);
12986         if (IS_ERR(root)) {
12987                 fprintf(stderr, "Couldn't find owner root %llu\n",
12988                         key.objectid);
12989                 return PTR_ERR(root);
12990         }
12991
12992         trans = btrfs_start_transaction(root, 1);
12993         if (IS_ERR(trans))
12994                 return PTR_ERR(trans);
12995
12996         btrfs_init_path(&path);
12997         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12998         if (ret) {
12999                 if (ret > 0)
13000                         ret = 0;
13001                 goto out;
13002         }
13003         ret = btrfs_del_item(trans, root, &path);
13004 out:
13005         btrfs_commit_transaction(trans, root);
13006         btrfs_release_path(&path);
13007         return ret;
13008 }
13009
13010 static int zero_log_tree(struct btrfs_root *root)
13011 {
13012         struct btrfs_trans_handle *trans;
13013         int ret;
13014
13015         trans = btrfs_start_transaction(root, 1);
13016         if (IS_ERR(trans)) {
13017                 ret = PTR_ERR(trans);
13018                 return ret;
13019         }
13020         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13021         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13022         ret = btrfs_commit_transaction(trans, root);
13023         return ret;
13024 }
13025
13026 static int populate_csum(struct btrfs_trans_handle *trans,
13027                          struct btrfs_root *csum_root, char *buf, u64 start,
13028                          u64 len)
13029 {
13030         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13031         u64 offset = 0;
13032         u64 sectorsize;
13033         int ret = 0;
13034
13035         while (offset < len) {
13036                 sectorsize = fs_info->sectorsize;
13037                 ret = read_extent_data(fs_info, buf, start + offset,
13038                                        &sectorsize, 0);
13039                 if (ret)
13040                         break;
13041                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13042                                             start + offset, buf, sectorsize);
13043                 if (ret)
13044                         break;
13045                 offset += sectorsize;
13046         }
13047         return ret;
13048 }
13049
13050 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13051                                       struct btrfs_root *csum_root,
13052                                       struct btrfs_root *cur_root)
13053 {
13054         struct btrfs_path path;
13055         struct btrfs_key key;
13056         struct extent_buffer *node;
13057         struct btrfs_file_extent_item *fi;
13058         char *buf = NULL;
13059         u64 start = 0;
13060         u64 len = 0;
13061         int slot = 0;
13062         int ret = 0;
13063
13064         buf = malloc(cur_root->fs_info->sectorsize);
13065         if (!buf)
13066                 return -ENOMEM;
13067
13068         btrfs_init_path(&path);
13069         key.objectid = 0;
13070         key.offset = 0;
13071         key.type = 0;
13072         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13073         if (ret < 0)
13074                 goto out;
13075         /* Iterate all regular file extents and fill its csum */
13076         while (1) {
13077                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13078
13079                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13080                         goto next;
13081                 node = path.nodes[0];
13082                 slot = path.slots[0];
13083                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13084                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13085                         goto next;
13086                 start = btrfs_file_extent_disk_bytenr(node, fi);
13087                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13088
13089                 ret = populate_csum(trans, csum_root, buf, start, len);
13090                 if (ret == -EEXIST)
13091                         ret = 0;
13092                 if (ret < 0)
13093                         goto out;
13094 next:
13095                 /*
13096                  * TODO: if next leaf is corrupted, jump to nearest next valid
13097                  * leaf.
13098                  */
13099                 ret = btrfs_next_item(cur_root, &path);
13100                 if (ret < 0)
13101                         goto out;
13102                 if (ret > 0) {
13103                         ret = 0;
13104                         goto out;
13105                 }
13106         }
13107
13108 out:
13109         btrfs_release_path(&path);
13110         free(buf);
13111         return ret;
13112 }
13113
13114 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13115                                   struct btrfs_root *csum_root)
13116 {
13117         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13118         struct btrfs_path path;
13119         struct btrfs_root *tree_root = fs_info->tree_root;
13120         struct btrfs_root *cur_root;
13121         struct extent_buffer *node;
13122         struct btrfs_key key;
13123         int slot = 0;
13124         int ret = 0;
13125
13126         btrfs_init_path(&path);
13127         key.objectid = BTRFS_FS_TREE_OBJECTID;
13128         key.offset = 0;
13129         key.type = BTRFS_ROOT_ITEM_KEY;
13130         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13131         if (ret < 0)
13132                 goto out;
13133         if (ret > 0) {
13134                 ret = -ENOENT;
13135                 goto out;
13136         }
13137
13138         while (1) {
13139                 node = path.nodes[0];
13140                 slot = path.slots[0];
13141                 btrfs_item_key_to_cpu(node, &key, slot);
13142                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13143                         goto out;
13144                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13145                         goto next;
13146                 if (!is_fstree(key.objectid))
13147                         goto next;
13148                 key.offset = (u64)-1;
13149
13150                 cur_root = btrfs_read_fs_root(fs_info, &key);
13151                 if (IS_ERR(cur_root) || !cur_root) {
13152                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13153                                 key.objectid);
13154                         goto out;
13155                 }
13156                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13157                                 cur_root);
13158                 if (ret < 0)
13159                         goto out;
13160 next:
13161                 ret = btrfs_next_item(tree_root, &path);
13162                 if (ret > 0) {
13163                         ret = 0;
13164                         goto out;
13165                 }
13166                 if (ret < 0)
13167                         goto out;
13168         }
13169
13170 out:
13171         btrfs_release_path(&path);
13172         return ret;
13173 }
13174
13175 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13176                                       struct btrfs_root *csum_root)
13177 {
13178         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13179         struct btrfs_path path;
13180         struct btrfs_extent_item *ei;
13181         struct extent_buffer *leaf;
13182         char *buf;
13183         struct btrfs_key key;
13184         int ret;
13185
13186         btrfs_init_path(&path);
13187         key.objectid = 0;
13188         key.type = BTRFS_EXTENT_ITEM_KEY;
13189         key.offset = 0;
13190         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13191         if (ret < 0) {
13192                 btrfs_release_path(&path);
13193                 return ret;
13194         }
13195
13196         buf = malloc(csum_root->fs_info->sectorsize);
13197         if (!buf) {
13198                 btrfs_release_path(&path);
13199                 return -ENOMEM;
13200         }
13201
13202         while (1) {
13203                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13204                         ret = btrfs_next_leaf(extent_root, &path);
13205                         if (ret < 0)
13206                                 break;
13207                         if (ret) {
13208                                 ret = 0;
13209                                 break;
13210                         }
13211                 }
13212                 leaf = path.nodes[0];
13213
13214                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13215                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13216                         path.slots[0]++;
13217                         continue;
13218                 }
13219
13220                 ei = btrfs_item_ptr(leaf, path.slots[0],
13221                                     struct btrfs_extent_item);
13222                 if (!(btrfs_extent_flags(leaf, ei) &
13223                       BTRFS_EXTENT_FLAG_DATA)) {
13224                         path.slots[0]++;
13225                         continue;
13226                 }
13227
13228                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13229                                     key.offset);
13230                 if (ret)
13231                         break;
13232                 path.slots[0]++;
13233         }
13234
13235         btrfs_release_path(&path);
13236         free(buf);
13237         return ret;
13238 }
13239
/*
 * Recompute all data csums and store them in the csum tree.
 *
 * After an extent tree init the extent info has been wiped, so the extent
 * tree cannot be used as the source of data extents. A non-zero
 * @search_fs_tree selects the fs/subvolume trees as the source instead;
 * otherwise the extent tree is walked.
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
13256
13257 static void free_roots_info_cache(void)
13258 {
13259         if (!roots_info_cache)
13260                 return;
13261
13262         while (!cache_tree_empty(roots_info_cache)) {
13263                 struct cache_extent *entry;
13264                 struct root_item_info *rii;
13265
13266                 entry = first_cache_extent(roots_info_cache);
13267                 if (!entry)
13268                         break;
13269                 remove_cache_extent(roots_info_cache, entry);
13270                 rii = container_of(entry, struct root_item_info, cache_extent);
13271                 free(rii);
13272         }
13273
13274         free(roots_info_cache);
13275         roots_info_cache = NULL;
13276 }
13277
13278 static int build_roots_info_cache(struct btrfs_fs_info *info)
13279 {
13280         int ret = 0;
13281         struct btrfs_key key;
13282         struct extent_buffer *leaf;
13283         struct btrfs_path path;
13284
13285         if (!roots_info_cache) {
13286                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13287                 if (!roots_info_cache)
13288                         return -ENOMEM;
13289                 cache_tree_init(roots_info_cache);
13290         }
13291
13292         btrfs_init_path(&path);
13293         key.objectid = 0;
13294         key.type = BTRFS_EXTENT_ITEM_KEY;
13295         key.offset = 0;
13296         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13297         if (ret < 0)
13298                 goto out;
13299         leaf = path.nodes[0];
13300
13301         while (1) {
13302                 struct btrfs_key found_key;
13303                 struct btrfs_extent_item *ei;
13304                 struct btrfs_extent_inline_ref *iref;
13305                 int slot = path.slots[0];
13306                 int type;
13307                 u64 flags;
13308                 u64 root_id;
13309                 u8 level;
13310                 struct cache_extent *entry;
13311                 struct root_item_info *rii;
13312
13313                 if (slot >= btrfs_header_nritems(leaf)) {
13314                         ret = btrfs_next_leaf(info->extent_root, &path);
13315                         if (ret < 0) {
13316                                 break;
13317                         } else if (ret) {
13318                                 ret = 0;
13319                                 break;
13320                         }
13321                         leaf = path.nodes[0];
13322                         slot = path.slots[0];
13323                 }
13324
13325                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13326
13327                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13328                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13329                         goto next;
13330
13331                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13332                 flags = btrfs_extent_flags(leaf, ei);
13333
13334                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13335                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13336                         goto next;
13337
13338                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13339                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13340                         level = found_key.offset;
13341                 } else {
13342                         struct btrfs_tree_block_info *binfo;
13343
13344                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13345                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13346                         level = btrfs_tree_block_level(leaf, binfo);
13347                 }
13348
13349                 /*
13350                  * For a root extent, it must be of the following type and the
13351                  * first (and only one) iref in the item.
13352                  */
13353                 type = btrfs_extent_inline_ref_type(leaf, iref);
13354                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13355                         goto next;
13356
13357                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13358                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13359                 if (!entry) {
13360                         rii = malloc(sizeof(struct root_item_info));
13361                         if (!rii) {
13362                                 ret = -ENOMEM;
13363                                 goto out;
13364                         }
13365                         rii->cache_extent.start = root_id;
13366                         rii->cache_extent.size = 1;
13367                         rii->level = (u8)-1;
13368                         entry = &rii->cache_extent;
13369                         ret = insert_cache_extent(roots_info_cache, entry);
13370                         ASSERT(ret == 0);
13371                 } else {
13372                         rii = container_of(entry, struct root_item_info,
13373                                            cache_extent);
13374                 }
13375
13376                 ASSERT(rii->cache_extent.start == root_id);
13377                 ASSERT(rii->cache_extent.size == 1);
13378
13379                 if (level > rii->level || rii->level == (u8)-1) {
13380                         rii->level = level;
13381                         rii->bytenr = found_key.objectid;
13382                         rii->gen = btrfs_extent_generation(leaf, ei);
13383                         rii->node_count = 1;
13384                 } else if (level == rii->level) {
13385                         rii->node_count++;
13386                 }
13387 next:
13388                 path.slots[0]++;
13389         }
13390
13391 out:
13392         btrfs_release_path(&path);
13393
13394         return ret;
13395 }
13396
13397 static int maybe_repair_root_item(struct btrfs_path *path,
13398                                   const struct btrfs_key *root_key,
13399                                   const int read_only_mode)
13400 {
13401         const u64 root_id = root_key->objectid;
13402         struct cache_extent *entry;
13403         struct root_item_info *rii;
13404         struct btrfs_root_item ri;
13405         unsigned long offset;
13406
13407         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13408         if (!entry) {
13409                 fprintf(stderr,
13410                         "Error: could not find extent items for root %llu\n",
13411                         root_key->objectid);
13412                 return -ENOENT;
13413         }
13414
13415         rii = container_of(entry, struct root_item_info, cache_extent);
13416         ASSERT(rii->cache_extent.start == root_id);
13417         ASSERT(rii->cache_extent.size == 1);
13418
13419         if (rii->node_count != 1) {
13420                 fprintf(stderr,
13421                         "Error: could not find btree root extent for root %llu\n",
13422                         root_id);
13423                 return -ENOENT;
13424         }
13425
13426         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13427         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13428
13429         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13430             btrfs_root_level(&ri) != rii->level ||
13431             btrfs_root_generation(&ri) != rii->gen) {
13432
13433                 /*
13434                  * If we're in repair mode but our caller told us to not update
13435                  * the root item, i.e. just check if it needs to be updated, don't
13436                  * print this message, since the caller will call us again shortly
13437                  * for the same root item without read only mode (the caller will
13438                  * open a transaction first).
13439                  */
13440                 if (!(read_only_mode && repair))
13441                         fprintf(stderr,
13442                                 "%sroot item for root %llu,"
13443                                 " current bytenr %llu, current gen %llu, current level %u,"
13444                                 " new bytenr %llu, new gen %llu, new level %u\n",
13445                                 (read_only_mode ? "" : "fixing "),
13446                                 root_id,
13447                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13448                                 btrfs_root_level(&ri),
13449                                 rii->bytenr, rii->gen, rii->level);
13450
13451                 if (btrfs_root_generation(&ri) > rii->gen) {
13452                         fprintf(stderr,
13453                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13454                                 root_id, btrfs_root_generation(&ri), rii->gen);
13455                         return -EINVAL;
13456                 }
13457
13458                 if (!read_only_mode) {
13459                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13460                         btrfs_set_root_level(&ri, rii->level);
13461                         btrfs_set_root_generation(&ri, rii->gen);
13462                         write_extent_buffer(path->nodes[0], &ri,
13463                                             offset, sizeof(ri));
13464                 }
13465
13466                 return 1;
13467         }
13468
13469         return 0;
13470 }
13471
13472 /*
13473  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13474  * caused read-only snapshots to be corrupted if they were created at a moment
13475  * when the source subvolume/snapshot had orphan items. The issue was that the
13476  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13477  * node instead of the post orphan cleanup root node.
13478  * So this function, and its callees, just detects and fixes those cases. Even
13479  * though the regression was for read-only snapshots, this function applies to
13480  * any snapshot/subvolume root.
13481  * This must be run before any other repair code - not doing it so, makes other
13482  * repair code delete or modify backrefs in the extent tree for example, which
13483  * will result in an inconsistent fs after repairing the root items.
13484  */
13485 static int repair_root_items(struct btrfs_fs_info *info)
13486 {
13487         struct btrfs_path path;
13488         struct btrfs_key key;
13489         struct extent_buffer *leaf;
13490         struct btrfs_trans_handle *trans = NULL;
13491         int ret = 0;
13492         int bad_roots = 0;
13493         int need_trans = 0;
13494
13495         btrfs_init_path(&path);
13496
13497         ret = build_roots_info_cache(info);
13498         if (ret)
13499                 goto out;
13500
13501         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13502         key.type = BTRFS_ROOT_ITEM_KEY;
13503         key.offset = 0;
13504
13505 again:
13506         /*
13507          * Avoid opening and committing transactions if a leaf doesn't have
13508          * any root items that need to be fixed, so that we avoid rotating
13509          * backup roots unnecessarily.
13510          */
13511         if (need_trans) {
13512                 trans = btrfs_start_transaction(info->tree_root, 1);
13513                 if (IS_ERR(trans)) {
13514                         ret = PTR_ERR(trans);
13515                         goto out;
13516                 }
13517         }
13518
13519         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13520                                 0, trans ? 1 : 0);
13521         if (ret < 0)
13522                 goto out;
13523         leaf = path.nodes[0];
13524
13525         while (1) {
13526                 struct btrfs_key found_key;
13527
13528                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13529                         int no_more_keys = find_next_key(&path, &key);
13530
13531                         btrfs_release_path(&path);
13532                         if (trans) {
13533                                 ret = btrfs_commit_transaction(trans,
13534                                                                info->tree_root);
13535                                 trans = NULL;
13536                                 if (ret < 0)
13537                                         goto out;
13538                         }
13539                         need_trans = 0;
13540                         if (no_more_keys)
13541                                 break;
13542                         goto again;
13543                 }
13544
13545                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13546
13547                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13548                         goto next;
13549                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13550                         goto next;
13551
13552                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13553                 if (ret < 0)
13554                         goto out;
13555                 if (ret) {
13556                         if (!trans && repair) {
13557                                 need_trans = 1;
13558                                 key = found_key;
13559                                 btrfs_release_path(&path);
13560                                 goto again;
13561                         }
13562                         bad_roots++;
13563                 }
13564 next:
13565                 path.slots[0]++;
13566         }
13567         ret = 0;
13568 out:
13569         free_roots_info_cache();
13570         btrfs_release_path(&path);
13571         if (trans)
13572                 btrfs_commit_transaction(trans, info->tree_root);
13573         if (ret < 0)
13574                 return ret;
13575
13576         return bad_roots;
13577 }
13578
13579 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13580 {
13581         struct btrfs_trans_handle *trans;
13582         struct btrfs_block_group_cache *bg_cache;
13583         u64 current = 0;
13584         int ret = 0;
13585
13586         /* Clear all free space cache inodes and its extent data */
13587         while (1) {
13588                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13589                 if (!bg_cache)
13590                         break;
13591                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13592                 if (ret < 0)
13593                         return ret;
13594                 current = bg_cache->key.objectid + bg_cache->key.offset;
13595         }
13596
13597         /* Don't forget to set cache_generation to -1 */
13598         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13599         if (IS_ERR(trans)) {
13600                 error("failed to update super block cache generation");
13601                 return PTR_ERR(trans);
13602         }
13603         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13604         btrfs_commit_transaction(trans, fs_info->tree_root);
13605
13606         return ret;
13607 }
13608
13609 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13610                 int clear_version)
13611 {
13612         int ret = 0;
13613
13614         if (clear_version == 1) {
13615                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13616                         error(
13617                 "free space cache v2 detected, use --clear-space-cache v2");
13618                         ret = 1;
13619                         goto close_out;
13620                 }
13621                 printf("Clearing free space cache\n");
13622                 ret = clear_free_space_cache(fs_info);
13623                 if (ret) {
13624                         error("failed to clear free space cache");
13625                         ret = 1;
13626                 } else {
13627                         printf("Free space cache cleared\n");
13628                 }
13629         } else if (clear_version == 2) {
13630                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13631                         printf("no free space cache v2 to clear\n");
13632                         ret = 0;
13633                         goto close_out;
13634                 }
13635                 printf("Clear free space cache v2\n");
13636                 ret = btrfs_clear_free_space_tree(fs_info);
13637                 if (ret) {
13638                         error("failed to clear free space cache v2: %d", ret);
13639                         ret = 1;
13640                 } else {
13641                         printf("free space cache v2 cleared\n");
13642                 }
13643         }
13644 close_out:
13645         return ret;
13646 }
13647
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 *
 * NULL-terminated list of lines: the synopsis, a one-line summary, a longer
 * description, an empty separator line and then one or more lines per
 * option.  Keep it in sync with the option table in cmd_check().
 */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"",
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--force                     skip mount checks, repair is not possible",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
13679
13680 int cmd_check(int argc, char **argv)
13681 {
13682         struct cache_tree root_cache;
13683         struct btrfs_root *root;
13684         struct btrfs_fs_info *info;
13685         u64 bytenr = 0;
13686         u64 subvolid = 0;
13687         u64 tree_root_bytenr = 0;
13688         u64 chunk_root_bytenr = 0;
13689         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13690         int ret = 0;
13691         int err = 0;
13692         u64 num;
13693         int init_csum_tree = 0;
13694         int readonly = 0;
13695         int clear_space_cache = 0;
13696         int qgroup_report = 0;
13697         int qgroups_repaired = 0;
13698         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13699         int force = 0;
13700
13701         while(1) {
13702                 int c;
13703                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13704                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13705                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13706                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13707                         GETOPT_VAL_FORCE };
13708                 static const struct option long_options[] = {
13709                         { "super", required_argument, NULL, 's' },
13710                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13711                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13712                         { "init-csum-tree", no_argument, NULL,
13713                                 GETOPT_VAL_INIT_CSUM },
13714                         { "init-extent-tree", no_argument, NULL,
13715                                 GETOPT_VAL_INIT_EXTENT },
13716                         { "check-data-csum", no_argument, NULL,
13717                                 GETOPT_VAL_CHECK_CSUM },
13718                         { "backup", no_argument, NULL, 'b' },
13719                         { "subvol-extents", required_argument, NULL, 'E' },
13720                         { "qgroup-report", no_argument, NULL, 'Q' },
13721                         { "tree-root", required_argument, NULL, 'r' },
13722                         { "chunk-root", required_argument, NULL,
13723                                 GETOPT_VAL_CHUNK_TREE },
13724                         { "progress", no_argument, NULL, 'p' },
13725                         { "mode", required_argument, NULL,
13726                                 GETOPT_VAL_MODE },
13727                         { "clear-space-cache", required_argument, NULL,
13728                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13729                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13730                         { NULL, 0, NULL, 0}
13731                 };
13732
13733                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13734                 if (c < 0)
13735                         break;
13736                 switch(c) {
13737                         case 'a': /* ignored */ break;
13738                         case 'b':
13739                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13740                                 break;
13741                         case 's':
13742                                 num = arg_strtou64(optarg);
13743                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13744                                         error(
13745                                         "super mirror should be less than %d",
13746                                                 BTRFS_SUPER_MIRROR_MAX);
13747                                         exit(1);
13748                                 }
13749                                 bytenr = btrfs_sb_offset(((int)num));
13750                                 printf("using SB copy %llu, bytenr %llu\n", num,
13751                                        (unsigned long long)bytenr);
13752                                 break;
13753                         case 'Q':
13754                                 qgroup_report = 1;
13755                                 break;
13756                         case 'E':
13757                                 subvolid = arg_strtou64(optarg);
13758                                 break;
13759                         case 'r':
13760                                 tree_root_bytenr = arg_strtou64(optarg);
13761                                 break;
13762                         case GETOPT_VAL_CHUNK_TREE:
13763                                 chunk_root_bytenr = arg_strtou64(optarg);
13764                                 break;
13765                         case 'p':
13766                                 ctx.progress_enabled = true;
13767                                 break;
13768                         case '?':
13769                         case 'h':
13770                                 usage(cmd_check_usage);
13771                         case GETOPT_VAL_REPAIR:
13772                                 printf("enabling repair mode\n");
13773                                 repair = 1;
13774                                 ctree_flags |= OPEN_CTREE_WRITES;
13775                                 break;
13776                         case GETOPT_VAL_READONLY:
13777                                 readonly = 1;
13778                                 break;
13779                         case GETOPT_VAL_INIT_CSUM:
13780                                 printf("Creating a new CRC tree\n");
13781                                 init_csum_tree = 1;
13782                                 repair = 1;
13783                                 ctree_flags |= OPEN_CTREE_WRITES;
13784                                 break;
13785                         case GETOPT_VAL_INIT_EXTENT:
13786                                 init_extent_tree = 1;
13787                                 ctree_flags |= (OPEN_CTREE_WRITES |
13788                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13789                                 repair = 1;
13790                                 break;
13791                         case GETOPT_VAL_CHECK_CSUM:
13792                                 check_data_csum = 1;
13793                                 break;
13794                         case GETOPT_VAL_MODE:
13795                                 check_mode = parse_check_mode(optarg);
13796                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13797                                         error("unknown mode: %s", optarg);
13798                                         exit(1);
13799                                 }
13800                                 break;
13801                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13802                                 if (strcmp(optarg, "v1") == 0) {
13803                                         clear_space_cache = 1;
13804                                 } else if (strcmp(optarg, "v2") == 0) {
13805                                         clear_space_cache = 2;
13806                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13807                                 } else {
13808                                         error(
13809                 "invalid argument to --clear-space-cache, must be v1 or v2");
13810                                         exit(1);
13811                                 }
13812                                 ctree_flags |= OPEN_CTREE_WRITES;
13813                                 break;
13814                         case GETOPT_VAL_FORCE:
13815                                 force = 1;
13816                                 break;
13817                 }
13818         }
13819
13820         if (check_argc_exact(argc - optind, 1))
13821                 usage(cmd_check_usage);
13822
13823         if (ctx.progress_enabled) {
13824                 ctx.tp = TASK_NOTHING;
13825                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13826         }
13827
13828         /* This check is the only reason for --readonly to exist */
13829         if (readonly && repair) {
13830                 error("repair options are not compatible with --readonly");
13831                 exit(1);
13832         }
13833
13834         /*
13835          * experimental and dangerous
13836          */
13837         if (repair && check_mode == CHECK_MODE_LOWMEM)
13838                 warning("low-memory mode repair support is only partial");
13839
13840         radix_tree_init();
13841         cache_tree_init(&root_cache);
13842
13843         ret = check_mounted(argv[optind]);
13844         if (!force) {
13845                 if (ret < 0) {
13846                         error("could not check mount status: %s",
13847                                         strerror(-ret));
13848                         err |= !!ret;
13849                         goto err_out;
13850                 } else if (ret) {
13851                         error(
13852 "%s is currently mounted, use --force if you really intend to check the filesystem",
13853                                 argv[optind]);
13854                         ret = -EBUSY;
13855                         err |= !!ret;
13856                         goto err_out;
13857                 }
13858         } else {
13859                 if (repair) {
13860                         error("repair and --force is not yet supported");
13861                         ret = 1;
13862                         err |= !!ret;
13863                         goto err_out;
13864                 }
13865                 if (ret < 0) {
13866                         warning(
13867 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13868                                 argv[optind]);
13869                 } else if (ret) {
13870                         warning(
13871                         "filesystem mounted, continuing because of --force");
13872                 }
13873                 /* A block device is mounted in exclusive mode by kernel */
13874                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
13875         }
13876
13877         /* only allow partial opening under repair mode */
13878         if (repair)
13879                 ctree_flags |= OPEN_CTREE_PARTIAL;
13880
13881         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13882                                   chunk_root_bytenr, ctree_flags);
13883         if (!info) {
13884                 error("cannot open file system");
13885                 ret = -EIO;
13886                 err |= !!ret;
13887                 goto err_out;
13888         }
13889
13890         global_info = info;
13891         root = info->fs_root;
13892         uuid_unparse(info->super_copy->fsid, uuidbuf);
13893
13894         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13895
13896         /*
13897          * Check the bare minimum before starting anything else that could rely
13898          * on it, namely the tree roots, any local consistency checks
13899          */
13900         if (!extent_buffer_uptodate(info->tree_root->node) ||
13901             !extent_buffer_uptodate(info->dev_root->node) ||
13902             !extent_buffer_uptodate(info->chunk_root->node)) {
13903                 error("critical roots corrupted, unable to check the filesystem");
13904                 err |= !!ret;
13905                 ret = -EIO;
13906                 goto close_out;
13907         }
13908
13909         if (clear_space_cache) {
13910                 ret = do_clear_free_space_cache(info, clear_space_cache);
13911                 err |= !!ret;
13912                 goto close_out;
13913         }
13914
13915         /*
13916          * repair mode will force us to commit transaction which
13917          * will make us fail to load log tree when mounting.
13918          */
13919         if (repair && btrfs_super_log_root(info->super_copy)) {
13920                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13921                 if (!ret) {
13922                         ret = 1;
13923                         err |= !!ret;
13924                         goto close_out;
13925                 }
13926                 ret = zero_log_tree(root);
13927                 err |= !!ret;
13928                 if (ret) {
13929                         error("failed to zero log tree: %d", ret);
13930                         goto close_out;
13931                 }
13932         }
13933
13934         if (qgroup_report) {
13935                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13936                        uuidbuf);
13937                 ret = qgroup_verify_all(info);
13938                 err |= !!ret;
13939                 if (ret == 0)
13940                         report_qgroups(1);
13941                 goto close_out;
13942         }
13943         if (subvolid) {
13944                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13945                        subvolid, argv[optind], uuidbuf);
13946                 ret = print_extent_state(info, subvolid);
13947                 err |= !!ret;
13948                 goto close_out;
13949         }
13950
13951         if (init_extent_tree || init_csum_tree) {
13952                 struct btrfs_trans_handle *trans;
13953
13954                 trans = btrfs_start_transaction(info->extent_root, 0);
13955                 if (IS_ERR(trans)) {
13956                         error("error starting transaction");
13957                         ret = PTR_ERR(trans);
13958                         err |= !!ret;
13959                         goto close_out;
13960                 }
13961
13962                 if (init_extent_tree) {
13963                         printf("Creating a new extent tree\n");
13964                         ret = reinit_extent_tree(trans, info);
13965                         err |= !!ret;
13966                         if (ret)
13967                                 goto close_out;
13968                 }
13969
13970                 if (init_csum_tree) {
13971                         printf("Reinitialize checksum tree\n");
13972                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13973                         if (ret) {
13974                                 error("checksum tree initialization failed: %d",
13975                                                 ret);
13976                                 ret = -EIO;
13977                                 err |= !!ret;
13978                                 goto close_out;
13979                         }
13980
13981                         ret = fill_csum_tree(trans, info->csum_root,
13982                                              init_extent_tree);
13983                         err |= !!ret;
13984                         if (ret) {
13985                                 error("checksum tree refilling failed: %d", ret);
13986                                 return -EIO;
13987                         }
13988                 }
13989                 /*
13990                  * Ok now we commit and run the normal fsck, which will add
13991                  * extent entries for all of the items it finds.
13992                  */
13993                 ret = btrfs_commit_transaction(trans, info->extent_root);
13994                 err |= !!ret;
13995                 if (ret)
13996                         goto close_out;
13997         }
13998         if (!extent_buffer_uptodate(info->extent_root->node)) {
13999                 error("critical: extent_root, unable to check the filesystem");
14000                 ret = -EIO;
14001                 err |= !!ret;
14002                 goto close_out;
14003         }
14004         if (!extent_buffer_uptodate(info->csum_root->node)) {
14005                 error("critical: csum_root, unable to check the filesystem");
14006                 ret = -EIO;
14007                 err |= !!ret;
14008                 goto close_out;
14009         }
14010
14011         ret = do_check_chunks_and_extents(info);
14012         err |= !!ret;
14013         if (ret)
14014                 error(
14015                 "errors found in extent allocation tree or chunk allocation");
14016
14017         ret = repair_root_items(info);
14018         err |= !!ret;
14019         if (ret < 0) {
14020                 error("failed to repair root items: %s", strerror(-ret));
14021                 goto close_out;
14022         }
14023         if (repair) {
14024                 fprintf(stderr, "Fixed %d roots.\n", ret);
14025                 ret = 0;
14026         } else if (ret > 0) {
14027                 fprintf(stderr,
14028                        "Found %d roots with an outdated root item.\n",
14029                        ret);
14030                 fprintf(stderr,
14031                         "Please run a filesystem check with the option --repair to fix them.\n");
14032                 ret = 1;
14033                 err |= !!ret;
14034                 goto close_out;
14035         }
14036
14037         if (!ctx.progress_enabled) {
14038                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14039                         fprintf(stderr, "checking free space tree\n");
14040                 else
14041                         fprintf(stderr, "checking free space cache\n");
14042         }
14043         ret = check_space_cache(root);
14044         err |= !!ret;
14045         if (ret) {
14046                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14047                         error("errors found in free space tree");
14048                 else
14049                         error("errors found in free space cache");
14050                 goto out;
14051         }
14052
14053         /*
14054          * We used to have to have these hole extents in between our real
14055          * extents so if we don't have this flag set we need to make sure there
14056          * are no gaps in the file extents for inodes, otherwise we can just
14057          * ignore it when this happens.
14058          */
14059         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14060         ret = do_check_fs_roots(info, &root_cache);
14061         err |= !!ret;
14062         if (ret) {
14063                 error("errors found in fs roots");
14064                 goto out;
14065         }
14066
14067         fprintf(stderr, "checking csums\n");
14068         ret = check_csums(root);
14069         err |= !!ret;
14070         if (ret) {
14071                 error("errors found in csum tree");
14072                 goto out;
14073         }
14074
14075         fprintf(stderr, "checking root refs\n");
14076         /* For low memory mode, check_fs_roots_v2 handles root refs */
14077         if (check_mode != CHECK_MODE_LOWMEM) {
14078                 ret = check_root_refs(root, &root_cache);
14079                 err |= !!ret;
14080                 if (ret) {
14081                         error("errors found in root refs");
14082                         goto out;
14083                 }
14084         }
14085
14086         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14087                 struct extent_buffer *eb;
14088
14089                 eb = list_first_entry(&root->fs_info->recow_ebs,
14090                                       struct extent_buffer, recow);
14091                 list_del_init(&eb->recow);
14092                 ret = recow_extent_buffer(root, eb);
14093                 err |= !!ret;
14094                 if (ret) {
14095                         error("fails to fix transid errors");
14096                         break;
14097                 }
14098         }
14099
14100         while (!list_empty(&delete_items)) {
14101                 struct bad_item *bad;
14102
14103                 bad = list_first_entry(&delete_items, struct bad_item, list);
14104                 list_del_init(&bad->list);
14105                 if (repair) {
14106                         ret = delete_bad_item(root, bad);
14107                         err |= !!ret;
14108                 }
14109                 free(bad);
14110         }
14111
14112         if (info->quota_enabled) {
14113                 fprintf(stderr, "checking quota groups\n");
14114                 ret = qgroup_verify_all(info);
14115                 err |= !!ret;
14116                 if (ret) {
14117                         error("failed to check quota groups");
14118                         goto out;
14119                 }
14120                 report_qgroups(0);
14121                 ret = repair_qgroups(info, &qgroups_repaired);
14122                 err |= !!ret;
14123                 if (err) {
14124                         error("failed to repair quota groups");
14125                         goto out;
14126                 }
14127                 ret = 0;
14128         }
14129
14130         if (!list_empty(&root->fs_info->recow_ebs)) {
14131                 error("transid errors in file system");
14132                 ret = 1;
14133                 err |= !!ret;
14134         }
14135 out:
14136         printf("found %llu bytes used, ",
14137                (unsigned long long)bytes_used);
14138         if (err)
14139                 printf("error(s) found\n");
14140         else
14141                 printf("no error found\n");
14142         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14143         printf("total tree bytes: %llu\n",
14144                (unsigned long long)total_btree_bytes);
14145         printf("total fs tree bytes: %llu\n",
14146                (unsigned long long)total_fs_tree_bytes);
14147         printf("total extent tree bytes: %llu\n",
14148                (unsigned long long)total_extent_tree_bytes);
14149         printf("btree space waste bytes: %llu\n",
14150                (unsigned long long)btree_space_waste);
14151         printf("file data blocks allocated: %llu\n referenced %llu\n",
14152                 (unsigned long long)data_bytes_allocated,
14153                 (unsigned long long)data_bytes_referenced);
14154
14155         free_qgroup_counts();
14156         free_root_recs_tree(&root_cache);
14157 close_out:
14158         close_ctree(root);
14159 err_out:
14160         if (ctx.progress_enabled)
14161                 task_deinit(ctx.info);
14162
14163         return err;
14164 }