btrfs-progs: check: introduce repair_chunk_item()
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase of the check currently being reported.  The value is used as an
 * index into the status strings of print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* must be the last element */
};
53
/* Context handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
	int progress_enabled;
	enum task_position tp;	/* selects the status string that is printed */

	struct task_info *info;	/* passed to task_period_start()/task_period_wait() */
};
60
/*
 * File-scope u64 counters, all starting at zero.  Presumably running totals
 * gathered while checking -- confirm against their users.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
/* List heads declared with LIST_HEAD(), i.e. initially empty. */
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* Integer switches, all off (0) by default. */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Zero-initialized progress context (type consumed by print_status_check()). */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation selected by name in parse_check_mode().
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for an unrecognized
 * name; CHECK_MODE_DEFAULT is an alias for the original mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Currently selected mode; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a back reference, embedded as member "node" in
 * struct data_backref and struct tree_backref.
 */
struct extent_backref {
	struct rb_node node;		/* rb-tree linkage, see rb_node_to_extent_backref() */
	unsigned int is_data:1;		/* 1: data_backref, 0: tree_backref (see compare_extent_backref()) */
	unsigned int found_extent_tree:1;
	unsigned int full_backref:1;	/* compared before the type-specific keys */
	unsigned int found_ref:1;
	unsigned int broken:1;
};
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Back reference for a data extent.  Embeds the common extent_backref header
 * as "node"; to_data_backref() converts from the header to this structure.
 */
struct data_backref {
	struct extent_backref node;
	/* parent and root share storage and are compared as a single value */
	union {
		u64 parent;
		u64 root;
	};
	u64 owner;
	u64 offset;
	u64 disk_bytenr;
	u64 bytes;
	u64 ram_bytes;
	u32 num_refs;
	/*
	 * Separate from node.found_ref.  compare_data_backref() only looks at
	 * disk_bytenr and bytes when this is non-zero on both sides.
	 */
	u32 found_ref;
};
115
/*
 * Error bits, one flag per bit.  Numbering starts at bit 1; bit 0 is not
 * used by this set.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but does not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but does not match */
#define DIR_INDEX_MISSING       (1<<18) /* DIR_INDEX not found */
#define DIR_INDEX_MISMATCH      (1<<19) /* DIR_INDEX found but does not match */
#define DIR_COUNT_AGAIN         (1<<20) /* DIR isize should be recalculated */
136
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
138 {
139         return container_of(back, struct data_backref, node);
140 }
141
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
143 {
144         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146         struct data_backref *back1 = to_data_backref(ext1);
147         struct data_backref *back2 = to_data_backref(ext2);
148
149         WARN_ON(!ext1->is_data);
150         WARN_ON(!ext2->is_data);
151
152         /* parent and root are a union, so this covers both */
153         if (back1->parent > back2->parent)
154                 return 1;
155         if (back1->parent < back2->parent)
156                 return -1;
157
158         /* This is a full backref and the parents match. */
159         if (back1->node.full_backref)
160                 return 0;
161
162         if (back1->owner > back2->owner)
163                 return 1;
164         if (back1->owner < back2->owner)
165                 return -1;
166
167         if (back1->offset > back2->offset)
168                 return 1;
169         if (back1->offset < back2->offset)
170                 return -1;
171
172         if (back1->found_ref && back2->found_ref) {
173                 if (back1->disk_bytenr > back2->disk_bytenr)
174                         return 1;
175                 if (back1->disk_bytenr < back2->disk_bytenr)
176                         return -1;
177
178                 if (back1->bytes > back2->bytes)
179                         return 1;
180                 if (back1->bytes < back2->bytes)
181                         return -1;
182         }
183
184         return 0;
185 }
186
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead of an rb-tree node.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to the inode record's
 * orphan_extents list.
 */
struct orphan_data_extent {
	struct list_head list;
	u64 root;
	u64 objectid;
	u64 offset;
	u64 disk_bytenr;
	u64 disk_len;
};
201
/*
 * Back reference for a tree block.  Embeds the common extent_backref header
 * as "node"; to_tree_backref() converts from the header to this structure.
 */
struct tree_backref {
	struct extent_backref node;
	/* parent and root share storage and are compared as a single value */
	union {
		u64 parent;
		u64 root;
	};
};
209
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
211 {
212         return container_of(back, struct tree_backref, node);
213 }
214
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
216 {
217         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219         struct tree_backref *back1 = to_tree_backref(ext1);
220         struct tree_backref *back2 = to_tree_backref(ext2);
221
222         WARN_ON(ext1->is_data);
223         WARN_ON(ext2->is_data);
224
225         /* parent and root are a union, so this covers both */
226         if (back1->parent > back2->parent)
227                 return 1;
228         if (back1->parent < back2->parent)
229                 return -1;
230
231         return 0;
232 }
233
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
235 {
236         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
238
239         if (ext1->is_data > ext2->is_data)
240                 return 1;
241
242         if (ext1->is_data < ext2->is_data)
243                 return -1;
244
245         if (ext1->full_backref > ext2->full_backref)
246                 return 1;
247         if (ext1->full_backref < ext2->full_backref)
248                 return -1;
249
250         if (ext1->is_data)
251                 return compare_data_backref(node1, node2);
252         else
253                 return compare_tree_backref(node1, node2);
254 }
255
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a 2-bit field, so the value 2 fits and is distinct from the
 * boolean values 0 and 1.
 */
enum { FLAG_UNSET = 2 };
258
/* Per-extent bookkeeping record. */
struct extent_record {
	struct list_head backrefs;
	struct list_head dups;
	struct rb_root backref_tree;
	struct list_head list;		/* linkage used by to_extent_record() */
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 start;
	u64 max_size;
	u64 nr;
	u64 refs;
	u64 extent_item_refs;
	u64 generation;
	u64 parent_generation;
	u64 info_objectid;
	u32 num_duplicates;
	u8 info_level;
	/* two bits wide so that it can also hold FLAG_UNSET */
	unsigned int flag_block_full_backref:2;
	unsigned int found_rec:1;
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;
};
286
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
288 {
289         return container_of(entry, struct extent_record, list);
290 }
291
/*
 * One back reference of an inode, kept on inode_record::backrefs.
 * The name is stored inline after the fixed part of the structure.
 */
struct inode_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_inode_ref:1;
	u8 filetype;
	u8 ref_type;
	int errors;
	u64 dir;
	u64 index;
	u16 namelen;
	/*
	 * Trailing storage; clone_inode_rec() copies namelen + 1 bytes of it,
	 * i.e. the name plus one extra byte (presumably a NUL terminator).
	 */
	char name[0];
};
305
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
307 {
308         return list_entry(entry, struct inode_backref, list);
309 }
310
/*
 * List-linked record of selected root item values.
 * NOTE(review): the member names mirror root item fields; confirm against
 * the code that fills this structure in.
 */
struct root_item_record {
	struct list_head list;
	u64 objectid;
	u64 bytenr;
	u64 last_snapshot;
	u8 level;
	u8 drop_level;
	struct btrfs_key drop_key;
};
320
/* Back reference error bits; decoded to text by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
334
/*
 * One hole, covering [start, start + len), kept in an rb-tree that
 * compare_hole() orders by start.
 */
struct file_extent_hole {
	struct rb_node node;
	u64 start;
	u64 len;
};
340
/* Everything collected about one inode while checking. */
struct inode_record {
	struct list_head backrefs;	/* list of struct inode_backref */
	unsigned int checked:1;
	unsigned int merging:1;
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;
	int errors;			/* I_ERR_* bits, see print_inode_error() */

	u64 ino;
	u32 nlink;
	u32 imode;
	u64 isize;
	u64 nbytes;

	u32 found_link;
	u64 found_size;
	u64 extent_start;
	u64 extent_end;
	struct rb_root holes;		/* tree of struct file_extent_hole */
	struct list_head orphan_extents; /* list of struct orphan_data_extent */

	u32 refs;			/* set to 1 for a fresh copy in clone_inode_rec() */
};
368
/* Bits of inode_record::errors; decoded to text by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
384
/*
 * One back reference of a root, kept on a list (see to_root_backref()).
 * The name is stored inline after the fixed part of the structure.
 */
struct root_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_back_ref:1;
	unsigned int found_forward_ref:1;
	unsigned int reachable:1;
	int errors;
	u64 ref_root;
	u64 dir;
	u64 index;
	u16 namelen;
	char name[0];	/* trailing storage; presumably namelen bytes -- confirm */
};
399
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
401 {
402         return list_entry(entry, struct root_backref, list);
403 }
404
/* Per-root record, stored in a cache tree through its "cache" member. */
struct root_record {
	struct list_head backrefs;	/* presumably struct root_backref entries -- confirm */
	struct cache_extent cache;
	unsigned int found_root_item:1;
	u64 objectid;
	u32 found_ref;
};
412
/* Cache tree entry that carries an opaque pointer as its payload. */
struct ptr_node {
	struct cache_extent cache;
	void *data;
};
417
/*
 * Cache tree entry holding its own root and inode cache trees plus a
 * reference count.  walk_control keeps an array of pointers to these.
 */
struct shared_node {
	struct cache_extent cache;
	struct cache_tree root_cache;
	struct cache_tree inode_cache;
	struct inode_record *current;
	u32 refs;
};
425
/* A start/size pair describing one block. */
struct block_info {
	u64 start;
	u32 size;
};
430
/* State kept while walking a tree. */
struct walk_control {
	struct cache_tree shared;
	/* one slot per possible tree level */
	struct shared_node *nodes[BTRFS_MAX_LEVEL];
	int active_node;	/* presumably an index into nodes[] -- confirm */
	int root_level;
};
437
/* List-linked record of an item key together with a root id. */
struct bad_item {
	struct btrfs_key key;
	u64 root_id;
	struct list_head list;
};
443
/* List-linked (bytenr, bytes) candidate with a counter and a broken marker. */
struct extent_entry {
	u64 bytenr;
	u64 bytes;
	int count;
	int broken;
	struct list_head list;
};
451
/* Information about one root, stored in a cache tree via "cache_extent". */
struct root_item_info {
	/* level of the root */
	u8 level;
	/* number of nodes at this level, must be 1 for a root */
	int node_count;
	u64 bytenr;
	u64 gen;
	struct cache_extent cache_extent;
};
461
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual values; they are only used
 * internally for error classification.  Every flag must own a distinct bit,
 * otherwise two different errors cannot be told apart once they are OR-ed
 * into the same result.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
/* Used to share bit 4 with REFERENCER_MISMATCH; moved to a free bit. */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
478
479 static void *print_status_check(void *p)
480 {
481         struct task_ctx *priv = p;
482         const char work_indicator[] = { '.', 'o', 'O', 'o' };
483         uint32_t count = 0;
484         static char *task_position_string[] = {
485                 "checking extents",
486                 "checking free space cache",
487                 "checking fs roots",
488         };
489
490         task_period_start(priv->info, 1000 /* 1s */);
491
492         if (priv->tp == TASK_NOTHING)
493                 return NULL;
494
495         while (1) {
496                 printf("%s [%c]\r", task_position_string[priv->tp],
497                                 work_indicator[count % 4]);
498                 count++;
499                 fflush(stdout);
500                 task_period_wait(priv->info);
501         }
502         return NULL;
503 }
504
/*
 * Companion of print_status_check(): emit a newline and flush stdout.
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
512
513 static enum btrfs_check_mode parse_check_mode(const char *str)
514 {
515         if (strcmp(str, "lowmem") == 0)
516                 return CHECK_MODE_LOWMEM;
517         if (strcmp(str, "orig") == 0)
518                 return CHECK_MODE_ORIGINAL;
519         if (strcmp(str, "original") == 0)
520                 return CHECK_MODE_ORIGINAL;
521
522         return CHECK_MODE_UNKNOWN;
523 }
524
525 /* Compatible function to allow reuse of old codes */
526 static u64 first_extent_gap(struct rb_root *holes)
527 {
528         struct file_extent_hole *hole;
529
530         if (RB_EMPTY_ROOT(holes))
531                 return (u64)-1;
532
533         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
534         return hole->start;
535 }
536
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
538 {
539         struct file_extent_hole *hole1;
540         struct file_extent_hole *hole2;
541
542         hole1 = rb_entry(node1, struct file_extent_hole, node);
543         hole2 = rb_entry(node2, struct file_extent_hole, node);
544
545         if (hole1->start > hole2->start)
546                 return -1;
547         if (hole1->start < hole2->start)
548                 return 1;
549         /* Now hole1->start == hole2->start */
550         if (hole1->len >= hole2->len)
551                 /*
552                  * Hole 1 will be merge center
553                  * Same hole will be merged later
554                  */
555                 return -1;
556         /* Hole 2 will be merge center */
557         return 1;
558 }
559
560 /*
561  * Add a hole to the record
562  *
563  * This will do hole merge for copy_file_extent_holes(),
564  * which will ensure there won't be continuous holes.
565  */
566 static int add_file_extent_hole(struct rb_root *holes,
567                                 u64 start, u64 len)
568 {
569         struct file_extent_hole *hole;
570         struct file_extent_hole *prev = NULL;
571         struct file_extent_hole *next = NULL;
572
573         hole = malloc(sizeof(*hole));
574         if (!hole)
575                 return -ENOMEM;
576         hole->start = start;
577         hole->len = len;
578         /* Since compare will not return 0, no -EEXIST will happen */
579         rb_insert(holes, &hole->node, compare_hole);
580
581         /* simple merge with previous hole */
582         if (rb_prev(&hole->node))
583                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
584                                 node);
585         if (prev && prev->start + prev->len >= hole->start) {
586                 hole->len = hole->start + hole->len - prev->start;
587                 hole->start = prev->start;
588                 rb_erase(&prev->node, holes);
589                 free(prev);
590                 prev = NULL;
591         }
592
593         /* iterate merge with next holes */
594         while (1) {
595                 if (!rb_next(&hole->node))
596                         break;
597                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
598                                         node);
599                 if (hole->start + hole->len >= next->start) {
600                         if (hole->start + hole->len <= next->start + next->len)
601                                 hole->len = next->start + next->len -
602                                             hole->start;
603                         rb_erase(&next->node, holes);
604                         free(next);
605                         next = NULL;
606                 } else
607                         break;
608         }
609         return 0;
610 }
611
612 static int compare_hole_range(struct rb_node *node, void *data)
613 {
614         struct file_extent_hole *hole;
615         u64 start;
616
617         hole = (struct file_extent_hole *)data;
618         start = hole->start;
619
620         hole = rb_entry(node, struct file_extent_hole, node);
621         if (start < hole->start)
622                 return -1;
623         if (start >= hole->start && start < hole->start + hole->len)
624                 return 0;
625         return 1;
626 }
627
628 /*
629  * Delete a hole in the record
630  *
631  * This will do the hole split and is much restrict than add.
632  */
633 static int del_file_extent_hole(struct rb_root *holes,
634                                 u64 start, u64 len)
635 {
636         struct file_extent_hole *hole;
637         struct file_extent_hole tmp;
638         u64 prev_start = 0;
639         u64 prev_len = 0;
640         u64 next_start = 0;
641         u64 next_len = 0;
642         struct rb_node *node;
643         int have_prev = 0;
644         int have_next = 0;
645         int ret = 0;
646
647         tmp.start = start;
648         tmp.len = len;
649         node = rb_search(holes, &tmp, compare_hole_range, NULL);
650         if (!node)
651                 return -EEXIST;
652         hole = rb_entry(node, struct file_extent_hole, node);
653         if (start + len > hole->start + hole->len)
654                 return -EEXIST;
655
656         /*
657          * Now there will be no overlap, delete the hole and re-add the
658          * split(s) if they exists.
659          */
660         if (start > hole->start) {
661                 prev_start = hole->start;
662                 prev_len = start - hole->start;
663                 have_prev = 1;
664         }
665         if (hole->start + hole->len > start + len) {
666                 next_start = start + len;
667                 next_len = hole->start + hole->len - start - len;
668                 have_next = 1;
669         }
670         rb_erase(node, holes);
671         free(hole);
672         if (have_prev) {
673                 ret = add_file_extent_hole(holes, prev_start, prev_len);
674                 if (ret < 0)
675                         return ret;
676         }
677         if (have_next) {
678                 ret = add_file_extent_hole(holes, next_start, next_len);
679                 if (ret < 0)
680                         return ret;
681         }
682         return 0;
683 }
684
685 static int copy_file_extent_holes(struct rb_root *dst,
686                                   struct rb_root *src)
687 {
688         struct file_extent_hole *hole;
689         struct rb_node *node;
690         int ret = 0;
691
692         node = rb_first(src);
693         while (node) {
694                 hole = rb_entry(node, struct file_extent_hole, node);
695                 ret = add_file_extent_hole(dst, hole->start, hole->len);
696                 if (ret)
697                         break;
698                 node = rb_next(node);
699         }
700         return ret;
701 }
702
703 static void free_file_extent_holes(struct rb_root *holes)
704 {
705         struct rb_node *node;
706         struct file_extent_hole *hole;
707
708         node = rb_first(holes);
709         while (node) {
710                 hole = rb_entry(node, struct file_extent_hole, node);
711                 rb_erase(node, holes);
712                 free(hole);
713                 node = rb_first(holes);
714         }
715 }
716
717 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
718
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720                                  struct btrfs_root *root)
721 {
722         if (root->last_trans != trans->transid) {
723                 root->track_dirty = 1;
724                 root->last_trans = trans->transid;
725                 root->commit_root = root->node;
726                 extent_buffer_get(root->node);
727         }
728 }
729
730 static u8 imode_to_type(u32 imode)
731 {
732 #define S_SHIFT 12
733         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
735                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
736                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
737                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
738                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
739                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
740                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
741         };
742
743         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
744 #undef S_SHIFT
745 }
746
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
748 {
749         struct device_record *rec1;
750         struct device_record *rec2;
751
752         rec1 = rb_entry(node1, struct device_record, node);
753         rec2 = rb_entry(node2, struct device_record, node);
754         if (rec1->devid > rec2->devid)
755                 return -1;
756         else if (rec1->devid < rec2->devid)
757                 return 1;
758         else
759                 return 0;
760 }
761
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
763 {
764         struct inode_record *rec;
765         struct inode_backref *backref;
766         struct inode_backref *orig;
767         struct inode_backref *tmp;
768         struct orphan_data_extent *src_orphan;
769         struct orphan_data_extent *dst_orphan;
770         struct rb_node *rb;
771         size_t size;
772         int ret;
773
774         rec = malloc(sizeof(*rec));
775         if (!rec)
776                 return ERR_PTR(-ENOMEM);
777         memcpy(rec, orig_rec, sizeof(*rec));
778         rec->refs = 1;
779         INIT_LIST_HEAD(&rec->backrefs);
780         INIT_LIST_HEAD(&rec->orphan_extents);
781         rec->holes = RB_ROOT;
782
783         list_for_each_entry(orig, &orig_rec->backrefs, list) {
784                 size = sizeof(*orig) + orig->namelen + 1;
785                 backref = malloc(size);
786                 if (!backref) {
787                         ret = -ENOMEM;
788                         goto cleanup;
789                 }
790                 memcpy(backref, orig, size);
791                 list_add_tail(&backref->list, &rec->backrefs);
792         }
793         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794                 dst_orphan = malloc(sizeof(*dst_orphan));
795                 if (!dst_orphan) {
796                         ret = -ENOMEM;
797                         goto cleanup;
798                 }
799                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
801         }
802         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
803         if (ret < 0)
804                 goto cleanup_rb;
805
806         return rec;
807
808 cleanup_rb:
809         rb = rb_first(&rec->holes);
810         while (rb) {
811                 struct file_extent_hole *hole;
812
813                 hole = rb_entry(rb, struct file_extent_hole, node);
814                 rb = rb_next(rb);
815                 free(hole);
816         }
817
818 cleanup:
819         if (!list_empty(&rec->backrefs))
820                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821                         list_del(&orig->list);
822                         free(orig);
823                 }
824
825         if (!list_empty(&rec->orphan_extents))
826                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827                         list_del(&orig->list);
828                         free(orig);
829                 }
830
831         free(rec);
832
833         return ERR_PTR(ret);
834 }
835
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
837                                       u64 objectid)
838 {
839         struct orphan_data_extent *orphan;
840
841         if (list_empty(orphan_extents))
842                 return;
843         printf("The following data extent is lost in tree %llu:\n",
844                objectid);
845         list_for_each_entry(orphan, orphan_extents, list) {
846                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
848                        orphan->disk_len);
849         }
850 }
851
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
853 {
854         u64 root_objectid = root->root_key.objectid;
855         int errors = rec->errors;
856
857         if (!errors)
858                 return;
859         /* reloc root errors, we print its corresponding fs root objectid*/
860         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861                 root_objectid = root->root_key.offset;
862                 fprintf(stderr, "reloc");
863         }
864         fprintf(stderr, "root %llu inode %llu errors %x",
865                 (unsigned long long) root_objectid,
866                 (unsigned long long) rec->ino, rec->errors);
867
868         if (errors & I_ERR_NO_INODE_ITEM)
869                 fprintf(stderr, ", no inode item");
870         if (errors & I_ERR_NO_ORPHAN_ITEM)
871                 fprintf(stderr, ", no orphan item");
872         if (errors & I_ERR_DUP_INODE_ITEM)
873                 fprintf(stderr, ", dup inode item");
874         if (errors & I_ERR_DUP_DIR_INDEX)
875                 fprintf(stderr, ", dup dir index");
876         if (errors & I_ERR_ODD_DIR_ITEM)
877                 fprintf(stderr, ", odd dir item");
878         if (errors & I_ERR_ODD_FILE_EXTENT)
879                 fprintf(stderr, ", odd file extent");
880         if (errors & I_ERR_BAD_FILE_EXTENT)
881                 fprintf(stderr, ", bad file extent");
882         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883                 fprintf(stderr, ", file extent overlap");
884         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885                 fprintf(stderr, ", file extent discount");
886         if (errors & I_ERR_DIR_ISIZE_WRONG)
887                 fprintf(stderr, ", dir isize wrong");
888         if (errors & I_ERR_FILE_NBYTES_WRONG)
889                 fprintf(stderr, ", nbytes wrong");
890         if (errors & I_ERR_ODD_CSUM_ITEM)
891                 fprintf(stderr, ", odd csum item");
892         if (errors & I_ERR_SOME_CSUM_MISSING)
893                 fprintf(stderr, ", some csum missing");
894         if (errors & I_ERR_LINK_COUNT_WRONG)
895                 fprintf(stderr, ", link count wrong");
896         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897                 fprintf(stderr, ", orphan file extent");
898         fprintf(stderr, "\n");
899         /* Print the orphan extents if needed */
900         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
901                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
902
903         /* Print the holes if needed */
904         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905                 struct file_extent_hole *hole;
906                 struct rb_node *node;
907                 int found = 0;
908
909                 node = rb_first(&rec->holes);
910                 fprintf(stderr, "Found file extent holes:\n");
911                 while (node) {
912                         found = 1;
913                         hole = rb_entry(node, struct file_extent_hole, node);
914                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
915                                 hole->start, hole->len);
916                         node = rb_next(node);
917                 }
918                 if (!found)
919                         fprintf(stderr, "\tstart: 0, len: %llu\n",
920                                 round_up(rec->isize,
921                                          root->fs_info->sectorsize));
922         }
923 }
924
925 static void print_ref_error(int errors)
926 {
927         if (errors & REF_ERR_NO_DIR_ITEM)
928                 fprintf(stderr, ", no dir item");
929         if (errors & REF_ERR_NO_DIR_INDEX)
930                 fprintf(stderr, ", no dir index");
931         if (errors & REF_ERR_NO_INODE_REF)
932                 fprintf(stderr, ", no inode ref");
933         if (errors & REF_ERR_DUP_DIR_ITEM)
934                 fprintf(stderr, ", dup dir item");
935         if (errors & REF_ERR_DUP_DIR_INDEX)
936                 fprintf(stderr, ", dup dir index");
937         if (errors & REF_ERR_DUP_INODE_REF)
938                 fprintf(stderr, ", dup inode ref");
939         if (errors & REF_ERR_INDEX_UNMATCH)
940                 fprintf(stderr, ", index mismatch");
941         if (errors & REF_ERR_FILETYPE_UNMATCH)
942                 fprintf(stderr, ", filetype mismatch");
943         if (errors & REF_ERR_NAME_TOO_LONG)
944                 fprintf(stderr, ", name too long");
945         if (errors & REF_ERR_NO_ROOT_REF)
946                 fprintf(stderr, ", no root ref");
947         if (errors & REF_ERR_NO_ROOT_BACKREF)
948                 fprintf(stderr, ", no root backref");
949         if (errors & REF_ERR_DUP_ROOT_REF)
950                 fprintf(stderr, ", dup root ref");
951         if (errors & REF_ERR_DUP_ROOT_BACKREF)
952                 fprintf(stderr, ", dup root backref");
953         fprintf(stderr, "\n");
954 }
955
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
957                                           u64 ino, int mod)
958 {
959         struct ptr_node *node;
960         struct cache_extent *cache;
961         struct inode_record *rec = NULL;
962         int ret;
963
964         cache = lookup_cache_extent(inode_cache, ino, 1);
965         if (cache) {
966                 node = container_of(cache, struct ptr_node, cache);
967                 rec = node->data;
968                 if (mod && rec->refs > 1) {
969                         node->data = clone_inode_rec(rec);
970                         if (IS_ERR(node->data))
971                                 return node->data;
972                         rec->refs--;
973                         rec = node->data;
974                 }
975         } else if (mod) {
976                 rec = calloc(1, sizeof(*rec));
977                 if (!rec)
978                         return ERR_PTR(-ENOMEM);
979                 rec->ino = ino;
980                 rec->extent_start = (u64)-1;
981                 rec->refs = 1;
982                 INIT_LIST_HEAD(&rec->backrefs);
983                 INIT_LIST_HEAD(&rec->orphan_extents);
984                 rec->holes = RB_ROOT;
985
986                 node = malloc(sizeof(*node));
987                 if (!node) {
988                         free(rec);
989                         return ERR_PTR(-ENOMEM);
990                 }
991                 node->cache.start = ino;
992                 node->cache.size = 1;
993                 node->data = rec;
994
995                 if (ino == BTRFS_FREE_INO_OBJECTID)
996                         rec->found_link = 1;
997
998                 ret = insert_cache_extent(inode_cache, &node->cache);
999                 if (ret)
1000                         return ERR_PTR(-EEXIST);
1001         }
1002         return rec;
1003 }
1004
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1006 {
1007         struct orphan_data_extent *orphan;
1008
1009         while (!list_empty(orphan_extents)) {
1010                 orphan = list_entry(orphan_extents->next,
1011                                     struct orphan_data_extent, list);
1012                 list_del(&orphan->list);
1013                 free(orphan);
1014         }
1015 }
1016
1017 static void free_inode_rec(struct inode_record *rec)
1018 {
1019         struct inode_backref *backref;
1020
1021         if (--rec->refs > 0)
1022                 return;
1023
1024         while (!list_empty(&rec->backrefs)) {
1025                 backref = to_inode_backref(rec->backrefs.next);
1026                 list_del(&backref->list);
1027                 free(backref);
1028         }
1029         free_orphan_data_extents(&rec->orphan_extents);
1030         free_file_extent_holes(&rec->holes);
1031         free(rec);
1032 }
1033
1034 static int can_free_inode_rec(struct inode_record *rec)
1035 {
1036         if (!rec->errors && rec->checked && rec->found_inode_item &&
1037             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1038                 return 1;
1039         return 0;
1040 }
1041
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043                                  struct inode_record *rec)
1044 {
1045         struct cache_extent *cache;
1046         struct inode_backref *tmp, *backref;
1047         struct ptr_node *node;
1048         u8 filetype;
1049
1050         if (!rec->found_inode_item)
1051                 return;
1052
1053         filetype = imode_to_type(rec->imode);
1054         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055                 if (backref->found_dir_item && backref->found_dir_index) {
1056                         if (backref->filetype != filetype)
1057                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058                         if (!backref->errors && backref->found_inode_ref &&
1059                             rec->nlink == rec->found_link) {
1060                                 list_del(&backref->list);
1061                                 free(backref);
1062                         }
1063                 }
1064         }
1065
1066         if (!rec->checked || rec->merging)
1067                 return;
1068
1069         if (S_ISDIR(rec->imode)) {
1070                 if (rec->found_size != rec->isize)
1071                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072                 if (rec->found_file_extent)
1073                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075                 if (rec->found_dir_item)
1076                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1077                 if (rec->found_size != rec->nbytes)
1078                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1079                 if (rec->nlink > 0 && !no_holes &&
1080                     (rec->extent_end < rec->isize ||
1081                      first_extent_gap(&rec->holes) < rec->isize))
1082                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1083         }
1084
1085         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086                 if (rec->found_csum_item && rec->nodatasum)
1087                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088                 if (rec->some_csum_missing && !rec->nodatasum)
1089                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1090         }
1091
1092         BUG_ON(rec->refs != 1);
1093         if (can_free_inode_rec(rec)) {
1094                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095                 node = container_of(cache, struct ptr_node, cache);
1096                 BUG_ON(node->data != rec);
1097                 remove_cache_extent(inode_cache, &node->cache);
1098                 free(node);
1099                 free_inode_rec(rec);
1100         }
1101 }
1102
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1104 {
1105         struct btrfs_path path;
1106         struct btrfs_key key;
1107         int ret;
1108
1109         key.objectid = BTRFS_ORPHAN_OBJECTID;
1110         key.type = BTRFS_ORPHAN_ITEM_KEY;
1111         key.offset = ino;
1112
1113         btrfs_init_path(&path);
1114         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115         btrfs_release_path(&path);
1116         if (ret > 0)
1117                 ret = -ENOENT;
1118         return ret;
1119 }
1120
1121 static int process_inode_item(struct extent_buffer *eb,
1122                               int slot, struct btrfs_key *key,
1123                               struct shared_node *active_node)
1124 {
1125         struct inode_record *rec;
1126         struct btrfs_inode_item *item;
1127
1128         rec = active_node->current;
1129         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130         if (rec->found_inode_item) {
1131                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1132                 return 1;
1133         }
1134         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135         rec->nlink = btrfs_inode_nlink(eb, item);
1136         rec->isize = btrfs_inode_size(eb, item);
1137         rec->nbytes = btrfs_inode_nbytes(eb, item);
1138         rec->imode = btrfs_inode_mode(eb, item);
1139         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1140                 rec->nodatasum = 1;
1141         rec->found_inode_item = 1;
1142         if (rec->nlink == 0)
1143                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144         maybe_free_inode_rec(&active_node->inode_cache, rec);
1145         return 0;
1146 }
1147
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1149                                                 const char *name,
1150                                                 int namelen, u64 dir)
1151 {
1152         struct inode_backref *backref;
1153
1154         list_for_each_entry(backref, &rec->backrefs, list) {
1155                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1156                         break;
1157                 if (backref->dir != dir || backref->namelen != namelen)
1158                         continue;
1159                 if (memcmp(name, backref->name, namelen))
1160                         continue;
1161                 return backref;
1162         }
1163
1164         backref = malloc(sizeof(*backref) + namelen + 1);
1165         if (!backref)
1166                 return NULL;
1167         memset(backref, 0, sizeof(*backref));
1168         backref->dir = dir;
1169         backref->namelen = namelen;
1170         memcpy(backref->name, name, namelen);
1171         backref->name[namelen] = '\0';
1172         list_add_tail(&backref->list, &rec->backrefs);
1173         return backref;
1174 }
1175
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177                              u64 ino, u64 dir, u64 index,
1178                              const char *name, int namelen,
1179                              u8 filetype, u8 itemtype, int errors)
1180 {
1181         struct inode_record *rec;
1182         struct inode_backref *backref;
1183
1184         rec = get_inode_rec(inode_cache, ino, 1);
1185         BUG_ON(IS_ERR(rec));
1186         backref = get_inode_backref(rec, name, namelen, dir);
1187         BUG_ON(!backref);
1188         if (errors)
1189                 backref->errors |= errors;
1190         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191                 if (backref->found_dir_index)
1192                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193                 if (backref->found_inode_ref && backref->index != index)
1194                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1195                 if (backref->found_dir_item && backref->filetype != filetype)
1196                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1197
1198                 backref->index = index;
1199                 backref->filetype = filetype;
1200                 backref->found_dir_index = 1;
1201         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1202                 rec->found_link++;
1203                 if (backref->found_dir_item)
1204                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205                 if (backref->found_dir_index && backref->filetype != filetype)
1206                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1207
1208                 backref->filetype = filetype;
1209                 backref->found_dir_item = 1;
1210         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212                 if (backref->found_inode_ref)
1213                         backref->errors |= REF_ERR_DUP_INODE_REF;
1214                 if (backref->found_dir_index && backref->index != index)
1215                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1216                 else
1217                         backref->index = index;
1218
1219                 backref->ref_type = itemtype;
1220                 backref->found_inode_ref = 1;
1221         } else {
1222                 BUG_ON(1);
1223         }
1224
1225         maybe_free_inode_rec(inode_cache, rec);
1226         return 0;
1227 }
1228
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230                             struct cache_tree *dst_cache)
1231 {
1232         struct inode_backref *backref;
1233         u32 dir_count = 0;
1234         int ret = 0;
1235
1236         dst->merging = 1;
1237         list_for_each_entry(backref, &src->backrefs, list) {
1238                 if (backref->found_dir_index) {
1239                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1240                                         backref->index, backref->name,
1241                                         backref->namelen, backref->filetype,
1242                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1243                 }
1244                 if (backref->found_dir_item) {
1245                         dir_count++;
1246                         add_inode_backref(dst_cache, dst->ino,
1247                                         backref->dir, 0, backref->name,
1248                                         backref->namelen, backref->filetype,
1249                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1250                 }
1251                 if (backref->found_inode_ref) {
1252                         add_inode_backref(dst_cache, dst->ino,
1253                                         backref->dir, backref->index,
1254                                         backref->name, backref->namelen, 0,
1255                                         backref->ref_type, backref->errors);
1256                 }
1257         }
1258
1259         if (src->found_dir_item)
1260                 dst->found_dir_item = 1;
1261         if (src->found_file_extent)
1262                 dst->found_file_extent = 1;
1263         if (src->found_csum_item)
1264                 dst->found_csum_item = 1;
1265         if (src->some_csum_missing)
1266                 dst->some_csum_missing = 1;
1267         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1269                 if (ret < 0)
1270                         return ret;
1271         }
1272
1273         BUG_ON(src->found_link < dir_count);
1274         dst->found_link += src->found_link - dir_count;
1275         dst->found_size += src->found_size;
1276         if (src->extent_start != (u64)-1) {
1277                 if (dst->extent_start == (u64)-1) {
1278                         dst->extent_start = src->extent_start;
1279                         dst->extent_end = src->extent_end;
1280                 } else {
1281                         if (dst->extent_end > src->extent_start)
1282                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1283                         else if (dst->extent_end < src->extent_start) {
1284                                 ret = add_file_extent_hole(&dst->holes,
1285                                         dst->extent_end,
1286                                         src->extent_start - dst->extent_end);
1287                         }
1288                         if (dst->extent_end < src->extent_end)
1289                                 dst->extent_end = src->extent_end;
1290                 }
1291         }
1292
1293         dst->errors |= src->errors;
1294         if (src->found_inode_item) {
1295                 if (!dst->found_inode_item) {
1296                         dst->nlink = src->nlink;
1297                         dst->isize = src->isize;
1298                         dst->nbytes = src->nbytes;
1299                         dst->imode = src->imode;
1300                         dst->nodatasum = src->nodatasum;
1301                         dst->found_inode_item = 1;
1302                 } else {
1303                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1304                 }
1305         }
1306         dst->merging = 0;
1307
1308         return 0;
1309 }
1310
1311 static int splice_shared_node(struct shared_node *src_node,
1312                               struct shared_node *dst_node)
1313 {
1314         struct cache_extent *cache;
1315         struct ptr_node *node, *ins;
1316         struct cache_tree *src, *dst;
1317         struct inode_record *rec, *conflict;
1318         u64 current_ino = 0;
1319         int splice = 0;
1320         int ret;
1321
1322         if (--src_node->refs == 0)
1323                 splice = 1;
1324         if (src_node->current)
1325                 current_ino = src_node->current->ino;
1326
1327         src = &src_node->root_cache;
1328         dst = &dst_node->root_cache;
1329 again:
1330         cache = search_cache_extent(src, 0);
1331         while (cache) {
1332                 node = container_of(cache, struct ptr_node, cache);
1333                 rec = node->data;
1334                 cache = next_cache_extent(cache);
1335
1336                 if (splice) {
1337                         remove_cache_extent(src, &node->cache);
1338                         ins = node;
1339                 } else {
1340                         ins = malloc(sizeof(*ins));
1341                         BUG_ON(!ins);
1342                         ins->cache.start = node->cache.start;
1343                         ins->cache.size = node->cache.size;
1344                         ins->data = rec;
1345                         rec->refs++;
1346                 }
1347                 ret = insert_cache_extent(dst, &ins->cache);
1348                 if (ret == -EEXIST) {
1349                         conflict = get_inode_rec(dst, rec->ino, 1);
1350                         BUG_ON(IS_ERR(conflict));
1351                         merge_inode_recs(rec, conflict, dst);
1352                         if (rec->checked) {
1353                                 conflict->checked = 1;
1354                                 if (dst_node->current == conflict)
1355                                         dst_node->current = NULL;
1356                         }
1357                         maybe_free_inode_rec(dst, conflict);
1358                         free_inode_rec(rec);
1359                         free(ins);
1360                 } else {
1361                         BUG_ON(ret);
1362                 }
1363         }
1364
1365         if (src == &src_node->root_cache) {
1366                 src = &src_node->inode_cache;
1367                 dst = &dst_node->inode_cache;
1368                 goto again;
1369         }
1370
1371         if (current_ino > 0 && (!dst_node->current ||
1372             current_ino > dst_node->current->ino)) {
1373                 if (dst_node->current) {
1374                         dst_node->current->checked = 1;
1375                         maybe_free_inode_rec(dst, dst_node->current);
1376                 }
1377                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378                 BUG_ON(IS_ERR(dst_node->current));
1379         }
1380         return 0;
1381 }
1382
1383 static void free_inode_ptr(struct cache_extent *cache)
1384 {
1385         struct ptr_node *node;
1386         struct inode_record *rec;
1387
1388         node = container_of(cache, struct ptr_node, cache);
1389         rec = node->data;
1390         free_inode_rec(rec);
1391         free(node);
1392 }
1393
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1395
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1397                                             u64 bytenr)
1398 {
1399         struct cache_extent *cache;
1400         struct shared_node *node;
1401
1402         cache = lookup_cache_extent(shared, bytenr, 1);
1403         if (cache) {
1404                 node = container_of(cache, struct shared_node, cache);
1405                 return node;
1406         }
1407         return NULL;
1408 }
1409
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1411 {
1412         int ret;
1413         struct shared_node *node;
1414
1415         node = calloc(1, sizeof(*node));
1416         if (!node)
1417                 return -ENOMEM;
1418         node->cache.start = bytenr;
1419         node->cache.size = 1;
1420         cache_tree_init(&node->root_cache);
1421         cache_tree_init(&node->inode_cache);
1422         node->refs = refs;
1423
1424         ret = insert_cache_extent(shared, &node->cache);
1425
1426         return ret;
1427 }
1428
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430                              struct walk_control *wc, int level)
1431 {
1432         struct shared_node *node;
1433         struct shared_node *dest;
1434         int ret;
1435
1436         if (level == wc->active_node)
1437                 return 0;
1438
1439         BUG_ON(wc->active_node <= level);
1440         node = find_shared_node(&wc->shared, bytenr);
1441         if (!node) {
1442                 ret = add_shared_node(&wc->shared, bytenr, refs);
1443                 BUG_ON(ret);
1444                 node = find_shared_node(&wc->shared, bytenr);
1445                 wc->nodes[level] = node;
1446                 wc->active_node = level;
1447                 return 0;
1448         }
1449
1450         if (wc->root_level == wc->active_node &&
1451             btrfs_root_refs(&root->root_item) == 0) {
1452                 if (--node->refs == 0) {
1453                         free_inode_recs_tree(&node->root_cache);
1454                         free_inode_recs_tree(&node->inode_cache);
1455                         remove_cache_extent(&wc->shared, &node->cache);
1456                         free(node);
1457                 }
1458                 return 1;
1459         }
1460
1461         dest = wc->nodes[wc->active_node];
1462         splice_shared_node(node, dest);
1463         if (node->refs == 0) {
1464                 remove_cache_extent(&wc->shared, &node->cache);
1465                 free(node);
1466         }
1467         return 1;
1468 }
1469
1470 static int leave_shared_node(struct btrfs_root *root,
1471                              struct walk_control *wc, int level)
1472 {
1473         struct shared_node *node;
1474         struct shared_node *dest;
1475         int i;
1476
1477         if (level == wc->root_level)
1478                 return 0;
1479
1480         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1481                 if (wc->nodes[i])
1482                         break;
1483         }
1484         BUG_ON(i >= BTRFS_MAX_LEVEL);
1485
1486         node = wc->nodes[wc->active_node];
1487         wc->nodes[wc->active_node] = NULL;
1488         wc->active_node = i;
1489
1490         dest = wc->nodes[wc->active_node];
1491         if (wc->active_node < wc->root_level ||
1492             btrfs_root_refs(&root->root_item) > 0) {
1493                 BUG_ON(node->refs <= 1);
1494                 splice_shared_node(node, dest);
1495         } else {
1496                 BUG_ON(node->refs < 2);
1497                 node->refs--;
1498         }
1499         return 0;
1500 }
1501
1502 /*
1503  * Returns:
1504  * < 0 - on error
1505  * 1   - if the root with id child_root_id is a child of root parent_root_id
1506  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1507  *       has other root(s) as parent(s)
1508  * 2   - if the root child_root_id doesn't have any parent roots
1509  */
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1511                          u64 child_root_id)
1512 {
1513         struct btrfs_path path;
1514         struct btrfs_key key;
1515         struct extent_buffer *leaf;
1516         int has_parent = 0;
1517         int ret;
1518
1519         btrfs_init_path(&path);
1520
1521         key.objectid = parent_root_id;
1522         key.type = BTRFS_ROOT_REF_KEY;
1523         key.offset = child_root_id;
1524         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525                                 0, 0);
1526         if (ret < 0)
1527                 return ret;
1528         btrfs_release_path(&path);
1529         if (!ret)
1530                 return 1;
1531
1532         key.objectid = child_root_id;
1533         key.type = BTRFS_ROOT_BACKREF_KEY;
1534         key.offset = 0;
1535         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1536                                 0, 0);
1537         if (ret < 0)
1538                 goto out;
1539
1540         while (1) {
1541                 leaf = path.nodes[0];
1542                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1544                         if (ret)
1545                                 break;
1546                         leaf = path.nodes[0];
1547                 }
1548
1549                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1550                 if (key.objectid != child_root_id ||
1551                     key.type != BTRFS_ROOT_BACKREF_KEY)
1552                         break;
1553
1554                 has_parent = 1;
1555
1556                 if (key.offset == parent_root_id) {
1557                         btrfs_release_path(&path);
1558                         return 1;
1559                 }
1560
1561                 path.slots[0]++;
1562         }
1563 out:
1564         btrfs_release_path(&path);
1565         if (ret < 0)
1566                 return ret;
1567         return has_parent ? 0 : 2;
1568 }
1569
1570 static int process_dir_item(struct extent_buffer *eb,
1571                             int slot, struct btrfs_key *key,
1572                             struct shared_node *active_node)
1573 {
1574         u32 total;
1575         u32 cur = 0;
1576         u32 len;
1577         u32 name_len;
1578         u32 data_len;
1579         int error;
1580         int nritems = 0;
1581         u8 filetype;
1582         struct btrfs_dir_item *di;
1583         struct inode_record *rec;
1584         struct cache_tree *root_cache;
1585         struct cache_tree *inode_cache;
1586         struct btrfs_key location;
1587         char namebuf[BTRFS_NAME_LEN];
1588
1589         root_cache = &active_node->root_cache;
1590         inode_cache = &active_node->inode_cache;
1591         rec = active_node->current;
1592         rec->found_dir_item = 1;
1593
1594         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595         total = btrfs_item_size_nr(eb, slot);
1596         while (cur < total) {
1597                 nritems++;
1598                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599                 name_len = btrfs_dir_name_len(eb, di);
1600                 data_len = btrfs_dir_data_len(eb, di);
1601                 filetype = btrfs_dir_type(eb, di);
1602
1603                 rec->found_size += name_len;
1604                 if (cur + sizeof(*di) + name_len > total ||
1605                     name_len > BTRFS_NAME_LEN) {
1606                         error = REF_ERR_NAME_TOO_LONG;
1607
1608                         if (cur + sizeof(*di) > total)
1609                                 break;
1610                         len = min_t(u32, total - cur - sizeof(*di),
1611                                     BTRFS_NAME_LEN);
1612                 } else {
1613                         len = name_len;
1614                         error = 0;
1615                 }
1616
1617                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1618
1619                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620                     key->offset != btrfs_name_hash(namebuf, len)) {
1621                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1622                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623                         key->objectid, key->offset, namebuf, len, filetype,
1624                         key->offset, btrfs_name_hash(namebuf, len));
1625                 }
1626
1627                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628                         add_inode_backref(inode_cache, location.objectid,
1629                                           key->objectid, key->offset, namebuf,
1630                                           len, filetype, key->type, error);
1631                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632                         add_inode_backref(root_cache, location.objectid,
1633                                           key->objectid, key->offset,
1634                                           namebuf, len, filetype,
1635                                           key->type, error);
1636                 } else {
1637                         fprintf(stderr, "invalid location in dir item %u\n",
1638                                 location.type);
1639                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640                                           key->objectid, key->offset, namebuf,
1641                                           len, filetype, key->type, error);
1642                 }
1643
1644                 len = sizeof(*di) + name_len + data_len;
1645                 di = (struct btrfs_dir_item *)((char *)di + len);
1646                 cur += len;
1647         }
1648         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1650
1651         return 0;
1652 }
1653
1654 static int process_inode_ref(struct extent_buffer *eb,
1655                              int slot, struct btrfs_key *key,
1656                              struct shared_node *active_node)
1657 {
1658         u32 total;
1659         u32 cur = 0;
1660         u32 len;
1661         u32 name_len;
1662         u64 index;
1663         int error;
1664         struct cache_tree *inode_cache;
1665         struct btrfs_inode_ref *ref;
1666         char namebuf[BTRFS_NAME_LEN];
1667
1668         inode_cache = &active_node->inode_cache;
1669
1670         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671         total = btrfs_item_size_nr(eb, slot);
1672         while (cur < total) {
1673                 name_len = btrfs_inode_ref_name_len(eb, ref);
1674                 index = btrfs_inode_ref_index(eb, ref);
1675
1676                 /* inode_ref + namelen should not cross item boundary */
1677                 if (cur + sizeof(*ref) + name_len > total ||
1678                     name_len > BTRFS_NAME_LEN) {
1679                         if (total < cur + sizeof(*ref))
1680                                 break;
1681
1682                         /* Still try to read out the remaining part */
1683                         len = min_t(u32, total - cur - sizeof(*ref),
1684                                     BTRFS_NAME_LEN);
1685                         error = REF_ERR_NAME_TOO_LONG;
1686                 } else {
1687                         len = name_len;
1688                         error = 0;
1689                 }
1690
1691                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692                 add_inode_backref(inode_cache, key->objectid, key->offset,
1693                                   index, namebuf, len, 0, key->type, error);
1694
1695                 len = sizeof(*ref) + name_len;
1696                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1697                 cur += len;
1698         }
1699         return 0;
1700 }
1701
1702 static int process_inode_extref(struct extent_buffer *eb,
1703                                 int slot, struct btrfs_key *key,
1704                                 struct shared_node *active_node)
1705 {
1706         u32 total;
1707         u32 cur = 0;
1708         u32 len;
1709         u32 name_len;
1710         u64 index;
1711         u64 parent;
1712         int error;
1713         struct cache_tree *inode_cache;
1714         struct btrfs_inode_extref *extref;
1715         char namebuf[BTRFS_NAME_LEN];
1716
1717         inode_cache = &active_node->inode_cache;
1718
1719         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720         total = btrfs_item_size_nr(eb, slot);
1721         while (cur < total) {
1722                 name_len = btrfs_inode_extref_name_len(eb, extref);
1723                 index = btrfs_inode_extref_index(eb, extref);
1724                 parent = btrfs_inode_extref_parent(eb, extref);
1725                 if (name_len <= BTRFS_NAME_LEN) {
1726                         len = name_len;
1727                         error = 0;
1728                 } else {
1729                         len = BTRFS_NAME_LEN;
1730                         error = REF_ERR_NAME_TOO_LONG;
1731                 }
1732                 read_extent_buffer(eb, namebuf,
1733                                    (unsigned long)(extref + 1), len);
1734                 add_inode_backref(inode_cache, key->objectid, parent,
1735                                   index, namebuf, len, 0, key->type, error);
1736
1737                 len = sizeof(*extref) + name_len;
1738                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1739                 cur += len;
1740         }
1741         return 0;
1742
1743 }
1744
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746                             u64 len, u64 *found)
1747 {
1748         struct btrfs_key key;
1749         struct btrfs_path path;
1750         struct extent_buffer *leaf;
1751         int ret;
1752         size_t size;
1753         *found = 0;
1754         u64 csum_end;
1755         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1756
1757         btrfs_init_path(&path);
1758
1759         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1760         key.offset = start;
1761         key.type = BTRFS_EXTENT_CSUM_KEY;
1762
1763         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1764                                 &key, &path, 0, 0);
1765         if (ret < 0)
1766                 goto out;
1767         if (ret > 0 && path.slots[0] > 0) {
1768                 leaf = path.nodes[0];
1769                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771                     key.type == BTRFS_EXTENT_CSUM_KEY)
1772                         path.slots[0]--;
1773         }
1774
1775         while (len > 0) {
1776                 leaf = path.nodes[0];
1777                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1779                         if (ret > 0)
1780                                 break;
1781                         else if (ret < 0)
1782                                 goto out;
1783                         leaf = path.nodes[0];
1784                 }
1785
1786                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1787                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788                     key.type != BTRFS_EXTENT_CSUM_KEY)
1789                         break;
1790
1791                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1792                 if (key.offset >= start + len)
1793                         break;
1794
1795                 if (key.offset > start)
1796                         start = key.offset;
1797
1798                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1799                 csum_end = key.offset + (size / csum_size) *
1800                            root->fs_info->sectorsize;
1801                 if (csum_end > start) {
1802                         size = min(csum_end - start, len);
1803                         len -= size;
1804                         start += size;
1805                         *found += size;
1806                 }
1807
1808                 path.slots[0]++;
1809         }
1810 out:
1811         btrfs_release_path(&path);
1812         if (ret < 0)
1813                 return ret;
1814         return 0;
1815 }
1816
1817 static int process_file_extent(struct btrfs_root *root,
1818                                 struct extent_buffer *eb,
1819                                 int slot, struct btrfs_key *key,
1820                                 struct shared_node *active_node)
1821 {
1822         struct inode_record *rec;
1823         struct btrfs_file_extent_item *fi;
1824         u64 num_bytes = 0;
1825         u64 disk_bytenr = 0;
1826         u64 extent_offset = 0;
1827         u64 mask = root->fs_info->sectorsize - 1;
1828         int extent_type;
1829         int ret;
1830
1831         rec = active_node->current;
1832         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833         rec->found_file_extent = 1;
1834
1835         if (rec->extent_start == (u64)-1) {
1836                 rec->extent_start = key->offset;
1837                 rec->extent_end = key->offset;
1838         }
1839
1840         if (rec->extent_end > key->offset)
1841                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842         else if (rec->extent_end < key->offset) {
1843                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844                                            key->offset - rec->extent_end);
1845                 if (ret < 0)
1846                         return ret;
1847         }
1848
1849         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850         extent_type = btrfs_file_extent_type(eb, fi);
1851
1852         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1854                 if (num_bytes == 0)
1855                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856                 rec->found_size += num_bytes;
1857                 num_bytes = (num_bytes + mask) & ~mask;
1858         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862                 extent_offset = btrfs_file_extent_offset(eb, fi);
1863                 if (num_bytes == 0 || (num_bytes & mask))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (num_bytes + extent_offset >
1866                     btrfs_file_extent_ram_bytes(eb, fi))
1867                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1868                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869                     (btrfs_file_extent_compression(eb, fi) ||
1870                      btrfs_file_extent_encryption(eb, fi) ||
1871                      btrfs_file_extent_other_encoding(eb, fi)))
1872                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1873                 if (disk_bytenr > 0)
1874                         rec->found_size += num_bytes;
1875         } else {
1876                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1877         }
1878         rec->extent_end = key->offset + num_bytes;
1879
1880         /*
1881          * The data reloc tree will copy full extents into its inode and then
1882          * copy the corresponding csums.  Because the extent it copied could be
1883          * a preallocated extent that hasn't been written to yet there may be no
1884          * csums to copy, ergo we won't have csums for our file extent.  This is
1885          * ok so just don't bother checking csums if the inode belongs to the
1886          * data reloc tree.
1887          */
1888         if (disk_bytenr > 0 &&
1889             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1890                 u64 found;
1891                 if (btrfs_file_extent_compression(eb, fi))
1892                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1893                 else
1894                         disk_bytenr += extent_offset;
1895
1896                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1897                 if (ret < 0)
1898                         return ret;
1899                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1900                         if (found > 0)
1901                                 rec->found_csum_item = 1;
1902                         if (found < num_bytes)
1903                                 rec->some_csum_missing = 1;
1904                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1905                         if (found > 0)
1906                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1907                 }
1908         }
1909         return 0;
1910 }
1911
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913                             struct walk_control *wc)
1914 {
1915         struct btrfs_key key;
1916         u32 nritems;
1917         int i;
1918         int ret = 0;
1919         struct cache_tree *inode_cache;
1920         struct shared_node *active_node;
1921
1922         if (wc->root_level == wc->active_node &&
1923             btrfs_root_refs(&root->root_item) == 0)
1924                 return 0;
1925
1926         active_node = wc->nodes[wc->active_node];
1927         inode_cache = &active_node->inode_cache;
1928         nritems = btrfs_header_nritems(eb);
1929         for (i = 0; i < nritems; i++) {
1930                 btrfs_item_key_to_cpu(eb, &key, i);
1931
1932                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1933                         continue;
1934                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1935                         continue;
1936
1937                 if (active_node->current == NULL ||
1938                     active_node->current->ino < key.objectid) {
1939                         if (active_node->current) {
1940                                 active_node->current->checked = 1;
1941                                 maybe_free_inode_rec(inode_cache,
1942                                                      active_node->current);
1943                         }
1944                         active_node->current = get_inode_rec(inode_cache,
1945                                                              key.objectid, 1);
1946                         BUG_ON(IS_ERR(active_node->current));
1947                 }
1948                 switch (key.type) {
1949                 case BTRFS_DIR_ITEM_KEY:
1950                 case BTRFS_DIR_INDEX_KEY:
1951                         ret = process_dir_item(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_REF_KEY:
1954                         ret = process_inode_ref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_EXTREF_KEY:
1957                         ret = process_inode_extref(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_INODE_ITEM_KEY:
1960                         ret = process_inode_item(eb, i, &key, active_node);
1961                         break;
1962                 case BTRFS_EXTENT_DATA_KEY:
1963                         ret = process_file_extent(root, eb, i, &key,
1964                                                   active_node);
1965                         break;
1966                 default:
1967                         break;
1968                 };
1969         }
1970         return ret;
1971 }
1972
/*
 * Per-level cache of what is already known about the tree blocks on the path
 * currently being walked, indexed by tree level.
 */
1973 struct node_refs {
1974         u64 bytenr[BTRFS_MAX_LEVEL];        /* start of the block the entry describes */
1975         u64 refs[BTRFS_MAX_LEVEL];          /* ref count from btrfs_lookup_extent_info() */
1976         int need_check[BTRFS_MAX_LEVEL];    /* non-zero if this root should check the block */
1977         /* field for checking all trees */
1978         int checked[BTRFS_MAX_LEVEL];
1979         /* the corresponding extent should be marked as full backref or not */
1980         int full_backref[BTRFS_MAX_LEVEL];
1981 };
1982
/*
 * Forward declarations: process_one_leaf_v2() below calls both helpers
 * before their definitions appear.
 */
1983 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1984                              struct extent_buffer *eb, struct node_refs *nrefs,
1985                              u64 level, int check_all);
1986 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1987                             unsigned int ext_ref);
1988
1989 /*
1990  * Returns >0  Found error, not fatal, should continue
1991  * Returns <0  Fatal error, must exit the whole check
1992  * Returns 0   No errors found
1993  */
1994 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1995                                struct node_refs *nrefs, int *level, int ext_ref)
1996 {
1997         struct extent_buffer *cur = path->nodes[0];
1998         struct btrfs_key key;
1999         u64 cur_bytenr;
2000         u32 nritems;
2001         u64 first_ino = 0;
2002         int root_level = btrfs_header_level(root->node);
2003         int i;
2004         int ret = 0; /* Final return value */
2005         int err = 0; /* Positive error bitmap */
2006
2007         cur_bytenr = cur->start;
2008
2009         /* skip to first inode item or the first inode number change */
2010         nritems = btrfs_header_nritems(cur);
2011         for (i = 0; i < nritems; i++) {
2012                 btrfs_item_key_to_cpu(cur, &key, i);
2013                 if (i == 0)
2014                         first_ino = key.objectid;
2015                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2016                     (first_ino && first_ino != key.objectid))
2017                         break;
2018         }
2019         if (i == nritems) {
2020                 path->slots[0] = nritems;
2021                 return 0;
2022         }
2023         path->slots[0] = i;
2024
2025 again:
2026         err |= check_inode_item(root, path, ext_ref);
2027
2028         /* modify cur since check_inode_item may change path */
2029         cur = path->nodes[0];
2030
2031         if (err & LAST_ITEM)
2032                 goto out;
2033
2034         /* still have inode items in thie leaf */
2035         if (cur->start == cur_bytenr)
2036                 goto again;
2037
2038         /*
2039          * we have switched to another leaf, above nodes may
2040          * have changed, here walk down the path, if a node
2041          * or leaf is shared, check whether we can skip this
2042          * node or leaf.
2043          */
2044         for (i = root_level; i >= 0; i--) {
2045                 if (path->nodes[i]->start == nrefs->bytenr[i])
2046                         continue;
2047
2048                 ret = update_nodes_refs(root, path->nodes[i]->start,
2049                                 path->nodes[i], nrefs, i, 0);
2050                 if (ret)
2051                         goto out;
2052
2053                 if (!nrefs->need_check[i]) {
2054                         *level += 1;
2055                         break;
2056                 }
2057         }
2058
2059         for (i = 0; i < *level; i++) {
2060                 free_extent_buffer(path->nodes[i]);
2061                 path->nodes[i] = NULL;
2062         }
2063 out:
2064         err &= ~LAST_ITEM;
2065         if (err && !ret)
2066                 ret = err;
2067         return ret;
2068 }
2069
2070 static void reada_walk_down(struct btrfs_root *root,
2071                             struct extent_buffer *node, int slot)
2072 {
2073         struct btrfs_fs_info *fs_info = root->fs_info;
2074         u64 bytenr;
2075         u64 ptr_gen;
2076         u32 nritems;
2077         int i;
2078         int level;
2079
2080         level = btrfs_header_level(node);
2081         if (level != 1)
2082                 return;
2083
2084         nritems = btrfs_header_nritems(node);
2085         for (i = slot; i < nritems; i++) {
2086                 bytenr = btrfs_node_blockptr(node, i);
2087                 ptr_gen = btrfs_node_ptr_generation(node, i);
2088                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2089         }
2090 }
2091
2092 /*
2093  * Check the child node/leaf by the following condition:
2094  * 1. the first item key of the node/leaf should be the same with the one
2095  *    in parent.
2096  * 2. block in parent node should match the child node/leaf.
2097  * 3. generation of parent node and child's header should be consistent.
2098  *
2099  * Or the child node/leaf pointed by the key in parent is not valid.
2100  *
2101  * We hope to check leaf owner too, but since subvol may share leaves,
2102  * which makes leaf owner check not so strong, key check should be
2103  * sufficient enough for that case.
2104  */
2105 static int check_child_node(struct extent_buffer *parent, int slot,
2106                             struct extent_buffer *child)
2107 {
2108         struct btrfs_key parent_key;
2109         struct btrfs_key child_key;
2110         int ret = 0;
2111
2112         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2113         if (btrfs_header_level(child) == 0)
2114                 btrfs_item_key_to_cpu(child, &child_key, 0);
2115         else
2116                 btrfs_node_key_to_cpu(child, &child_key, 0);
2117
2118         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2119                 ret = -EINVAL;
2120                 fprintf(stderr,
2121                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2122                         parent_key.objectid, parent_key.type, parent_key.offset,
2123                         child_key.objectid, child_key.type, child_key.offset);
2124         }
2125         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2126                 ret = -EINVAL;
2127                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2128                         btrfs_node_blockptr(parent, slot),
2129                         btrfs_header_bytenr(child));
2130         }
2131         if (btrfs_node_ptr_generation(parent, slot) !=
2132             btrfs_header_generation(child)) {
2133                 ret = -EINVAL;
2134                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2135                         btrfs_header_generation(child),
2136                         btrfs_node_ptr_generation(parent, slot));
2137         }
2138         return ret;
2139 }
2140
2141 /*
2142  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2143  * in every fs or file tree check. Here we find its all root ids, and only check
2144  * it in the fs or file tree which has the smallest root id.
2145  */
2146 static int need_check(struct btrfs_root *root, struct ulist *roots)
2147 {
2148         struct rb_node *node;
2149         struct ulist_node *u;
2150
2151         if (roots->nnodes == 1)
2152                 return 1;
2153
2154         node = rb_first(&roots->root);
2155         u = rb_entry(node, struct ulist_node, rb_node);
2156         /*
2157          * current root id is not smallest, we skip it and let it be checked
2158          * in the fs or file tree who hash the smallest root id.
2159          */
2160         if (root->objectid != u->val)
2161                 return 0;
2162
2163         return 1;
2164 }
2165
2166 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2167                                u64 *flags_ret)
2168 {
2169         struct btrfs_root *extent_root = root->fs_info->extent_root;
2170         struct btrfs_root_item *ri = &root->root_item;
2171         struct btrfs_extent_inline_ref *iref;
2172         struct btrfs_extent_item *ei;
2173         struct btrfs_key key;
2174         struct btrfs_path *path = NULL;
2175         unsigned long ptr;
2176         unsigned long end;
2177         u64 flags;
2178         u64 owner = 0;
2179         u64 offset;
2180         int slot;
2181         int type;
2182         int ret = 0;
2183
2184         /*
2185          * Except file/reloc tree, we can not have FULL BACKREF MODE
2186          */
2187         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
2188                 goto normal;
2189
2190         /* root node */
2191         if (eb->start == btrfs_root_bytenr(ri))
2192                 goto normal;
2193
2194         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2195                 goto full_backref;
2196
2197         owner = btrfs_header_owner(eb);
2198         if (owner == root->objectid)
2199                 goto normal;
2200
2201         path = btrfs_alloc_path();
2202         if (!path)
2203                 return -ENOMEM;
2204
2205         key.objectid = btrfs_header_bytenr(eb);
2206         key.type = (u8)-1;
2207         key.offset = (u64)-1;
2208
2209         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2210         if (ret <= 0) {
2211                 ret = -EIO;
2212                 goto out;
2213         }
2214
2215         if (ret > 0) {
2216                 ret = btrfs_previous_extent_item(extent_root, path,
2217                                                  key.objectid);
2218                 if (ret)
2219                         goto full_backref;
2220
2221         }
2222         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2223
2224         eb = path->nodes[0];
2225         slot = path->slots[0];
2226         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2227
2228         flags = btrfs_extent_flags(eb, ei);
2229         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2230                 goto full_backref;
2231
2232         ptr = (unsigned long)(ei + 1);
2233         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
2234
2235         if (key.type == BTRFS_EXTENT_ITEM_KEY)
2236                 ptr += sizeof(struct btrfs_tree_block_info);
2237
2238 next:
2239         /* Reached extent item ends normally */
2240         if (ptr == end)
2241                 goto full_backref;
2242
2243         /* Beyond extent item end, wrong item size */
2244         if (ptr > end) {
2245                 error("extent item at bytenr %llu slot %d has wrong size",
2246                         eb->start, slot);
2247                 goto full_backref;
2248         }
2249
2250         iref = (struct btrfs_extent_inline_ref *)ptr;
2251         offset = btrfs_extent_inline_ref_offset(eb, iref);
2252         type = btrfs_extent_inline_ref_type(eb, iref);
2253
2254         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2255                 goto normal;
2256         ptr += btrfs_extent_inline_ref_size(type);
2257         goto next;
2258
2259 normal:
2260         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2261         goto out;
2262
2263 full_backref:
2264         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2265 out:
2266         btrfs_free_path(path);
2267         return ret;
2268 }
2269
2270 /*
2271  * for a tree node or leaf, we record its reference count, so later if we still
2272  * process this node or leaf, don't need to compute its reference count again.
2273  *
2274  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
2275  */
2276 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2277                              struct extent_buffer *eb, struct node_refs *nrefs,
2278                              u64 level, int check_all)
2279 {
2280         struct ulist *roots;
2281         u64 refs = 0;
2282         u64 flags = 0;
2283         int root_level = btrfs_header_level(root->node);
2284         int check;
2285         int ret;
2286
2287         if (nrefs->bytenr[level] == bytenr)
2288                 return 0;
2289
2290         if (bytenr != (u64)-1) {
2291                 /* the return value of this function seems a mistake */
2292                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2293                                        level, 1, &refs, &flags);
2294                 /* temporary fix */
2295                 if (ret < 0 && !check_all)
2296                         return ret;
2297
2298                 nrefs->bytenr[level] = bytenr;
2299                 nrefs->refs[level] = refs;
2300                 nrefs->full_backref[level] = 0;
2301                 nrefs->checked[level] = 0;
2302
2303                 if (refs > 1) {
2304                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2305                                                    0, &roots);
2306                         if (ret)
2307                                 return -EIO;
2308
2309                         check = need_check(root, roots);
2310                         ulist_free(roots);
2311                         nrefs->need_check[level] = check;
2312                 } else {
2313                         if (!check_all) {
2314                                 nrefs->need_check[level] = 1;
2315                         } else {
2316                                 if (level == root_level) {
2317                                         nrefs->need_check[level] = 1;
2318                                 } else {
2319                                         /*
2320                                          * The node refs may have not been
2321                                          * updated if upper needs checking (the
2322                                          * lowest root_objectid) the node can
2323                                          * be checked.
2324                                          */
2325                                         nrefs->need_check[level] =
2326                                                 nrefs->need_check[level + 1];
2327                                 }
2328                         }
2329                 }
2330         }
2331
2332         if (check_all && eb) {
2333                 calc_extent_flag_v2(root, eb, &flags);
2334                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2335                         nrefs->full_backref[level] = 1;
2336         }
2337
2338         return 0;
2339 }
2340
2341 /*
2342  * @level           if @level == -1 means extent data item
2343  *                  else normal treeblocl.
2344  */
2345 static int should_check_extent_strictly(struct btrfs_root *root,
2346                                         struct node_refs *nrefs, int level)
2347 {
2348         int root_level = btrfs_header_level(root->node);
2349
2350         if (level > root_level || level < -1)
2351                 return 1;
2352         if (level == root_level)
2353                 return 1;
2354         /*
2355          * if the upper node is marked full backref, it should contain shared
2356          * backref of the parent (except owner == root->objectid).
2357          */
2358         while (++level <= root_level)
2359                 if (nrefs->refs[level] > 1)
2360                         return 0;
2361
2362         return 1;
2363 }
2364
2365 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2366                           struct walk_control *wc, int *level,
2367                           struct node_refs *nrefs)
2368 {
2369         enum btrfs_tree_block_status status;
2370         u64 bytenr;
2371         u64 ptr_gen;
2372         struct btrfs_fs_info *fs_info = root->fs_info;
2373         struct extent_buffer *next;
2374         struct extent_buffer *cur;
2375         int ret, err = 0;
2376         u64 refs;
2377
2378         WARN_ON(*level < 0);
2379         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2380
2381         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2382                 refs = nrefs->refs[*level];
2383                 ret = 0;
2384         } else {
2385                 ret = btrfs_lookup_extent_info(NULL, root,
2386                                        path->nodes[*level]->start,
2387                                        *level, 1, &refs, NULL);
2388                 if (ret < 0) {
2389                         err = ret;
2390                         goto out;
2391                 }
2392                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2393                 nrefs->refs[*level] = refs;
2394         }
2395
2396         if (refs > 1) {
2397                 ret = enter_shared_node(root, path->nodes[*level]->start,
2398                                         refs, wc, *level);
2399                 if (ret > 0) {
2400                         err = ret;
2401                         goto out;
2402                 }
2403         }
2404
2405         while (*level >= 0) {
2406                 WARN_ON(*level < 0);
2407                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2408                 cur = path->nodes[*level];
2409
2410                 if (btrfs_header_level(cur) != *level)
2411                         WARN_ON(1);
2412
2413                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2414                         break;
2415                 if (*level == 0) {
2416                         ret = process_one_leaf(root, cur, wc);
2417                         if (ret < 0)
2418                                 err = ret;
2419                         break;
2420                 }
2421                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2422                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2423
2424                 if (bytenr == nrefs->bytenr[*level - 1]) {
2425                         refs = nrefs->refs[*level - 1];
2426                 } else {
2427                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2428                                         *level - 1, 1, &refs, NULL);
2429                         if (ret < 0) {
2430                                 refs = 0;
2431                         } else {
2432                                 nrefs->bytenr[*level - 1] = bytenr;
2433                                 nrefs->refs[*level - 1] = refs;
2434                         }
2435                 }
2436
2437                 if (refs > 1) {
2438                         ret = enter_shared_node(root, bytenr, refs,
2439                                                 wc, *level - 1);
2440                         if (ret > 0) {
2441                                 path->slots[*level]++;
2442                                 continue;
2443                         }
2444                 }
2445
2446                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2447                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2448                         free_extent_buffer(next);
2449                         reada_walk_down(root, cur, path->slots[*level]);
2450                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2451                         if (!extent_buffer_uptodate(next)) {
2452                                 struct btrfs_key node_key;
2453
2454                                 btrfs_node_key_to_cpu(path->nodes[*level],
2455                                                       &node_key,
2456                                                       path->slots[*level]);
2457                                 btrfs_add_corrupt_extent_record(root->fs_info,
2458                                                 &node_key,
2459                                                 path->nodes[*level]->start,
2460                                                 root->fs_info->nodesize,
2461                                                 *level);
2462                                 err = -EIO;
2463                                 goto out;
2464                         }
2465                 }
2466
2467                 ret = check_child_node(cur, path->slots[*level], next);
2468                 if (ret) {
2469                         free_extent_buffer(next);
2470                         err = ret;
2471                         goto out;
2472                 }
2473
2474                 if (btrfs_is_leaf(next))
2475                         status = btrfs_check_leaf(root, NULL, next);
2476                 else
2477                         status = btrfs_check_node(root, NULL, next);
2478                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2479                         free_extent_buffer(next);
2480                         err = -EIO;
2481                         goto out;
2482                 }
2483
2484                 *level = *level - 1;
2485                 free_extent_buffer(path->nodes[*level]);
2486                 path->nodes[*level] = next;
2487                 path->slots[*level] = 0;
2488         }
2489 out:
2490         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2491         return err;
2492 }
2493
2494 static int fs_root_objectid(u64 objectid);
2495
2496 /*
2497  * Update global fs information.
2498  */
2499 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2500                          int level)
2501 {
2502         u32 free_nrs;
2503         struct extent_buffer *eb = path->nodes[level];
2504
2505         total_btree_bytes += eb->len;
2506         if (fs_root_objectid(root->objectid))
2507                 total_fs_tree_bytes += eb->len;
2508         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2509                 total_extent_tree_bytes += eb->len;
2510
2511         if (level == 0) {
2512                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2513         } else {
2514                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2515                             btrfs_header_nritems(eb));
2516                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2517         }
2518 }
2519
2520 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2521                             unsigned int ext_ref);
2522 static int check_tree_block_ref(struct btrfs_root *root,
2523                                 struct extent_buffer *eb, u64 bytenr,
2524                                 int level, u64 owner, struct node_refs *nrefs);
2525 static int check_leaf_items(struct btrfs_trans_handle *trans,
2526                             struct btrfs_root *root, struct btrfs_path *path,
2527                             struct node_refs *nrefs, int account_bytes);
2528
2529 /*
2530  * @trans      just for lowmem repair mode
2531  * @check all  if not 0 then check all tree block backrefs and items
2532  *             0 then just check relationship of items in fs tree(s)
2533  *
2534  * Returns >0  Found error, should continue
2535  * Returns <0  Fatal error, must exit the whole check
2536  * Returns 0   No errors found
2537  */
2538 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2539                              struct btrfs_root *root, struct btrfs_path *path,
2540                              int *level, struct node_refs *nrefs, int ext_ref,
2541                              int check_all)
2542
2543 {
2544         enum btrfs_tree_block_status status;
2545         u64 bytenr;
2546         u64 ptr_gen;
2547         struct btrfs_fs_info *fs_info = root->fs_info;
2548         struct extent_buffer *next;
2549         struct extent_buffer *cur;
2550         int ret;
2551         int err = 0;
2552         int check;
2553         int account_file_data = 0;
2554
2555         WARN_ON(*level < 0);
2556         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2557
2558         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2559                                 path->nodes[*level], nrefs, *level, check_all);
2560         if (ret < 0)
2561                 return ret;
2562
2563         while (*level >= 0) {
2564                 WARN_ON(*level < 0);
2565                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2566                 cur = path->nodes[*level];
2567                 bytenr = btrfs_header_bytenr(cur);
2568                 check = nrefs->need_check[*level];
2569
2570                 if (btrfs_header_level(cur) != *level)
2571                         WARN_ON(1);
2572                /*
2573                 * Update bytes accounting and check tree block ref
2574                 * NOTE: Doing accounting and check before checking nritems
2575                 * is necessary because of empty node/leaf.
2576                 */
2577                 if ((check_all && !nrefs->checked[*level]) ||
2578                     (!check_all && nrefs->need_check[*level])) {
2579                         ret = check_tree_block_ref(root, cur,
2580                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2581                            btrfs_header_owner(cur), nrefs);
2582                         err |= ret;
2583
2584                         if (check_all && nrefs->need_check[*level] &&
2585                                 nrefs->refs[*level]) {
2586                                 account_bytes(root, path, *level);
2587                                 account_file_data = 1;
2588                         }
2589                         nrefs->checked[*level] = 1;
2590                 }
2591
2592                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2593                         break;
2594
2595                 /* Don't forgot to check leaf/node validation */
2596                 if (*level == 0) {
2597                         /* skip duplicate check */
2598                         if (check || !check_all) {
2599                                 ret = btrfs_check_leaf(root, NULL, cur);
2600                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2601                                         err |= -EIO;
2602                                         break;
2603                                 }
2604                         }
2605
2606                         ret = 0;
2607                         if (!check_all)
2608                                 ret = process_one_leaf_v2(root, path, nrefs,
2609                                                           level, ext_ref);
2610                         else
2611                                 ret = check_leaf_items(trans, root, path,
2612                                                nrefs, account_file_data);
2613                         err |= ret;
2614                         break;
2615                 } else {
2616                         if (check || !check_all) {
2617                                 ret = btrfs_check_node(root, NULL, cur);
2618                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2619                                         err |= -EIO;
2620                                         break;
2621                                 }
2622                         }
2623                 }
2624
2625                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2626                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2627
2628                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2629                                         check_all);
2630                 if (ret < 0)
2631                         break;
2632                 /*
2633                  * check all trees in check_chunks_and_extent_v2
2634                  * check shared node once in check_fs_roots
2635                  */
2636                 if (!check_all && !nrefs->need_check[*level - 1]) {
2637                         path->slots[*level]++;
2638                         continue;
2639                 }
2640
2641                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2642                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2643                         free_extent_buffer(next);
2644                         reada_walk_down(root, cur, path->slots[*level]);
2645                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2646                         if (!extent_buffer_uptodate(next)) {
2647                                 struct btrfs_key node_key;
2648
2649                                 btrfs_node_key_to_cpu(path->nodes[*level],
2650                                                       &node_key,
2651                                                       path->slots[*level]);
2652                                 btrfs_add_corrupt_extent_record(fs_info,
2653                                         &node_key, path->nodes[*level]->start,
2654                                         fs_info->nodesize, *level);
2655                                 err |= -EIO;
2656                                 break;
2657                         }
2658                 }
2659
2660                 ret = check_child_node(cur, path->slots[*level], next);
2661                 err |= ret;
2662                 if (ret < 0) 
2663                         break;
2664
2665                 if (btrfs_is_leaf(next))
2666                         status = btrfs_check_leaf(root, NULL, next);
2667                 else
2668                         status = btrfs_check_node(root, NULL, next);
2669                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2670                         free_extent_buffer(next);
2671                         err |= -EIO;
2672                         break;
2673                 }
2674
2675                 *level = *level - 1;
2676                 free_extent_buffer(path->nodes[*level]);
2677                 path->nodes[*level] = next;
2678                 path->slots[*level] = 0;
2679                 account_file_data = 0;
2680
2681                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2682         }
2683         return err;
2684 }
2685
2686 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2687                         struct walk_control *wc, int *level)
2688 {
2689         int i;
2690         struct extent_buffer *leaf;
2691
2692         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2693                 leaf = path->nodes[i];
2694                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2695                         path->slots[i]++;
2696                         *level = i;
2697                         return 0;
2698                 } else {
2699                         free_extent_buffer(path->nodes[*level]);
2700                         path->nodes[*level] = NULL;
2701                         BUG_ON(*level > wc->active_node);
2702                         if (*level == wc->active_node)
2703                                 leave_shared_node(root, wc, *level);
2704                         *level = i + 1;
2705                 }
2706         }
2707         return 1;
2708 }
2709
2710 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2711                            int *level)
2712 {
2713         int i;
2714         struct extent_buffer *leaf;
2715
2716         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2717                 leaf = path->nodes[i];
2718                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2719                         path->slots[i]++;
2720                         *level = i;
2721                         return 0;
2722                 } else {
2723                         free_extent_buffer(path->nodes[*level]);
2724                         path->nodes[*level] = NULL;
2725                         *level = i + 1;
2726                 }
2727         }
2728         return 1;
2729 }
2730
2731 static int check_root_dir(struct inode_record *rec)
2732 {
2733         struct inode_backref *backref;
2734         int ret = -1;
2735
2736         if (!rec->found_inode_item || rec->errors)
2737                 goto out;
2738         if (rec->nlink != 1 || rec->found_link != 0)
2739                 goto out;
2740         if (list_empty(&rec->backrefs))
2741                 goto out;
2742         backref = to_inode_backref(rec->backrefs.next);
2743         if (!backref->found_inode_ref)
2744                 goto out;
2745         if (backref->index != 0 || backref->namelen != 2 ||
2746             memcmp(backref->name, "..", 2))
2747                 goto out;
2748         if (backref->found_dir_index || backref->found_dir_item)
2749                 goto out;
2750         ret = 0;
2751 out:
2752         return ret;
2753 }
2754
2755 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2756                               struct btrfs_root *root, struct btrfs_path *path,
2757                               struct inode_record *rec)
2758 {
2759         struct btrfs_inode_item *ei;
2760         struct btrfs_key key;
2761         int ret;
2762
2763         key.objectid = rec->ino;
2764         key.type = BTRFS_INODE_ITEM_KEY;
2765         key.offset = (u64)-1;
2766
2767         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2768         if (ret < 0)
2769                 goto out;
2770         if (ret) {
2771                 if (!path->slots[0]) {
2772                         ret = -ENOENT;
2773                         goto out;
2774                 }
2775                 path->slots[0]--;
2776                 ret = 0;
2777         }
2778         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2779         if (key.objectid != rec->ino) {
2780                 ret = -ENOENT;
2781                 goto out;
2782         }
2783
2784         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2785                             struct btrfs_inode_item);
2786         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2787         btrfs_mark_buffer_dirty(path->nodes[0]);
2788         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2789         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2790                root->root_key.objectid);
2791 out:
2792         btrfs_release_path(path);
2793         return ret;
2794 }
2795
2796 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2797                                     struct btrfs_root *root,
2798                                     struct btrfs_path *path,
2799                                     struct inode_record *rec)
2800 {
2801         int ret;
2802
2803         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2804         btrfs_release_path(path);
2805         if (!ret)
2806                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2807         return ret;
2808 }
2809
2810 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2811                                struct btrfs_root *root,
2812                                struct btrfs_path *path,
2813                                struct inode_record *rec)
2814 {
2815         struct btrfs_inode_item *ei;
2816         struct btrfs_key key;
2817         int ret = 0;
2818
2819         key.objectid = rec->ino;
2820         key.type = BTRFS_INODE_ITEM_KEY;
2821         key.offset = 0;
2822
2823         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2824         if (ret) {
2825                 if (ret > 0)
2826                         ret = -ENOENT;
2827                 goto out;
2828         }
2829
2830         /* Since ret == 0, no need to check anything */
2831         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2832                             struct btrfs_inode_item);
2833         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2834         btrfs_mark_buffer_dirty(path->nodes[0]);
2835         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2836         printf("reset nbytes for ino %llu root %llu\n",
2837                rec->ino, root->root_key.objectid);
2838 out:
2839         btrfs_release_path(path);
2840         return ret;
2841 }
2842
2843 static int add_missing_dir_index(struct btrfs_root *root,
2844                                  struct cache_tree *inode_cache,
2845                                  struct inode_record *rec,
2846                                  struct inode_backref *backref)
2847 {
2848         struct btrfs_path path;
2849         struct btrfs_trans_handle *trans;
2850         struct btrfs_dir_item *dir_item;
2851         struct extent_buffer *leaf;
2852         struct btrfs_key key;
2853         struct btrfs_disk_key disk_key;
2854         struct inode_record *dir_rec;
2855         unsigned long name_ptr;
2856         u32 data_size = sizeof(*dir_item) + backref->namelen;
2857         int ret;
2858
2859         trans = btrfs_start_transaction(root, 1);
2860         if (IS_ERR(trans))
2861                 return PTR_ERR(trans);
2862
2863         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2864                 (unsigned long long)rec->ino);
2865
2866         btrfs_init_path(&path);
2867         key.objectid = backref->dir;
2868         key.type = BTRFS_DIR_INDEX_KEY;
2869         key.offset = backref->index;
2870         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2871         BUG_ON(ret);
2872
2873         leaf = path.nodes[0];
2874         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2875
2876         disk_key.objectid = cpu_to_le64(rec->ino);
2877         disk_key.type = BTRFS_INODE_ITEM_KEY;
2878         disk_key.offset = 0;
2879
2880         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2881         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2882         btrfs_set_dir_data_len(leaf, dir_item, 0);
2883         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2884         name_ptr = (unsigned long)(dir_item + 1);
2885         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2886         btrfs_mark_buffer_dirty(leaf);
2887         btrfs_release_path(&path);
2888         btrfs_commit_transaction(trans, root);
2889
2890         backref->found_dir_index = 1;
2891         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2892         BUG_ON(IS_ERR(dir_rec));
2893         if (!dir_rec)
2894                 return 0;
2895         dir_rec->found_size += backref->namelen;
2896         if (dir_rec->found_size == dir_rec->isize &&
2897             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2898                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2899         if (dir_rec->found_size != dir_rec->isize)
2900                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2901
2902         return 0;
2903 }
2904
2905 static int delete_dir_index(struct btrfs_root *root,
2906                             struct inode_backref *backref)
2907 {
2908         struct btrfs_trans_handle *trans;
2909         struct btrfs_dir_item *di;
2910         struct btrfs_path path;
2911         int ret = 0;
2912
2913         trans = btrfs_start_transaction(root, 1);
2914         if (IS_ERR(trans))
2915                 return PTR_ERR(trans);
2916
2917         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2918                 (unsigned long long)backref->dir,
2919                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2920                 (unsigned long long)root->objectid);
2921
2922         btrfs_init_path(&path);
2923         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2924                                     backref->name, backref->namelen,
2925                                     backref->index, -1);
2926         if (IS_ERR(di)) {
2927                 ret = PTR_ERR(di);
2928                 btrfs_release_path(&path);
2929                 btrfs_commit_transaction(trans, root);
2930                 if (ret == -ENOENT)
2931                         return 0;
2932                 return ret;
2933         }
2934
2935         if (!di)
2936                 ret = btrfs_del_item(trans, root, &path);
2937         else
2938                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2939         BUG_ON(ret);
2940         btrfs_release_path(&path);
2941         btrfs_commit_transaction(trans, root);
2942         return ret;
2943 }
2944
2945 static int __create_inode_item(struct btrfs_trans_handle *trans,
2946                                struct btrfs_root *root, u64 ino, u64 size,
2947                                u64 nbytes, u64 nlink, u32 mode)
2948 {
2949         struct btrfs_inode_item ii;
2950         time_t now = time(NULL);
2951         int ret;
2952
2953         btrfs_set_stack_inode_size(&ii, size);
2954         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2955         btrfs_set_stack_inode_nlink(&ii, nlink);
2956         btrfs_set_stack_inode_mode(&ii, mode);
2957         btrfs_set_stack_inode_generation(&ii, trans->transid);
2958         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2959         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2960         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2961         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2962         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2963         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2964         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2965
2966         ret = btrfs_insert_inode(trans, root, ino, &ii);
2967         ASSERT(!ret);
2968
2969         warning("root %llu inode %llu recreating inode item, this may "
2970                 "be incomplete, please check permissions and content after "
2971                 "the fsck completes.\n", (unsigned long long)root->objectid,
2972                 (unsigned long long)ino);
2973
2974         return 0;
2975 }
2976
2977 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2978                                     struct btrfs_root *root, u64 ino,
2979                                     u8 filetype)
2980 {
2981         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2982
2983         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2984 }
2985
2986 static int create_inode_item(struct btrfs_root *root,
2987                              struct inode_record *rec, int root_dir)
2988 {
2989         struct btrfs_trans_handle *trans;
2990         u64 nlink = 0;
2991         u32 mode = 0;
2992         u64 size = 0;
2993         int ret;
2994
2995         trans = btrfs_start_transaction(root, 1);
2996         if (IS_ERR(trans)) {
2997                 ret = PTR_ERR(trans);
2998                 return ret;
2999         }
3000
3001         nlink = root_dir ? 1 : rec->found_link;
3002         if (rec->found_dir_item) {
3003                 if (rec->found_file_extent)
3004                         fprintf(stderr, "root %llu inode %llu has both a dir "
3005                                 "item and extents, unsure if it is a dir or a "
3006                                 "regular file so setting it as a directory\n",
3007                                 (unsigned long long)root->objectid,
3008                                 (unsigned long long)rec->ino);
3009                 mode = S_IFDIR | 0755;
3010                 size = rec->found_size;
3011         } else if (!rec->found_dir_item) {
3012                 size = rec->extent_end;
3013                 mode =  S_IFREG | 0755;
3014         }
3015
3016         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3017                                   nlink, mode);
3018         btrfs_commit_transaction(trans, root);
3019         return 0;
3020 }
3021
3022 static int repair_inode_backrefs(struct btrfs_root *root,
3023                                  struct inode_record *rec,
3024                                  struct cache_tree *inode_cache,
3025                                  int delete)
3026 {
3027         struct inode_backref *tmp, *backref;
3028         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3029         int ret = 0;
3030         int repaired = 0;
3031
3032         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3033                 if (!delete && rec->ino == root_dirid) {
3034                         if (!rec->found_inode_item) {
3035                                 ret = create_inode_item(root, rec, 1);
3036                                 if (ret)
3037                                         break;
3038                                 repaired++;
3039                         }
3040                 }
3041
3042                 /* Index 0 for root dir's are special, don't mess with it */
3043                 if (rec->ino == root_dirid && backref->index == 0)
3044                         continue;
3045
3046                 if (delete &&
3047                     ((backref->found_dir_index && !backref->found_inode_ref) ||
3048                      (backref->found_dir_index && backref->found_inode_ref &&
3049                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3050                         ret = delete_dir_index(root, backref);
3051                         if (ret)
3052                                 break;
3053                         repaired++;
3054                         list_del(&backref->list);
3055                         free(backref);
3056                         continue;
3057                 }
3058
3059                 if (!delete && !backref->found_dir_index &&
3060                     backref->found_dir_item && backref->found_inode_ref) {
3061                         ret = add_missing_dir_index(root, inode_cache, rec,
3062                                                     backref);
3063                         if (ret)
3064                                 break;
3065                         repaired++;
3066                         if (backref->found_dir_item &&
3067                             backref->found_dir_index) {
3068                                 if (!backref->errors &&
3069                                     backref->found_inode_ref) {
3070                                         list_del(&backref->list);
3071                                         free(backref);
3072                                         continue;
3073                                 }
3074                         }
3075                 }
3076
3077                 if (!delete && (!backref->found_dir_index &&
3078                                 !backref->found_dir_item &&
3079                                 backref->found_inode_ref)) {
3080                         struct btrfs_trans_handle *trans;
3081                         struct btrfs_key location;
3082
3083                         ret = check_dir_conflict(root, backref->name,
3084                                                  backref->namelen,
3085                                                  backref->dir,
3086                                                  backref->index);
3087                         if (ret) {
3088                                 /*
3089                                  * let nlink fixing routine to handle it,
3090                                  * which can do it better.
3091                                  */
3092                                 ret = 0;
3093                                 break;
3094                         }
3095                         location.objectid = rec->ino;
3096                         location.type = BTRFS_INODE_ITEM_KEY;
3097                         location.offset = 0;
3098
3099                         trans = btrfs_start_transaction(root, 1);
3100                         if (IS_ERR(trans)) {
3101                                 ret = PTR_ERR(trans);
3102                                 break;
3103                         }
3104                         fprintf(stderr, "adding missing dir index/item pair "
3105                                 "for inode %llu\n",
3106                                 (unsigned long long)rec->ino);
3107                         ret = btrfs_insert_dir_item(trans, root, backref->name,
3108                                                     backref->namelen,
3109                                                     backref->dir, &location,
3110                                                     imode_to_type(rec->imode),
3111                                                     backref->index);
3112                         BUG_ON(ret);
3113                         btrfs_commit_transaction(trans, root);
3114                         repaired++;
3115                 }
3116
3117                 if (!delete && (backref->found_inode_ref &&
3118                                 backref->found_dir_index &&
3119                                 backref->found_dir_item &&
3120                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3121                                 !rec->found_inode_item)) {
3122                         ret = create_inode_item(root, rec, 0);
3123                         if (ret)
3124                                 break;
3125                         repaired++;
3126                 }
3127
3128         }
3129         return ret ? ret : repaired;
3130 }
3131
3132 /*
3133  * To determine the file type for nlink/inode_item repair
3134  *
3135  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3136  * Return -ENOENT if file type is not found.
3137  */
3138 static int find_file_type(struct inode_record *rec, u8 *type)
3139 {
3140         struct inode_backref *backref;
3141
3142         /* For inode item recovered case */
3143         if (rec->found_inode_item) {
3144                 *type = imode_to_type(rec->imode);
3145                 return 0;
3146         }
3147
3148         list_for_each_entry(backref, &rec->backrefs, list) {
3149                 if (backref->found_dir_index || backref->found_dir_item) {
3150                         *type = backref->filetype;
3151                         return 0;
3152                 }
3153         }
3154         return -ENOENT;
3155 }
3156
3157 /*
3158  * To determine the file name for nlink repair
3159  *
3160  * Return 0 if file name is found, set name and namelen.
3161  * Return -ENOENT if file name is not found.
3162  */
3163 static int find_file_name(struct inode_record *rec,
3164                           char *name, int *namelen)
3165 {
3166         struct inode_backref *backref;
3167
3168         list_for_each_entry(backref, &rec->backrefs, list) {
3169                 if (backref->found_dir_index || backref->found_dir_item ||
3170                     backref->found_inode_ref) {
3171                         memcpy(name, backref->name, backref->namelen);
3172                         *namelen = backref->namelen;
3173                         return 0;
3174                 }
3175         }
3176         return -ENOENT;
3177 }
3178
3179 /* Reset the nlink of the inode to the correct one */
3180 static int reset_nlink(struct btrfs_trans_handle *trans,
3181                        struct btrfs_root *root,
3182                        struct btrfs_path *path,
3183                        struct inode_record *rec)
3184 {
3185         struct inode_backref *backref;
3186         struct inode_backref *tmp;
3187         struct btrfs_key key;
3188         struct btrfs_inode_item *inode_item;
3189         int ret = 0;
3190
3191         /* We don't believe this either, reset it and iterate backref */
3192         rec->found_link = 0;
3193
3194         /* Remove all backref including the valid ones */
3195         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3196                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3197                                    backref->index, backref->name,
3198                                    backref->namelen, 0);
3199                 if (ret < 0)
3200                         goto out;
3201
3202                 /* remove invalid backref, so it won't be added back */
3203                 if (!(backref->found_dir_index &&
3204                       backref->found_dir_item &&
3205                       backref->found_inode_ref)) {
3206                         list_del(&backref->list);
3207                         free(backref);
3208                 } else {
3209                         rec->found_link++;
3210                 }
3211         }
3212
3213         /* Set nlink to 0 */
3214         key.objectid = rec->ino;
3215         key.type = BTRFS_INODE_ITEM_KEY;
3216         key.offset = 0;
3217         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3218         if (ret < 0)
3219                 goto out;
3220         if (ret > 0) {
3221                 ret = -ENOENT;
3222                 goto out;
3223         }
3224         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3225                                     struct btrfs_inode_item);
3226         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3227         btrfs_mark_buffer_dirty(path->nodes[0]);
3228         btrfs_release_path(path);
3229
3230         /*
3231          * Add back valid inode_ref/dir_item/dir_index,
3232          * add_link() will handle the nlink inc, so new nlink must be correct
3233          */
3234         list_for_each_entry(backref, &rec->backrefs, list) {
3235                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3236                                      backref->name, backref->namelen,
3237                                      backref->filetype, &backref->index, 1, 0);
3238                 if (ret < 0)
3239                         goto out;
3240         }
3241 out:
3242         btrfs_release_path(path);
3243         return ret;
3244 }
3245
3246 static int get_highest_inode(struct btrfs_trans_handle *trans,
3247                                 struct btrfs_root *root,
3248                                 struct btrfs_path *path,
3249                                 u64 *highest_ino)
3250 {
3251         struct btrfs_key key, found_key;
3252         int ret;
3253
3254         btrfs_init_path(path);
3255         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3256         key.offset = -1;
3257         key.type = BTRFS_INODE_ITEM_KEY;
3258         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3259         if (ret == 1) {
3260                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3261                                 path->slots[0] - 1);
3262                 *highest_ino = found_key.objectid;
3263                 ret = 0;
3264         }
3265         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3266                 ret = -EOVERFLOW;
3267         btrfs_release_path(path);
3268         return ret;
3269 }
3270
3271 /*
3272  * Link inode to dir 'lost+found'. Increase @ref_count.
3273  *
3274  * Returns 0 means success.
3275  * Returns <0 means failure.
3276  */
3277 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3278                                    struct btrfs_root *root,
3279                                    struct btrfs_path *path,
3280                                    u64 ino, char *namebuf, u32 name_len,
3281                                    u8 filetype, u64 *ref_count)
3282 {
3283         char *dir_name = "lost+found";
3284         u64 lost_found_ino;
3285         int ret;
3286         u32 mode = 0700;
3287
3288         btrfs_release_path(path);
3289         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3290         if (ret < 0)
3291                 goto out;
3292         lost_found_ino++;
3293
3294         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3295                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3296                           mode);
3297         if (ret < 0) {
3298                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3299                 goto out;
3300         }
3301         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3302                              namebuf, name_len, filetype, NULL, 1, 0);
3303         /*
3304          * Add ".INO" suffix several times to handle case where
3305          * "FILENAME.INO" is already taken by another file.
3306          */
3307         while (ret == -EEXIST) {
3308                 /*
3309                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3310                  */
3311                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3312                         ret = -EFBIG;
3313                         goto out;
3314                 }
3315                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3316                          ".%llu", ino);
3317                 name_len += count_digits(ino) + 1;
3318                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3319                                      name_len, filetype, NULL, 1, 0);
3320         }
3321         if (ret < 0) {
3322                 error("failed to link the inode %llu to %s dir: %s",
3323                       ino, dir_name, strerror(-ret));
3324                 goto out;
3325         }
3326
3327         ++*ref_count;
3328         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3329                name_len, namebuf, dir_name);
3330 out:
3331         btrfs_release_path(path);
3332         if (ret)
3333                 error("failed to move file '%.*s' to '%s' dir", name_len,
3334                                 namebuf, dir_name);
3335         return ret;
3336 }
3337
3338 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3339                                struct btrfs_root *root,
3340                                struct btrfs_path *path,
3341                                struct inode_record *rec)
3342 {
3343         char namebuf[BTRFS_NAME_LEN] = {0};
3344         u8 type = 0;
3345         int namelen = 0;
3346         int name_recovered = 0;
3347         int type_recovered = 0;
3348         int ret = 0;
3349
3350         /*
3351          * Get file name and type first before these invalid inode ref
3352          * are deleted by remove_all_invalid_backref()
3353          */
3354         name_recovered = !find_file_name(rec, namebuf, &namelen);
3355         type_recovered = !find_file_type(rec, &type);
3356
3357         if (!name_recovered) {
3358                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3359                        rec->ino, rec->ino);
3360                 namelen = count_digits(rec->ino);
3361                 sprintf(namebuf, "%llu", rec->ino);
3362                 name_recovered = 1;
3363         }
3364         if (!type_recovered) {
3365                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3366                        rec->ino);
3367                 type = BTRFS_FT_REG_FILE;
3368                 type_recovered = 1;
3369         }
3370
3371         ret = reset_nlink(trans, root, path, rec);
3372         if (ret < 0) {
3373                 fprintf(stderr,
3374                         "Failed to reset nlink for inode %llu: %s\n",
3375                         rec->ino, strerror(-ret));
3376                 goto out;
3377         }
3378
3379         if (rec->found_link == 0) {
3380                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3381                                               namebuf, namelen, type,
3382                                               (u64 *)&rec->found_link);
3383                 if (ret)
3384                         goto out;
3385         }
3386         printf("Fixed the nlink of inode %llu\n", rec->ino);
3387 out:
3388         /*
3389          * Clear the flag anyway, or we will loop forever for the same inode
3390          * as it will not be removed from the bad inode list and the dead loop
3391          * happens.
3392          */
3393         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3394         btrfs_release_path(path);
3395         return ret;
3396 }
3397
3398 /*
3399  * Check if there is any normal(reg or prealloc) file extent for given
3400  * ino.
3401  * This is used to determine the file type when neither its dir_index/item or
3402  * inode_item exists.
3403  *
3404  * This will *NOT* report error, if any error happens, just consider it does
3405  * not have any normal file extent.
3406  */
3407 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3408 {
3409         struct btrfs_path path;
3410         struct btrfs_key key;
3411         struct btrfs_key found_key;
3412         struct btrfs_file_extent_item *fi;
3413         u8 type;
3414         int ret = 0;
3415
3416         btrfs_init_path(&path);
3417         key.objectid = ino;
3418         key.type = BTRFS_EXTENT_DATA_KEY;
3419         key.offset = 0;
3420
3421         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3422         if (ret < 0) {
3423                 ret = 0;
3424                 goto out;
3425         }
3426         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3427                 ret = btrfs_next_leaf(root, &path);
3428                 if (ret) {
3429                         ret = 0;
3430                         goto out;
3431                 }
3432         }
3433         while (1) {
3434                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3435                                       path.slots[0]);
3436                 if (found_key.objectid != ino ||
3437                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3438                         break;
3439                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3440                                     struct btrfs_file_extent_item);
3441                 type = btrfs_file_extent_type(path.nodes[0], fi);
3442                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3443                         ret = 1;
3444                         goto out;
3445                 }
3446         }
3447 out:
3448         btrfs_release_path(&path);
3449         return ret;
3450 }
3451
3452 static u32 btrfs_type_to_imode(u8 type)
3453 {
3454         static u32 imode_by_btrfs_type[] = {
3455                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3456                 [BTRFS_FT_DIR]          = S_IFDIR,
3457                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3458                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3459                 [BTRFS_FT_FIFO]         = S_IFIFO,
3460                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3461                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3462         };
3463
3464         return imode_by_btrfs_type[(type)];
3465 }
3466
3467 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3468                                 struct btrfs_root *root,
3469                                 struct btrfs_path *path,
3470                                 struct inode_record *rec)
3471 {
3472         u8 filetype;
3473         u32 mode = 0700;
3474         int type_recovered = 0;
3475         int ret = 0;
3476
3477         printf("Trying to rebuild inode:%llu\n", rec->ino);
3478
3479         type_recovered = !find_file_type(rec, &filetype);
3480
3481         /*
3482          * Try to determine inode type if type not found.
3483          *
3484          * For found regular file extent, it must be FILE.
3485          * For found dir_item/index, it must be DIR.
3486          *
3487          * For undetermined one, use FILE as fallback.
3488          *
3489          * TODO:
3490          * 1. If found backref(inode_index/item is already handled) to it,
3491          *    it must be DIR.
3492          *    Need new inode-inode ref structure to allow search for that.
3493          */
3494         if (!type_recovered) {
3495                 if (rec->found_file_extent &&
3496                     find_normal_file_extent(root, rec->ino)) {
3497                         type_recovered = 1;
3498                         filetype = BTRFS_FT_REG_FILE;
3499                 } else if (rec->found_dir_item) {
3500                         type_recovered = 1;
3501                         filetype = BTRFS_FT_DIR;
3502                 } else if (!list_empty(&rec->orphan_extents)) {
3503                         type_recovered = 1;
3504                         filetype = BTRFS_FT_REG_FILE;
3505                 } else{
3506                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3507                                rec->ino);
3508                         type_recovered = 1;
3509                         filetype = BTRFS_FT_REG_FILE;
3510                 }
3511         }
3512
3513         ret = btrfs_new_inode(trans, root, rec->ino,
3514                               mode | btrfs_type_to_imode(filetype));
3515         if (ret < 0)
3516                 goto out;
3517
3518         /*
3519          * Here inode rebuild is done, we only rebuild the inode item,
3520          * don't repair the nlink(like move to lost+found).
3521          * That is the job of nlink repair.
3522          *
3523          * We just fill the record and return
3524          */
3525         rec->found_dir_item = 1;
3526         rec->imode = mode | btrfs_type_to_imode(filetype);
3527         rec->nlink = 0;
3528         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3529         /* Ensure the inode_nlinks repair function will be called */
3530         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3531 out:
3532         return ret;
3533 }
3534
3535 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3536                                       struct btrfs_root *root,
3537                                       struct btrfs_path *path,
3538                                       struct inode_record *rec)
3539 {
3540         struct orphan_data_extent *orphan;
3541         struct orphan_data_extent *tmp;
3542         int ret = 0;
3543
3544         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3545                 /*
3546                  * Check for conflicting file extents
3547                  *
3548                  * Here we don't know whether the extents is compressed or not,
3549                  * so we can only assume it not compressed nor data offset,
3550                  * and use its disk_len as extent length.
3551                  */
3552                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3553                                        orphan->offset, orphan->disk_len, 0);
3554                 btrfs_release_path(path);
3555                 if (ret < 0)
3556                         goto out;
3557                 if (!ret) {
3558                         fprintf(stderr,
3559                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3560                                 orphan->disk_bytenr, orphan->disk_len);
3561                         ret = btrfs_free_extent(trans,
3562                                         root->fs_info->extent_root,
3563                                         orphan->disk_bytenr, orphan->disk_len,
3564                                         0, root->objectid, orphan->objectid,
3565                                         orphan->offset);
3566                         if (ret < 0)
3567                                 goto out;
3568                 }
3569                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3570                                 orphan->offset, orphan->disk_bytenr,
3571                                 orphan->disk_len, orphan->disk_len);
3572                 if (ret < 0)
3573                         goto out;
3574
3575                 /* Update file size info */
3576                 rec->found_size += orphan->disk_len;
3577                 if (rec->found_size == rec->nbytes)
3578                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3579
3580                 /* Update the file extent hole info too */
3581                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3582                                            orphan->disk_len);
3583                 if (ret < 0)
3584                         goto out;
3585                 if (RB_EMPTY_ROOT(&rec->holes))
3586                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3587
3588                 list_del(&orphan->list);
3589                 free(orphan);
3590         }
3591         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3592 out:
3593         return ret;
3594 }
3595
3596 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3597                                         struct btrfs_root *root,
3598                                         struct btrfs_path *path,
3599                                         struct inode_record *rec)
3600 {
3601         struct rb_node *node;
3602         struct file_extent_hole *hole;
3603         int found = 0;
3604         int ret = 0;
3605
3606         node = rb_first(&rec->holes);
3607
3608         while (node) {
3609                 found = 1;
3610                 hole = rb_entry(node, struct file_extent_hole, node);
3611                 ret = btrfs_punch_hole(trans, root, rec->ino,
3612                                        hole->start, hole->len);
3613                 if (ret < 0)
3614                         goto out;
3615                 ret = del_file_extent_hole(&rec->holes, hole->start,
3616                                            hole->len);
3617                 if (ret < 0)
3618                         goto out;
3619                 if (RB_EMPTY_ROOT(&rec->holes))
3620                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3621                 node = rb_first(&rec->holes);
3622         }
3623         /* special case for a file losing all its file extent */
3624         if (!found) {
3625                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3626                                        round_up(rec->isize,
3627                                                 root->fs_info->sectorsize));
3628                 if (ret < 0)
3629                         goto out;
3630         }
3631         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3632                rec->ino, root->objectid);
3633 out:
3634         return ret;
3635 }
3636
3637 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3638 {
3639         struct btrfs_trans_handle *trans;
3640         struct btrfs_path path;
3641         int ret = 0;
3642
3643         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3644                              I_ERR_NO_ORPHAN_ITEM |
3645                              I_ERR_LINK_COUNT_WRONG |
3646                              I_ERR_NO_INODE_ITEM |
3647                              I_ERR_FILE_EXTENT_ORPHAN |
3648                              I_ERR_FILE_EXTENT_DISCOUNT|
3649                              I_ERR_FILE_NBYTES_WRONG)))
3650                 return rec->errors;
3651
3652         /*
3653          * For nlink repair, it may create a dir and add link, so
3654          * 2 for parent(256)'s dir_index and dir_item
3655          * 2 for lost+found dir's inode_item and inode_ref
3656          * 1 for the new inode_ref of the file
3657          * 2 for lost+found dir's dir_index and dir_item for the file
3658          */
3659         trans = btrfs_start_transaction(root, 7);
3660         if (IS_ERR(trans))
3661                 return PTR_ERR(trans);
3662
3663         btrfs_init_path(&path);
3664         if (rec->errors & I_ERR_NO_INODE_ITEM)
3665                 ret = repair_inode_no_item(trans, root, &path, rec);
3666         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3667                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3668         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3669                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3670         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3671                 ret = repair_inode_isize(trans, root, &path, rec);
3672         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3673                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3674         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3675                 ret = repair_inode_nlinks(trans, root, &path, rec);
3676         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3677                 ret = repair_inode_nbytes(trans, root, &path, rec);
3678         btrfs_commit_transaction(trans, root);
3679         btrfs_release_path(&path);
3680         return ret;
3681 }
3682
3683 static int check_inode_recs(struct btrfs_root *root,
3684                             struct cache_tree *inode_cache)
3685 {
3686         struct cache_extent *cache;
3687         struct ptr_node *node;
3688         struct inode_record *rec;
3689         struct inode_backref *backref;
3690         int stage = 0;
3691         int ret = 0;
3692         int err = 0;
3693         u64 error = 0;
3694         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3695
3696         if (btrfs_root_refs(&root->root_item) == 0) {
3697                 if (!cache_tree_empty(inode_cache))
3698                         fprintf(stderr, "warning line %d\n", __LINE__);
3699                 return 0;
3700         }
3701
3702         /*
3703          * We need to repair backrefs first because we could change some of the
3704          * errors in the inode recs.
3705          *
3706          * We also need to go through and delete invalid backrefs first and then
3707          * add the correct ones second.  We do this because we may get EEXIST
3708          * when adding back the correct index because we hadn't yet deleted the
3709          * invalid index.
3710          *
3711          * For example, if we were missing a dir index then the directories
3712          * isize would be wrong, so if we fixed the isize to what we thought it
3713          * would be and then fixed the backref we'd still have a invalid fs, so
3714          * we need to add back the dir index and then check to see if the isize
3715          * is still wrong.
3716          */
3717         while (stage < 3) {
3718                 stage++;
3719                 if (stage == 3 && !err)
3720                         break;
3721
3722                 cache = search_cache_extent(inode_cache, 0);
3723                 while (repair && cache) {
3724                         node = container_of(cache, struct ptr_node, cache);
3725                         rec = node->data;
3726                         cache = next_cache_extent(cache);
3727
3728                         /* Need to free everything up and rescan */
3729                         if (stage == 3) {
3730                                 remove_cache_extent(inode_cache, &node->cache);
3731                                 free(node);
3732                                 free_inode_rec(rec);
3733                                 continue;
3734                         }
3735
3736                         if (list_empty(&rec->backrefs))
3737                                 continue;
3738
3739                         ret = repair_inode_backrefs(root, rec, inode_cache,
3740                                                     stage == 1);
3741                         if (ret < 0) {
3742                                 err = ret;
3743                                 stage = 2;
3744                                 break;
3745                         } if (ret > 0) {
3746                                 err = -EAGAIN;
3747                         }
3748                 }
3749         }
3750         if (err)
3751                 return err;
3752
3753         rec = get_inode_rec(inode_cache, root_dirid, 0);
3754         BUG_ON(IS_ERR(rec));
3755         if (rec) {
3756                 ret = check_root_dir(rec);
3757                 if (ret) {
3758                         fprintf(stderr, "root %llu root dir %llu error\n",
3759                                 (unsigned long long)root->root_key.objectid,
3760                                 (unsigned long long)root_dirid);
3761                         print_inode_error(root, rec);
3762                         error++;
3763                 }
3764         } else {
3765                 if (repair) {
3766                         struct btrfs_trans_handle *trans;
3767
3768                         trans = btrfs_start_transaction(root, 1);
3769                         if (IS_ERR(trans)) {
3770                                 err = PTR_ERR(trans);
3771                                 return err;
3772                         }
3773
3774                         fprintf(stderr,
3775                                 "root %llu missing its root dir, recreating\n",
3776                                 (unsigned long long)root->objectid);
3777
3778                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3779                         BUG_ON(ret);
3780
3781                         btrfs_commit_transaction(trans, root);
3782                         return -EAGAIN;
3783                 }
3784
3785                 fprintf(stderr, "root %llu root dir %llu not found\n",
3786                         (unsigned long long)root->root_key.objectid,
3787                         (unsigned long long)root_dirid);
3788         }
3789
3790         while (1) {
3791                 cache = search_cache_extent(inode_cache, 0);
3792                 if (!cache)
3793                         break;
3794                 node = container_of(cache, struct ptr_node, cache);
3795                 rec = node->data;
3796                 remove_cache_extent(inode_cache, &node->cache);
3797                 free(node);
3798                 if (rec->ino == root_dirid ||
3799                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3800                         free_inode_rec(rec);
3801                         continue;
3802                 }
3803
3804                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3805                         ret = check_orphan_item(root, rec->ino);
3806                         if (ret == 0)
3807                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3808                         if (can_free_inode_rec(rec)) {
3809                                 free_inode_rec(rec);
3810                                 continue;
3811                         }
3812                 }
3813
3814                 if (!rec->found_inode_item)
3815                         rec->errors |= I_ERR_NO_INODE_ITEM;
3816                 if (rec->found_link != rec->nlink)
3817                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3818                 if (repair) {
3819                         ret = try_repair_inode(root, rec);
3820                         if (ret == 0 && can_free_inode_rec(rec)) {
3821                                 free_inode_rec(rec);
3822                                 continue;
3823                         }
3824                         ret = 0;
3825                 }
3826
3827                 if (!(repair && ret == 0))
3828                         error++;
3829                 print_inode_error(root, rec);
3830                 list_for_each_entry(backref, &rec->backrefs, list) {
3831                         if (!backref->found_dir_item)
3832                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3833                         if (!backref->found_dir_index)
3834                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3835                         if (!backref->found_inode_ref)
3836                                 backref->errors |= REF_ERR_NO_INODE_REF;
3837                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3838                                 " namelen %u name %s filetype %d errors %x",
3839                                 (unsigned long long)backref->dir,
3840                                 (unsigned long long)backref->index,
3841                                 backref->namelen, backref->name,
3842                                 backref->filetype, backref->errors);
3843                         print_ref_error(backref->errors);
3844                 }
3845                 free_inode_rec(rec);
3846         }
3847         return (error > 0) ? -1 : 0;
3848 }
3849
3850 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3851                                         u64 objectid)
3852 {
3853         struct cache_extent *cache;
3854         struct root_record *rec = NULL;
3855         int ret;
3856
3857         cache = lookup_cache_extent(root_cache, objectid, 1);
3858         if (cache) {
3859                 rec = container_of(cache, struct root_record, cache);
3860         } else {
3861                 rec = calloc(1, sizeof(*rec));
3862                 if (!rec)
3863                         return ERR_PTR(-ENOMEM);
3864                 rec->objectid = objectid;
3865                 INIT_LIST_HEAD(&rec->backrefs);
3866                 rec->cache.start = objectid;
3867                 rec->cache.size = 1;
3868
3869                 ret = insert_cache_extent(root_cache, &rec->cache);
3870                 if (ret)
3871                         return ERR_PTR(-EEXIST);
3872         }
3873         return rec;
3874 }
3875
3876 static struct root_backref *get_root_backref(struct root_record *rec,
3877                                              u64 ref_root, u64 dir, u64 index,
3878                                              const char *name, int namelen)
3879 {
3880         struct root_backref *backref;
3881
3882         list_for_each_entry(backref, &rec->backrefs, list) {
3883                 if (backref->ref_root != ref_root || backref->dir != dir ||
3884                     backref->namelen != namelen)
3885                         continue;
3886                 if (memcmp(name, backref->name, namelen))
3887                         continue;
3888                 return backref;
3889         }
3890
3891         backref = calloc(1, sizeof(*backref) + namelen + 1);
3892         if (!backref)
3893                 return NULL;
3894         backref->ref_root = ref_root;
3895         backref->dir = dir;
3896         backref->index = index;
3897         backref->namelen = namelen;
3898         memcpy(backref->name, name, namelen);
3899         backref->name[namelen] = '\0';
3900         list_add_tail(&backref->list, &rec->backrefs);
3901         return backref;
3902 }
3903
3904 static void free_root_record(struct cache_extent *cache)
3905 {
3906         struct root_record *rec;
3907         struct root_backref *backref;
3908
3909         rec = container_of(cache, struct root_record, cache);
3910         while (!list_empty(&rec->backrefs)) {
3911                 backref = to_root_backref(rec->backrefs.next);
3912                 list_del(&backref->list);
3913                 free(backref);
3914         }
3915
3916         free(rec);
3917 }
3918
3919 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3920
3921 static int add_root_backref(struct cache_tree *root_cache,
3922                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3923                             const char *name, int namelen,
3924                             int item_type, int errors)
3925 {
3926         struct root_record *rec;
3927         struct root_backref *backref;
3928
3929         rec = get_root_rec(root_cache, root_id);
3930         BUG_ON(IS_ERR(rec));
3931         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3932         BUG_ON(!backref);
3933
3934         backref->errors |= errors;
3935
3936         if (item_type != BTRFS_DIR_ITEM_KEY) {
3937                 if (backref->found_dir_index || backref->found_back_ref ||
3938                     backref->found_forward_ref) {
3939                         if (backref->index != index)
3940                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3941                 } else {
3942                         backref->index = index;
3943                 }
3944         }
3945
3946         if (item_type == BTRFS_DIR_ITEM_KEY) {
3947                 if (backref->found_forward_ref)
3948                         rec->found_ref++;
3949                 backref->found_dir_item = 1;
3950         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3951                 backref->found_dir_index = 1;
3952         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3953                 if (backref->found_forward_ref)
3954                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3955                 else if (backref->found_dir_item)
3956                         rec->found_ref++;
3957                 backref->found_forward_ref = 1;
3958         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3959                 if (backref->found_back_ref)
3960                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3961                 backref->found_back_ref = 1;
3962         } else {
3963                 BUG_ON(1);
3964         }
3965
3966         if (backref->found_forward_ref && backref->found_dir_item)
3967                 backref->reachable = 1;
3968         return 0;
3969 }
3970
3971 static int merge_root_recs(struct btrfs_root *root,
3972                            struct cache_tree *src_cache,
3973                            struct cache_tree *dst_cache)
3974 {
3975         struct cache_extent *cache;
3976         struct ptr_node *node;
3977         struct inode_record *rec;
3978         struct inode_backref *backref;
3979         int ret = 0;
3980
3981         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3982                 free_inode_recs_tree(src_cache);
3983                 return 0;
3984         }
3985
3986         while (1) {
3987                 cache = search_cache_extent(src_cache, 0);
3988                 if (!cache)
3989                         break;
3990                 node = container_of(cache, struct ptr_node, cache);
3991                 rec = node->data;
3992                 remove_cache_extent(src_cache, &node->cache);
3993                 free(node);
3994
3995                 ret = is_child_root(root, root->objectid, rec->ino);
3996                 if (ret < 0)
3997                         break;
3998                 else if (ret == 0)
3999                         goto skip;
4000
4001                 list_for_each_entry(backref, &rec->backrefs, list) {
4002                         BUG_ON(backref->found_inode_ref);
4003                         if (backref->found_dir_item)
4004                                 add_root_backref(dst_cache, rec->ino,
4005                                         root->root_key.objectid, backref->dir,
4006                                         backref->index, backref->name,
4007                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
4008                                         backref->errors);
4009                         if (backref->found_dir_index)
4010                                 add_root_backref(dst_cache, rec->ino,
4011                                         root->root_key.objectid, backref->dir,
4012                                         backref->index, backref->name,
4013                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
4014                                         backref->errors);
4015                 }
4016 skip:
4017                 free_inode_rec(rec);
4018         }
4019         if (ret < 0)
4020                 return ret;
4021         return 0;
4022 }
4023
4024 static int check_root_refs(struct btrfs_root *root,
4025                            struct cache_tree *root_cache)
4026 {
4027         struct root_record *rec;
4028         struct root_record *ref_root;
4029         struct root_backref *backref;
4030         struct cache_extent *cache;
4031         int loop = 1;
4032         int ret;
4033         int error;
4034         int errors = 0;
4035
4036         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4037         BUG_ON(IS_ERR(rec));
4038         rec->found_ref = 1;
4039
4040         /* fixme: this can not detect circular references */
4041         while (loop) {
4042                 loop = 0;
4043                 cache = search_cache_extent(root_cache, 0);
4044                 while (1) {
4045                         if (!cache)
4046                                 break;
4047                         rec = container_of(cache, struct root_record, cache);
4048                         cache = next_cache_extent(cache);
4049
4050                         if (rec->found_ref == 0)
4051                                 continue;
4052
4053                         list_for_each_entry(backref, &rec->backrefs, list) {
4054                                 if (!backref->reachable)
4055                                         continue;
4056
4057                                 ref_root = get_root_rec(root_cache,
4058                                                         backref->ref_root);
4059                                 BUG_ON(IS_ERR(ref_root));
4060                                 if (ref_root->found_ref > 0)
4061                                         continue;
4062
4063                                 backref->reachable = 0;
4064                                 rec->found_ref--;
4065                                 if (rec->found_ref == 0)
4066                                         loop = 1;
4067                         }
4068                 }
4069         }
4070
4071         cache = search_cache_extent(root_cache, 0);
4072         while (1) {
4073                 if (!cache)
4074                         break;
4075                 rec = container_of(cache, struct root_record, cache);
4076                 cache = next_cache_extent(cache);
4077
4078                 if (rec->found_ref == 0 &&
4079                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4080                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4081                         ret = check_orphan_item(root->fs_info->tree_root,
4082                                                 rec->objectid);
4083                         if (ret == 0)
4084                                 continue;
4085
4086                         /*
4087                          * If we don't have a root item then we likely just have
4088                          * a dir item in a snapshot for this root but no actual
4089                          * ref key or anything so it's meaningless.
4090                          */
4091                         if (!rec->found_root_item)
4092                                 continue;
4093                         errors++;
4094                         fprintf(stderr, "fs tree %llu not referenced\n",
4095                                 (unsigned long long)rec->objectid);
4096                 }
4097
4098                 error = 0;
4099                 if (rec->found_ref > 0 && !rec->found_root_item)
4100                         error = 1;
4101                 list_for_each_entry(backref, &rec->backrefs, list) {
4102                         if (!backref->found_dir_item)
4103                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
4104                         if (!backref->found_dir_index)
4105                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
4106                         if (!backref->found_back_ref)
4107                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4108                         if (!backref->found_forward_ref)
4109                                 backref->errors |= REF_ERR_NO_ROOT_REF;
4110                         if (backref->reachable && backref->errors)
4111                                 error = 1;
4112                 }
4113                 if (!error)
4114                         continue;
4115
4116                 errors++;
4117                 fprintf(stderr, "fs tree %llu refs %u %s\n",
4118                         (unsigned long long)rec->objectid, rec->found_ref,
4119                          rec->found_root_item ? "" : "not found");
4120
4121                 list_for_each_entry(backref, &rec->backrefs, list) {
4122                         if (!backref->reachable)
4123                                 continue;
4124                         if (!backref->errors && rec->found_root_item)
4125                                 continue;
4126                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4127                                 " index %llu namelen %u name %s errors %x\n",
4128                                 (unsigned long long)backref->ref_root,
4129                                 (unsigned long long)backref->dir,
4130                                 (unsigned long long)backref->index,
4131                                 backref->namelen, backref->name,
4132                                 backref->errors);
4133                         print_ref_error(backref->errors);
4134                 }
4135         }
4136         return errors > 0 ? 1 : 0;
4137 }
4138
4139 static int process_root_ref(struct extent_buffer *eb, int slot,
4140                             struct btrfs_key *key,
4141                             struct cache_tree *root_cache)
4142 {
4143         u64 dirid;
4144         u64 index;
4145         u32 len;
4146         u32 name_len;
4147         struct btrfs_root_ref *ref;
4148         char namebuf[BTRFS_NAME_LEN];
4149         int error;
4150
4151         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4152
4153         dirid = btrfs_root_ref_dirid(eb, ref);
4154         index = btrfs_root_ref_sequence(eb, ref);
4155         name_len = btrfs_root_ref_name_len(eb, ref);
4156
4157         if (name_len <= BTRFS_NAME_LEN) {
4158                 len = name_len;
4159                 error = 0;
4160         } else {
4161                 len = BTRFS_NAME_LEN;
4162                 error = REF_ERR_NAME_TOO_LONG;
4163         }
4164         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4165
4166         if (key->type == BTRFS_ROOT_REF_KEY) {
4167                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4168                                  index, namebuf, len, key->type, error);
4169         } else {
4170                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4171                                  index, namebuf, len, key->type, error);
4172         }
4173         return 0;
4174 }
4175
4176 static void free_corrupt_block(struct cache_extent *cache)
4177 {
4178         struct btrfs_corrupt_block *corrupt;
4179
4180         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4181         free(corrupt);
4182 }
4183
4184 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4185
4186 /*
4187  * Repair the btree of the given root.
4188  *
4189  * The fix is to remove the node key in corrupt_blocks cache_tree.
4190  * and rebalance the tree.
4191  * After the fix, the btree should be writeable.
4192  */
4193 static int repair_btree(struct btrfs_root *root,
4194                         struct cache_tree *corrupt_blocks)
4195 {
4196         struct btrfs_trans_handle *trans;
4197         struct btrfs_path path;
4198         struct btrfs_corrupt_block *corrupt;
4199         struct cache_extent *cache;
4200         struct btrfs_key key;
4201         u64 offset;
4202         int level;
4203         int ret = 0;
4204
4205         if (cache_tree_empty(corrupt_blocks))
4206                 return 0;
4207
4208         trans = btrfs_start_transaction(root, 1);
4209         if (IS_ERR(trans)) {
4210                 ret = PTR_ERR(trans);
4211                 fprintf(stderr, "Error starting transaction: %s\n",
4212                         strerror(-ret));
4213                 return ret;
4214         }
4215         btrfs_init_path(&path);
4216         cache = first_cache_extent(corrupt_blocks);
4217         while (cache) {
4218                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4219                                        cache);
4220                 level = corrupt->level;
4221                 path.lowest_level = level;
4222                 key.objectid = corrupt->key.objectid;
4223                 key.type = corrupt->key.type;
4224                 key.offset = corrupt->key.offset;
4225
4226                 /*
4227                  * Here we don't want to do any tree balance, since it may
4228                  * cause a balance with corrupted brother leaf/node,
4229                  * so ins_len set to 0 here.
4230                  * Balance will be done after all corrupt node/leaf is deleted.
4231                  */
4232                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4233                 if (ret < 0)
4234                         goto out;
4235                 offset = btrfs_node_blockptr(path.nodes[level],
4236                                              path.slots[level]);
4237
4238                 /* Remove the ptr */
4239                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4240                 if (ret < 0)
4241                         goto out;
4242                 /*
4243                  * Remove the corresponding extent
4244                  * return value is not concerned.
4245                  */
4246                 btrfs_release_path(&path);
4247                 ret = btrfs_free_extent(trans, root, offset,
4248                                 root->fs_info->nodesize, 0,
4249                                 root->root_key.objectid, level - 1, 0);
4250                 cache = next_cache_extent(cache);
4251         }
4252
4253         /* Balance the btree using btrfs_search_slot() */
4254         cache = first_cache_extent(corrupt_blocks);
4255         while (cache) {
4256                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4257                                        cache);
4258                 memcpy(&key, &corrupt->key, sizeof(key));
4259                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4260                 if (ret < 0)
4261                         goto out;
4262                 /* return will always >0 since it won't find the item */
4263                 ret = 0;
4264                 btrfs_release_path(&path);
4265                 cache = next_cache_extent(cache);
4266         }
4267 out:
4268         btrfs_commit_transaction(trans, root);
4269         btrfs_release_path(&path);
4270         return ret;
4271 }
4272
4273 static int check_fs_root(struct btrfs_root *root,
4274                          struct cache_tree *root_cache,
4275                          struct walk_control *wc)
4276 {
4277         int ret = 0;
4278         int err = 0;
4279         int wret;
4280         int level;
4281         struct btrfs_path path;
4282         struct shared_node root_node;
4283         struct root_record *rec;
4284         struct btrfs_root_item *root_item = &root->root_item;
4285         struct cache_tree corrupt_blocks;
4286         struct orphan_data_extent *orphan;
4287         struct orphan_data_extent *tmp;
4288         enum btrfs_tree_block_status status;
4289         struct node_refs nrefs;
4290
4291         /*
4292          * Reuse the corrupt_block cache tree to record corrupted tree block
4293          *
4294          * Unlike the usage in extent tree check, here we do it in a per
4295          * fs/subvol tree base.
4296          */
4297         cache_tree_init(&corrupt_blocks);
4298         root->fs_info->corrupt_blocks = &corrupt_blocks;
4299
4300         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4301                 rec = get_root_rec(root_cache, root->root_key.objectid);
4302                 BUG_ON(IS_ERR(rec));
4303                 if (btrfs_root_refs(root_item) > 0)
4304                         rec->found_root_item = 1;
4305         }
4306
4307         btrfs_init_path(&path);
4308         memset(&root_node, 0, sizeof(root_node));
4309         cache_tree_init(&root_node.root_cache);
4310         cache_tree_init(&root_node.inode_cache);
4311         memset(&nrefs, 0, sizeof(nrefs));
4312
4313         /* Move the orphan extent record to corresponding inode_record */
4314         list_for_each_entry_safe(orphan, tmp,
4315                                  &root->orphan_data_extents, list) {
4316                 struct inode_record *inode;
4317
4318                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4319                                       1);
4320                 BUG_ON(IS_ERR(inode));
4321                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4322                 list_move(&orphan->list, &inode->orphan_extents);
4323         }
4324
4325         level = btrfs_header_level(root->node);
4326         memset(wc->nodes, 0, sizeof(wc->nodes));
4327         wc->nodes[level] = &root_node;
4328         wc->active_node = level;
4329         wc->root_level = level;
4330
4331         /* We may not have checked the root block, lets do that now */
4332         if (btrfs_is_leaf(root->node))
4333                 status = btrfs_check_leaf(root, NULL, root->node);
4334         else
4335                 status = btrfs_check_node(root, NULL, root->node);
4336         if (status != BTRFS_TREE_BLOCK_CLEAN)
4337                 return -EIO;
4338
4339         if (btrfs_root_refs(root_item) > 0 ||
4340             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4341                 path.nodes[level] = root->node;
4342                 extent_buffer_get(root->node);
4343                 path.slots[level] = 0;
4344         } else {
4345                 struct btrfs_key key;
4346                 struct btrfs_disk_key found_key;
4347
4348                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4349                 level = root_item->drop_level;
4350                 path.lowest_level = level;
4351                 if (level > btrfs_header_level(root->node) ||
4352                     level >= BTRFS_MAX_LEVEL) {
4353                         error("ignoring invalid drop level: %u", level);
4354                         goto skip_walking;
4355                 }
4356                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4357                 if (wret < 0)
4358                         goto skip_walking;
4359                 btrfs_node_key(path.nodes[level], &found_key,
4360                                 path.slots[level]);
4361                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4362                                         sizeof(found_key)));
4363         }
4364
4365         while (1) {
4366                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4367                 if (wret < 0)
4368                         ret = wret;
4369                 if (wret != 0)
4370                         break;
4371
4372                 wret = walk_up_tree(root, &path, wc, &level);
4373                 if (wret < 0)
4374                         ret = wret;
4375                 if (wret != 0)
4376                         break;
4377         }
4378 skip_walking:
4379         btrfs_release_path(&path);
4380
4381         if (!cache_tree_empty(&corrupt_blocks)) {
4382                 struct cache_extent *cache;
4383                 struct btrfs_corrupt_block *corrupt;
4384
4385                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4386                        root->root_key.objectid);
4387                 cache = first_cache_extent(&corrupt_blocks);
4388                 while (cache) {
4389                         corrupt = container_of(cache,
4390                                                struct btrfs_corrupt_block,
4391                                                cache);
4392                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4393                                cache->start, corrupt->level,
4394                                corrupt->key.objectid, corrupt->key.type,
4395                                corrupt->key.offset);
4396                         cache = next_cache_extent(cache);
4397                 }
4398                 if (repair) {
4399                         printf("Try to repair the btree for root %llu\n",
4400                                root->root_key.objectid);
4401                         ret = repair_btree(root, &corrupt_blocks);
4402                         if (ret < 0)
4403                                 fprintf(stderr, "Failed to repair btree: %s\n",
4404                                         strerror(-ret));
4405                         if (!ret)
4406                                 printf("Btree for root %llu is fixed\n",
4407                                        root->root_key.objectid);
4408                 }
4409         }
4410
4411         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4412         if (err < 0)
4413                 ret = err;
4414
4415         if (root_node.current) {
4416                 root_node.current->checked = 1;
4417                 maybe_free_inode_rec(&root_node.inode_cache,
4418                                 root_node.current);
4419         }
4420
4421         err = check_inode_recs(root, &root_node.inode_cache);
4422         if (!ret)
4423                 ret = err;
4424
4425         free_corrupt_blocks_tree(&corrupt_blocks);
4426         root->fs_info->corrupt_blocks = NULL;
4427         free_orphan_data_extents(&root->orphan_data_extents);
4428         return ret;
4429 }
4430
4431 static int fs_root_objectid(u64 objectid)
4432 {
4433         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4434             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4435                 return 1;
4436         return is_fstree(objectid);
4437 }
4438
4439 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4440                           struct cache_tree *root_cache)
4441 {
4442         struct btrfs_path path;
4443         struct btrfs_key key;
4444         struct walk_control wc;
4445         struct extent_buffer *leaf, *tree_node;
4446         struct btrfs_root *tmp_root;
4447         struct btrfs_root *tree_root = fs_info->tree_root;
4448         int ret;
4449         int err = 0;
4450
4451         if (ctx.progress_enabled) {
4452                 ctx.tp = TASK_FS_ROOTS;
4453                 task_start(ctx.info);
4454         }
4455
4456         /*
4457          * Just in case we made any changes to the extent tree that weren't
4458          * reflected into the free space cache yet.
4459          */
4460         if (repair)
4461                 reset_cached_block_groups(fs_info);
4462         memset(&wc, 0, sizeof(wc));
4463         cache_tree_init(&wc.shared);
4464         btrfs_init_path(&path);
4465
4466 again:
4467         key.offset = 0;
4468         key.objectid = 0;
4469         key.type = BTRFS_ROOT_ITEM_KEY;
4470         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4471         if (ret < 0) {
4472                 err = 1;
4473                 goto out;
4474         }
4475         tree_node = tree_root->node;
4476         while (1) {
4477                 if (tree_node != tree_root->node) {
4478                         free_root_recs_tree(root_cache);
4479                         btrfs_release_path(&path);
4480                         goto again;
4481                 }
4482                 leaf = path.nodes[0];
4483                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4484                         ret = btrfs_next_leaf(tree_root, &path);
4485                         if (ret) {
4486                                 if (ret < 0)
4487                                         err = 1;
4488                                 break;
4489                         }
4490                         leaf = path.nodes[0];
4491                 }
4492                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4493                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4494                     fs_root_objectid(key.objectid)) {
4495                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4496                                 tmp_root = btrfs_read_fs_root_no_cache(
4497                                                 fs_info, &key);
4498                         } else {
4499                                 key.offset = (u64)-1;
4500                                 tmp_root = btrfs_read_fs_root(
4501                                                 fs_info, &key);
4502                         }
4503                         if (IS_ERR(tmp_root)) {
4504                                 err = 1;
4505                                 goto next;
4506                         }
4507                         ret = check_fs_root(tmp_root, root_cache, &wc);
4508                         if (ret == -EAGAIN) {
4509                                 free_root_recs_tree(root_cache);
4510                                 btrfs_release_path(&path);
4511                                 goto again;
4512                         }
4513                         if (ret)
4514                                 err = 1;
4515                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4516                                 btrfs_free_fs_root(tmp_root);
4517                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4518                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4519                         process_root_ref(leaf, path.slots[0], &key,
4520                                          root_cache);
4521                 }
4522 next:
4523                 path.slots[0]++;
4524         }
4525 out:
4526         btrfs_release_path(&path);
4527         if (err)
4528                 free_extent_cache_tree(&wc.shared);
4529         if (!cache_tree_empty(&wc.shared))
4530                 fprintf(stderr, "warning line %d\n", __LINE__);
4531
4532         task_stop(ctx.info);
4533
4534         return err;
4535 }
4536
4537 /*
4538  * Find the @index according by @ino and name.
4539  * Notice:time efficiency is O(N)
4540  *
4541  * @root:       the root of the fs/file tree
4542  * @index_ret:  the index as return value
4543  * @namebuf:    the name to match
4544  * @name_len:   the length of name to match
4545  * @file_type:  the file_type of INODE_ITEM to match
4546  *
4547  * Returns 0 if found and *@index_ret will be modified with right value
4548  * Returns< 0 not found and *@index_ret will be (u64)-1
4549  */
4550 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4551                           u64 *index_ret, char *namebuf, u32 name_len,
4552                           u8 file_type)
4553 {
4554         struct btrfs_path path;
4555         struct extent_buffer *node;
4556         struct btrfs_dir_item *di;
4557         struct btrfs_key key;
4558         struct btrfs_key location;
4559         char name[BTRFS_NAME_LEN] = {0};
4560
4561         u32 total;
4562         u32 cur = 0;
4563         u32 len;
4564         u32 data_len;
4565         u8 filetype;
4566         int slot;
4567         int ret;
4568
4569         ASSERT(index_ret);
4570
4571         /* search from the last index */
4572         key.objectid = dirid;
4573         key.offset = (u64)-1;
4574         key.type = BTRFS_DIR_INDEX_KEY;
4575
4576         btrfs_init_path(&path);
4577         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4578         if (ret < 0)
4579                 return ret;
4580
4581 loop:
4582         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4583         if (ret) {
4584                 ret = -ENOENT;
4585                 *index_ret = (64)-1;
4586                 goto out;
4587         }
4588         /* Check whether inode_id/filetype/name match */
4589         node = path.nodes[0];
4590         slot = path.slots[0];
4591         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4592         total = btrfs_item_size_nr(node, slot);
4593         while (cur < total) {
4594                 ret = -ENOENT;
4595                 len = btrfs_dir_name_len(node, di);
4596                 data_len = btrfs_dir_data_len(node, di);
4597
4598                 btrfs_dir_item_key_to_cpu(node, di, &location);
4599                 if (location.objectid != location_id ||
4600                     location.type != BTRFS_INODE_ITEM_KEY ||
4601                     location.offset != 0)
4602                         goto next;
4603
4604                 filetype = btrfs_dir_type(node, di);
4605                 if (file_type != filetype)
4606                         goto next;
4607
4608                 if (len > BTRFS_NAME_LEN)
4609                         len = BTRFS_NAME_LEN;
4610
4611                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4612                 if (len != name_len || strncmp(namebuf, name, len))
4613                         goto next;
4614
4615                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4616                 *index_ret = key.offset;
4617                 ret = 0;
4618                 goto out;
4619 next:
4620                 len += sizeof(*di) + data_len;
4621                 di = (struct btrfs_dir_item *)((char *)di + len);
4622                 cur += len;
4623         }
4624         goto loop;
4625
4626 out:
4627         btrfs_release_path(&path);
4628         return ret;
4629 }
4630
4631 /*
4632  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4633  * INODE_REF/INODE_EXTREF match.
4634  *
4635  * @root:       the root of the fs/file tree
4636  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4637  *              value while find index
4638  * @location_key: location key of the struct btrfs_dir_item to match
4639  * @name:       the name to match
4640  * @namelen:    the length of name
4641  * @file_type:  the type of file to match
4642  *
4643  * Return 0 if no error occurred.
4644  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4645  * DIR_ITEM/DIR_INDEX
4646  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4647  * and DIR_ITEM/DIR_INDEX mismatch
4648  */
4649 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4650                          struct btrfs_key *location_key, char *name,
4651                          u32 namelen, u8 file_type)
4652 {
4653         struct btrfs_path path;
4654         struct extent_buffer *node;
4655         struct btrfs_dir_item *di;
4656         struct btrfs_key location;
4657         char namebuf[BTRFS_NAME_LEN] = {0};
4658         u32 total;
4659         u32 cur = 0;
4660         u32 len;
4661         u32 data_len;
4662         u8 filetype;
4663         int slot;
4664         int ret;
4665
4666         /* get the index by traversing all index */
4667         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4668                 ret = find_dir_index(root, key->objectid,
4669                                      location_key->objectid, &key->offset,
4670                                      name, namelen, file_type);
4671                 if (ret)
4672                         ret = DIR_INDEX_MISSING;
4673                 return ret;
4674         }
4675
4676         btrfs_init_path(&path);
4677         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4678         if (ret) {
4679                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4680                         DIR_INDEX_MISSING;
4681                 goto out;
4682         }
4683
4684         /* Check whether inode_id/filetype/name match */
4685         node = path.nodes[0];
4686         slot = path.slots[0];
4687         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4688         total = btrfs_item_size_nr(node, slot);
4689         while (cur < total) {
4690                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4691                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4692
4693                 len = btrfs_dir_name_len(node, di);
4694                 data_len = btrfs_dir_data_len(node, di);
4695
4696                 btrfs_dir_item_key_to_cpu(node, di, &location);
4697                 if (location.objectid != location_key->objectid ||
4698                     location.type != location_key->type ||
4699                     location.offset != location_key->offset)
4700                         goto next;
4701
4702                 filetype = btrfs_dir_type(node, di);
4703                 if (file_type != filetype)
4704                         goto next;
4705
4706                 if (len > BTRFS_NAME_LEN) {
4707                         len = BTRFS_NAME_LEN;
4708                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4709                         root->objectid,
4710                         key->type == BTRFS_DIR_ITEM_KEY ?
4711                         "DIR_ITEM" : "DIR_INDEX",
4712                         key->objectid, key->offset, len);
4713                 }
4714                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4715                                    len);
4716                 if (len != namelen || strncmp(namebuf, name, len))
4717                         goto next;
4718
4719                 ret = 0;
4720                 goto out;
4721 next:
4722                 len += sizeof(*di) + data_len;
4723                 di = (struct btrfs_dir_item *)((char *)di + len);
4724                 cur += len;
4725         }
4726
4727 out:
4728         btrfs_release_path(&path);
4729         return ret;
4730 }
4731
4732 /*
4733  * Prints inode ref error message
4734  */
4735 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4736                                 u64 index, const char *namebuf, int name_len,
4737                                 u8 filetype, int err)
4738 {
4739         if (!err)
4740                 return;
4741
4742         /* root dir error */
4743         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4744                 error(
4745         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4746                       root->objectid, key->objectid, key->offset, namebuf);
4747                 return;
4748         }
4749
4750         /* normal error */
4751         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4752                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4753                       root->objectid, key->offset,
4754                       btrfs_name_hash(namebuf, name_len),
4755                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4756                       namebuf, filetype);
4757         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4758                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4759                       root->objectid, key->offset, index,
4760                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4761                       namebuf, filetype);
4762 }
4763
4764 /*
4765  * Insert the missing inode item.
4766  *
4767  * Returns 0 means success.
4768  * Returns <0 means error.
4769  */
4770 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4771                                      u8 filetype)
4772 {
4773         struct btrfs_key key;
4774         struct btrfs_trans_handle *trans;
4775         struct btrfs_path path;
4776         int ret;
4777
4778         key.objectid = ino;
4779         key.type = BTRFS_INODE_ITEM_KEY;
4780         key.offset = 0;
4781
4782         btrfs_init_path(&path);
4783         trans = btrfs_start_transaction(root, 1);
4784         if (IS_ERR(trans)) {
4785                 ret = -EIO;
4786                 goto out;
4787         }
4788
4789         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4790         if (ret < 0 || !ret)
4791                 goto fail;
4792
4793         /* insert inode item */
4794         create_inode_item_lowmem(trans, root, ino, filetype);
4795         ret = 0;
4796 fail:
4797         btrfs_commit_transaction(trans, root);
4798 out:
4799         if (ret)
4800                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4801                       root->objectid, ino);
4802         btrfs_release_path(&path);
4803         return ret;
4804 }
4805
4806 /*
4807  * The ternary means dir item, dir index and relative inode ref.
4808  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4809  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4810  * strategy:
4811  * If two of three is missing or mismatched, delete the existing one.
4812  * If one of three is missing or mismatched, add the missing one.
4813  *
4814  * returns 0 means success.
4815  * returns not 0 means on error;
4816  */
4817 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4818                           u64 index, char *name, int name_len, u8 filetype,
4819                           int err)
4820 {
4821         struct btrfs_trans_handle *trans;
4822         int stage = 0;
4823         int ret = 0;
4824
4825         /*
4826          * stage shall be one of following valild values:
4827          *      0: Fine, nothing to do.
4828          *      1: One of three is wrong, so add missing one.
4829          *      2: Two of three is wrong, so delete existed one.
4830          */
4831         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4832                 stage++;
4833         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4834                 stage++;
4835         if (err & (INODE_REF_MISSING))
4836                 stage++;
4837
4838         /* stage must be smllarer than 3 */
4839         ASSERT(stage < 3);
4840
4841         trans = btrfs_start_transaction(root, 1);
4842         if (stage == 2) {
4843                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4844                                    name_len, 0);
4845                 goto out;
4846         }
4847         if (stage == 1) {
4848                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4849                                filetype, &index, 1, 1);
4850                 goto out;
4851         }
4852 out:
4853         btrfs_commit_transaction(trans, root);
4854
4855         if (ret)
4856                 error("fail to repair inode %llu name %s filetype %u",
4857                       ino, name, filetype);
4858         else
4859                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4860                        stage == 2 ? "Delete" : "Add",
4861                        ino, name, filetype);
4862
4863         return ret;
4864 }
4865
4866 /*
4867  * Traverse the given INODE_REF and call find_dir_item() to find related
4868  * DIR_ITEM/DIR_INDEX.
4869  *
4870  * @root:       the root of the fs/file tree
4871  * @ref_key:    the key of the INODE_REF
4872  * @path        the path provides node and slot
4873  * @refs:       the count of INODE_REF
4874  * @mode:       the st_mode of INODE_ITEM
4875  * @name_ret:   returns with the first ref's name
4876  * @name_len_ret:    len of the name_ret
4877  *
4878  * Return 0 if no error occurred.
4879  */
4880 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4881                            struct btrfs_path *path, char *name_ret,
4882                            u32 *namelen_ret, u64 *refs_ret, int mode)
4883 {
4884         struct btrfs_key key;
4885         struct btrfs_key location;
4886         struct btrfs_inode_ref *ref;
4887         struct extent_buffer *node;
4888         char namebuf[BTRFS_NAME_LEN] = {0};
4889         u32 total;
4890         u32 cur = 0;
4891         u32 len;
4892         u32 name_len;
4893         u64 index;
4894         int ret;
4895         int err = 0;
4896         int tmp_err;
4897         int slot;
4898         int need_research = 0;
4899         u64 refs;
4900
4901 begin:
4902         err = 0;
4903         cur = 0;
4904         refs = *refs_ret;
4905
4906         /* since after repair, path and the dir item may be changed */
4907         if (need_research) {
4908                 need_research = 0;
4909                 btrfs_release_path(path);
4910                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4911                 /* the item was deleted, let path point to the last checked item */
4912                 if (ret > 0) {
4913                         if (path->slots[0] == 0)
4914                                 btrfs_prev_leaf(root, path);
4915                         else
4916                                 path->slots[0]--;
4917                 }
4918                 if (ret)
4919                         goto out;
4920         }
4921
4922         location.objectid = ref_key->objectid;
4923         location.type = BTRFS_INODE_ITEM_KEY;
4924         location.offset = 0;
4925         node = path->nodes[0];
4926         slot = path->slots[0];
4927
4928         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4929         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4930         total = btrfs_item_size_nr(node, slot);
4931
4932 next:
4933         /* Update inode ref count */
4934         refs++;
4935         tmp_err = 0;
4936         index = btrfs_inode_ref_index(node, ref);
4937         name_len = btrfs_inode_ref_name_len(node, ref);
4938
4939         if (name_len <= BTRFS_NAME_LEN) {
4940                 len = name_len;
4941         } else {
4942                 len = BTRFS_NAME_LEN;
4943                 warning("root %llu INODE_REF[%llu %llu] name too long",
4944                         root->objectid, ref_key->objectid, ref_key->offset);
4945         }
4946
4947         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4948
4949         /* copy the first name found to name_ret */
4950         if (refs == 1 && name_ret) {
4951                 memcpy(name_ret, namebuf, len);
4952                 *namelen_ret = len;
4953         }
4954
4955         /* Check root dir ref */
4956         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4957                 if (index != 0 || len != strlen("..") ||
4958                     strncmp("..", namebuf, len) ||
4959                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4960                         /* set err bits then repair will delete the ref */
4961                         err |= DIR_INDEX_MISSING;
4962                         err |= DIR_ITEM_MISSING;
4963                 }
4964                 goto end;
4965         }
4966
4967         /* Find related DIR_INDEX */
4968         key.objectid = ref_key->offset;
4969         key.type = BTRFS_DIR_INDEX_KEY;
4970         key.offset = index;
4971         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4972                             imode_to_type(mode));
4973
4974         /* Find related dir_item */
4975         key.objectid = ref_key->offset;
4976         key.type = BTRFS_DIR_ITEM_KEY;
4977         key.offset = btrfs_name_hash(namebuf, len);
4978         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4979                             imode_to_type(mode));
4980 end:
4981         if (tmp_err && repair) {
4982                 ret = repair_ternary_lowmem(root, ref_key->offset,
4983                                             ref_key->objectid, index, namebuf,
4984                                             name_len, imode_to_type(mode),
4985                                             tmp_err);
4986                 if (!ret) {
4987                         need_research = 1;
4988                         goto begin;
4989                 }
4990         }
4991         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4992                             imode_to_type(mode), tmp_err);
4993         err |= tmp_err;
4994         len = sizeof(*ref) + name_len;
4995         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4996         cur += len;
4997         if (cur < total)
4998                 goto next;
4999
5000 out:
5001         *refs_ret = refs;
5002         return err;
5003 }
5004
5005 /*
5006  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5007  * DIR_ITEM/DIR_INDEX.
5008  *
5009  * @root:       the root of the fs/file tree
5010  * @ref_key:    the key of the INODE_EXTREF
5011  * @refs:       the count of INODE_EXTREF
5012  * @mode:       the st_mode of INODE_ITEM
5013  *
5014  * Return 0 if no error occurred.
5015  */
5016 static int check_inode_extref(struct btrfs_root *root,
5017                               struct btrfs_key *ref_key,
5018                               struct extent_buffer *node, int slot, u64 *refs,
5019                               int mode)
5020 {
5021         struct btrfs_key key;
5022         struct btrfs_key location;
5023         struct btrfs_inode_extref *extref;
5024         char namebuf[BTRFS_NAME_LEN] = {0};
5025         u32 total;
5026         u32 cur = 0;
5027         u32 len;
5028         u32 name_len;
5029         u64 index;
5030         u64 parent;
5031         int ret;
5032         int err = 0;
5033
5034         location.objectid = ref_key->objectid;
5035         location.type = BTRFS_INODE_ITEM_KEY;
5036         location.offset = 0;
5037
5038         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5039         total = btrfs_item_size_nr(node, slot);
5040
5041 next:
5042         /* update inode ref count */
5043         (*refs)++;
5044         name_len = btrfs_inode_extref_name_len(node, extref);
5045         index = btrfs_inode_extref_index(node, extref);
5046         parent = btrfs_inode_extref_parent(node, extref);
5047         if (name_len <= BTRFS_NAME_LEN) {
5048                 len = name_len;
5049         } else {
5050                 len = BTRFS_NAME_LEN;
5051                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5052                         root->objectid, ref_key->objectid, ref_key->offset);
5053         }
5054         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5055
5056         /* Check root dir ref name */
5057         if (index == 0 && strncmp(namebuf, "..", name_len)) {
5058                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5059                       root->objectid, ref_key->objectid, ref_key->offset,
5060                       namebuf);
5061                 err |= ROOT_DIR_ERROR;
5062         }
5063
5064         /* find related dir_index */
5065         key.objectid = parent;
5066         key.type = BTRFS_DIR_INDEX_KEY;
5067         key.offset = index;
5068         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5069         err |= ret;
5070
5071         /* find related dir_item */
5072         key.objectid = parent;
5073         key.type = BTRFS_DIR_ITEM_KEY;
5074         key.offset = btrfs_name_hash(namebuf, len);
5075         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5076         err |= ret;
5077
5078         len = sizeof(*extref) + name_len;
5079         extref = (struct btrfs_inode_extref *)((char *)extref + len);
5080         cur += len;
5081
5082         if (cur < total)
5083                 goto next;
5084
5085         return err;
5086 }
5087
5088 /*
5089  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5090  * DIR_ITEM/DIR_INDEX match.
5091  * Return with @index_ret.
5092  *
5093  * @root:       the root of the fs/file tree
5094  * @key:        the key of the INODE_REF/INODE_EXTREF
5095  * @name:       the name in the INODE_REF/INODE_EXTREF
5096  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
5097  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
5098  *              value (64)-1 means do not check index
5099  * @ext_ref:    the EXTENDED_IREF feature
5100  *
5101  * Return 0 if no error occurred.
5102  * Return >0 for error bitmap
5103  */
5104 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5105                           char *name, int namelen, u64 *index_ret,
5106                           unsigned int ext_ref)
5107 {
5108         struct btrfs_path path;
5109         struct btrfs_inode_ref *ref;
5110         struct btrfs_inode_extref *extref;
5111         struct extent_buffer *node;
5112         char ref_namebuf[BTRFS_NAME_LEN] = {0};
5113         u32 total;
5114         u32 cur = 0;
5115         u32 len;
5116         u32 ref_namelen;
5117         u64 ref_index;
5118         u64 parent;
5119         u64 dir_id;
5120         int slot;
5121         int ret;
5122
5123         ASSERT(index_ret);
5124
5125         btrfs_init_path(&path);
5126         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5127         if (ret) {
5128                 ret = INODE_REF_MISSING;
5129                 goto extref;
5130         }
5131
5132         node = path.nodes[0];
5133         slot = path.slots[0];
5134
5135         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5136         total = btrfs_item_size_nr(node, slot);
5137
5138         /* Iterate all entry of INODE_REF */
5139         while (cur < total) {
5140                 ret = INODE_REF_MISSING;
5141
5142                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5143                 ref_index = btrfs_inode_ref_index(node, ref);
5144                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5145                         goto next_ref;
5146
5147                 if (cur + sizeof(*ref) + ref_namelen > total ||
5148                     ref_namelen > BTRFS_NAME_LEN) {
5149                         warning("root %llu INODE %s[%llu %llu] name too long",
5150                                 root->objectid,
5151                                 key->type == BTRFS_INODE_REF_KEY ?
5152                                         "REF" : "EXTREF",
5153                                 key->objectid, key->offset);
5154
5155                         if (cur + sizeof(*ref) > total)
5156                                 break;
5157                         len = min_t(u32, total - cur - sizeof(*ref),
5158                                     BTRFS_NAME_LEN);
5159                 } else {
5160                         len = ref_namelen;
5161                 }
5162
5163                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5164                                    len);
5165
5166                 if (len != namelen || strncmp(ref_namebuf, name, len))
5167                         goto next_ref;
5168
5169                 *index_ret = ref_index;
5170                 ret = 0;
5171                 goto out;
5172 next_ref:
5173                 len = sizeof(*ref) + ref_namelen;
5174                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5175                 cur += len;
5176         }
5177
5178 extref:
5179         /* Skip if not support EXTENDED_IREF feature */
5180         if (!ext_ref)
5181                 goto out;
5182
5183         btrfs_release_path(&path);
5184         btrfs_init_path(&path);
5185
5186         dir_id = key->offset;
5187         key->type = BTRFS_INODE_EXTREF_KEY;
5188         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5189
5190         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5191         if (ret) {
5192                 ret = INODE_REF_MISSING;
5193                 goto out;
5194         }
5195
5196         node = path.nodes[0];
5197         slot = path.slots[0];
5198
5199         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5200         cur = 0;
5201         total = btrfs_item_size_nr(node, slot);
5202
5203         /* Iterate all entry of INODE_EXTREF */
5204         while (cur < total) {
5205                 ret = INODE_REF_MISSING;
5206
5207                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5208                 ref_index = btrfs_inode_extref_index(node, extref);
5209                 parent = btrfs_inode_extref_parent(node, extref);
5210                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5211                         goto next_extref;
5212
5213                 if (parent != dir_id)
5214                         goto next_extref;
5215
5216                 if (ref_namelen <= BTRFS_NAME_LEN) {
5217                         len = ref_namelen;
5218                 } else {
5219                         len = BTRFS_NAME_LEN;
5220                         warning("root %llu INODE %s[%llu %llu] name too long",
5221                                 root->objectid,
5222                                 key->type == BTRFS_INODE_REF_KEY ?
5223                                         "REF" : "EXTREF",
5224                                 key->objectid, key->offset);
5225                 }
5226                 read_extent_buffer(node, ref_namebuf,
5227                                    (unsigned long)(extref + 1), len);
5228
5229                 if (len != namelen || strncmp(ref_namebuf, name, len))
5230                         goto next_extref;
5231
5232                 *index_ret = ref_index;
5233                 ret = 0;
5234                 goto out;
5235
5236 next_extref:
5237                 len = sizeof(*extref) + ref_namelen;
5238                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5239                 cur += len;
5240
5241         }
5242 out:
5243         btrfs_release_path(&path);
5244         return ret;
5245 }
5246
5247 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5248                                u64 ino, u64 index, const char *namebuf,
5249                                int name_len, u8 filetype, int err)
5250 {
5251         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5252                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5253                       root->objectid, key->objectid, key->offset, namebuf,
5254                       filetype,
5255                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5256         }
5257
5258         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5259                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5260                       root->objectid, key->objectid, index, namebuf, filetype,
5261                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5262         }
5263
5264         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5265                 error(
5266                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5267                       root->objectid, ino, index, namebuf, filetype,
5268                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5269         }
5270
5271         if (err & INODE_REF_MISSING)
5272                 error(
5273                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5274                       root->objectid, ino, key->objectid, namebuf, filetype);
5275
5276 }
5277
5278 /*
5279  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5280  *
5281  * Returns error after repair
5282  */
5283 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5284                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5285                            int err)
5286 {
5287         int ret;
5288
5289         if (err & INODE_ITEM_MISSING) {
5290                 ret = repair_inode_item_missing(root, ino, filetype);
5291                 if (!ret)
5292                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5293         }
5294
5295         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5296                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5297                                             name_len, filetype, err);
5298                 if (!ret) {
5299                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5300                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5301                         err &= ~(INODE_REF_MISSING);
5302                 }
5303         }
5304         return err;
5305 }
5306
5307 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5308                 u64 *size_ret)
5309 {
5310         struct btrfs_key key;
5311         struct btrfs_path path;
5312         u32 len;
5313         struct btrfs_dir_item *di;
5314         int ret;
5315         int cur = 0;
5316         int total = 0;
5317
5318         ASSERT(size_ret);
5319         *size_ret = 0;
5320
5321         key.objectid = ino;
5322         key.type = type;
5323         key.offset = (u64)-1;
5324
5325         btrfs_init_path(&path);
5326         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5327         if (ret < 0) {
5328                 ret = -EIO;
5329                 goto out;
5330         }
5331         /* if found, go to spacial case */
5332         if (ret == 0)
5333                 goto special_case;
5334
5335 loop:
5336         ret = btrfs_previous_item(root, &path, ino, type);
5337
5338         if (ret) {
5339                 ret = 0;
5340                 goto out;
5341         }
5342
5343 special_case:
5344         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5345         cur = 0;
5346         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5347
5348         while (cur < total) {
5349                 len = btrfs_dir_name_len(path.nodes[0], di);
5350                 if (len > BTRFS_NAME_LEN)
5351                         len = BTRFS_NAME_LEN;
5352                 *size_ret += len;
5353
5354                 len += btrfs_dir_data_len(path.nodes[0], di);
5355                 len += sizeof(*di);
5356                 di = (struct btrfs_dir_item *)((char *)di + len);
5357                 cur += len;
5358         }
5359         goto loop;
5360
5361 out:
5362         btrfs_release_path(&path);
5363         return ret;
5364 }
5365
5366 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5367 {
5368         u64 item_size;
5369         u64 index_size;
5370         int ret;
5371
5372         ASSERT(size);
5373         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5374         if (ret)
5375                 goto out;
5376
5377         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5378         if (ret)
5379                 goto out;
5380
5381         *size = item_size + index_size;
5382
5383 out:
5384         if (ret)
5385                 error("failed to count root %llu INODE[%llu] root size",
5386                       root->objectid, ino);
5387         return ret;
5388 }
5389
5390 /*
5391  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5392  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5393  *
5394  * @root:       the root of the fs/file tree
 * @di_key:     the key of the DIR_ITEM/DIR_INDEX to check
5396  * @path:       the path
5397  * @size:       the st_size of the INODE_ITEM
5398  * @ext_ref:    the EXTENDED_IREF feature
5399  *
5400  * Return 0 if no error occurred.
5401  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5402  */
5403 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5404                           struct btrfs_path *path, u64 *size,
5405                           unsigned int ext_ref)
5406 {
5407         struct btrfs_dir_item *di;
5408         struct btrfs_inode_item *ii;
5409         struct btrfs_key key;
5410         struct btrfs_key location;
5411         struct extent_buffer *node;
5412         int slot;
5413         char namebuf[BTRFS_NAME_LEN] = {0};
5414         u32 total;
5415         u32 cur = 0;
5416         u32 len;
5417         u32 name_len;
5418         u32 data_len;
5419         u8 filetype;
5420         u32 mode = 0;
5421         u64 index;
5422         int ret;
5423         int err;
5424         int tmp_err;
5425         int need_research = 0;
5426
5427         /*
5428          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5429          * ignore index check.
5430          */
5431         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5432                 index = di_key->offset;
5433         else
5434                 index = (u64)-1;
5435 begin:
5436         err = 0;
5437         cur = 0;
5438
5439         /* since after repair, path and the dir item may be changed */
5440         if (need_research) {
5441                 need_research = 0;
5442                 err |= DIR_COUNT_AGAIN;
5443                 btrfs_release_path(path);
5444                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5445                 /* the item was deleted, let path point the last checked item */
5446                 if (ret > 0) {
5447                         if (path->slots[0] == 0)
5448                                 btrfs_prev_leaf(root, path);
5449                         else
5450                                 path->slots[0]--;
5451                 }
5452                 if (ret)
5453                         goto out;
5454         }
5455
5456         node = path->nodes[0];
5457         slot = path->slots[0];
5458
5459         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5460         total = btrfs_item_size_nr(node, slot);
5461         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5462
5463         while (cur < total) {
5464                 data_len = btrfs_dir_data_len(node, di);
5465                 tmp_err = 0;
5466                 if (data_len)
5467                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5468                               root->objectid,
5469               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5470                               di_key->objectid, di_key->offset, data_len);
5471
5472                 name_len = btrfs_dir_name_len(node, di);
5473                 if (name_len <= BTRFS_NAME_LEN) {
5474                         len = name_len;
5475                 } else {
5476                         len = BTRFS_NAME_LEN;
5477                         warning("root %llu %s[%llu %llu] name too long",
5478                                 root->objectid,
5479                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5480                                 di_key->objectid, di_key->offset);
5481                 }
5482                 (*size) += name_len;
5483                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5484                                    len);
5485                 filetype = btrfs_dir_type(node, di);
5486
5487                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5488                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5489                         err |= -EIO;
5490                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5491                         root->objectid, di_key->objectid, di_key->offset,
5492                         namebuf, len, filetype, di_key->offset,
5493                         btrfs_name_hash(namebuf, len));
5494                 }
5495
5496                 btrfs_dir_item_key_to_cpu(node, di, &location);
5497                 /* Ignore related ROOT_ITEM check */
5498                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5499                         goto next;
5500
5501                 btrfs_release_path(path);
5502                 /* Check relative INODE_ITEM(existence/filetype) */
5503                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5504                 if (ret) {
5505                         tmp_err |= INODE_ITEM_MISSING;
5506                         goto next;
5507                 }
5508
5509                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5510                                     struct btrfs_inode_item);
5511                 mode = btrfs_inode_mode(path->nodes[0], ii);
5512                 if (imode_to_type(mode) != filetype) {
5513                         tmp_err |= INODE_ITEM_MISMATCH;
5514                         goto next;
5515                 }
5516
5517                 /* Check relative INODE_REF/INODE_EXTREF */
5518                 key.objectid = location.objectid;
5519                 key.type = BTRFS_INODE_REF_KEY;
5520                 key.offset = di_key->objectid;
5521                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5522                                           &index, ext_ref);
5523
5524                 /* check relative INDEX/ITEM */
5525                 key.objectid = di_key->objectid;
5526                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5527                         key.type = BTRFS_DIR_INDEX_KEY;
5528                         key.offset = index;
5529                 } else {
5530                         key.type = BTRFS_DIR_ITEM_KEY;
5531                         key.offset = btrfs_name_hash(namebuf, name_len);
5532                 }
5533
5534                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5535                                          name_len, filetype);
5536                 /* find_dir_item may find index */
5537                 if (key.type == BTRFS_DIR_INDEX_KEY)
5538                         index = key.offset;
5539 next:
5540
5541                 if (tmp_err && repair) {
5542                         ret = repair_dir_item(root, di_key->objectid,
5543                                               location.objectid, index,
5544                                               imode_to_type(mode), namebuf,
5545                                               name_len, tmp_err);
5546                         if (ret != tmp_err) {
5547                                 need_research = 1;
5548                                 goto begin;
5549                         }
5550                 }
5551                 btrfs_release_path(path);
5552                 print_dir_item_err(root, di_key, location.objectid, index,
5553                                    namebuf, name_len, filetype, tmp_err);
5554                 err |= tmp_err;
5555                 len = sizeof(*di) + name_len + data_len;
5556                 di = (struct btrfs_dir_item *)((char *)di + len);
5557                 cur += len;
5558
5559                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5560                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5561                               root->objectid, di_key->objectid,
5562                               di_key->offset);
5563                         break;
5564                 }
5565         }
5566 out:
5567         /* research path */
5568         btrfs_release_path(path);
5569         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5570         if (ret)
5571                 err |= ret > 0 ? -ENOENT : ret;
5572         return err;
5573 }
5574
5575 /*
5576  * Wrapper function of btrfs_punch_hole.
5577  *
5578  * Returns 0 means success.
5579  * Returns not 0 means error.
5580  */
5581 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5582                              u64 len)
5583 {
5584         struct btrfs_trans_handle *trans;
5585         int ret = 0;
5586
5587         trans = btrfs_start_transaction(root, 1);
5588         if (IS_ERR(trans))
5589                 return PTR_ERR(trans);
5590
5591         ret = btrfs_punch_hole(trans, root, ino, start, len);
5592         if (ret)
5593                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5594                       start, len, ino);
5595         else
5596                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5597                        ino);
5598
5599         btrfs_commit_transaction(trans, root);
5600         return ret;
5601 }
5602
5603 /*
5604  * Check file extent datasum/hole, update the size of the file extents,
5605  * check and update the last offset of the file extent.
5606  *
5607  * @root:       the root of fs/file tree.
5608  * @fkey:       the key of the file extent.
5609  * @nodatasum:  INODE_NODATASUM feature.
5610  * @size:       the sum of all EXTENT_DATA items size for this inode.
5611  * @end:        the offset of the last extent.
5612  *
5613  * Return 0 if no error occurred.
5614  */
5615 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5616                              struct extent_buffer *node, int slot,
5617                              unsigned int nodatasum, u64 *size, u64 *end)
5618 {
5619         struct btrfs_file_extent_item *fi;
5620         u64 disk_bytenr;
5621         u64 disk_num_bytes;
5622         u64 extent_num_bytes;
5623         u64 extent_offset;
5624         u64 csum_found;         /* In byte size, sectorsize aligned */
5625         u64 search_start;       /* Logical range start we search for csum */
5626         u64 search_len;         /* Logical range len we search for csum */
5627         unsigned int extent_type;
5628         unsigned int is_hole;
5629         int compressed = 0;
5630         int ret;
5631         int err = 0;
5632
5633         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5634
5635         /* Check inline extent */
5636         extent_type = btrfs_file_extent_type(node, fi);
5637         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5638                 struct btrfs_item *e = btrfs_item_nr(slot);
5639                 u32 item_inline_len;
5640
5641                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5642                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5643                 compressed = btrfs_file_extent_compression(node, fi);
5644                 if (extent_num_bytes == 0) {
5645                         error(
5646                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5647                                 root->objectid, fkey->objectid, fkey->offset);
5648                         err |= FILE_EXTENT_ERROR;
5649                 }
5650                 if (!compressed && extent_num_bytes != item_inline_len) {
5651                         error(
5652                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5653                                 root->objectid, fkey->objectid, fkey->offset,
5654                                 extent_num_bytes, item_inline_len);
5655                         err |= FILE_EXTENT_ERROR;
5656                 }
5657                 *end += extent_num_bytes;
5658                 *size += extent_num_bytes;
5659                 return err;
5660         }
5661
5662         /* Check extent type */
5663         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5664                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5665                 err |= FILE_EXTENT_ERROR;
5666                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5667                       root->objectid, fkey->objectid, fkey->offset);
5668                 return err;
5669         }
5670
5671         /* Check REG_EXTENT/PREALLOC_EXTENT */
5672         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5673         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5674         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5675         extent_offset = btrfs_file_extent_offset(node, fi);
5676         compressed = btrfs_file_extent_compression(node, fi);
5677         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5678
5679         /*
5680          * Check EXTENT_DATA csum
5681          *
5682          * For plain (uncompressed) extent, we should only check the range
5683          * we're referring to, as it's possible that part of prealloc extent
5684          * has been written, and has csum:
5685          *
5686          * |<--- Original large preallocated extent A ---->|
5687          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5688          *      No csum                         Has csum
5689          *
5690          * For compressed extent, we should check the whole range.
5691          */
5692         if (!compressed) {
5693                 search_start = disk_bytenr + extent_offset;
5694                 search_len = extent_num_bytes;
5695         } else {
5696                 search_start = disk_bytenr;
5697                 search_len = disk_num_bytes;
5698         }
5699         ret = count_csum_range(root, search_start, search_len, &csum_found);
5700         if (csum_found > 0 && nodatasum) {
5701                 err |= ODD_CSUM_ITEM;
5702                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5703                       root->objectid, fkey->objectid, fkey->offset);
5704         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5705                    !is_hole && (ret < 0 || csum_found < search_len)) {
5706                 err |= CSUM_ITEM_MISSING;
5707                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5708                       root->objectid, fkey->objectid, fkey->offset,
5709                       csum_found, search_len);
5710         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5711                 err |= ODD_CSUM_ITEM;
5712                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5713                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5714         }
5715
5716         /* Check EXTENT_DATA hole */
5717         if (!no_holes && *end != fkey->offset) {
5718                 if (repair)
5719                         ret = punch_extent_hole(root, fkey->objectid,
5720                                                 *end, fkey->offset - *end);
5721                 if (!repair || ret) {
5722                         err |= FILE_EXTENT_ERROR;
5723                         error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5724                               root->objectid, fkey->objectid, fkey->offset);
5725                 }
5726         }
5727
5728         *end += extent_num_bytes;
5729         if (!is_hole)
5730                 *size += extent_num_bytes;
5731
5732         return err;
5733 }
5734
5735 /*
5736  * Set inode item nbytes to @nbytes
5737  *
5738  * Returns  0     on success
5739  * Returns  != 0  on error
5740  */
5741 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5742                                       struct btrfs_path *path,
5743                                       u64 ino, u64 nbytes)
5744 {
5745         struct btrfs_trans_handle *trans;
5746         struct btrfs_inode_item *ii;
5747         struct btrfs_key key;
5748         struct btrfs_key research_key;
5749         int err = 0;
5750         int ret;
5751
5752         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5753
5754         key.objectid = ino;
5755         key.type = BTRFS_INODE_ITEM_KEY;
5756         key.offset = 0;
5757
5758         trans = btrfs_start_transaction(root, 1);
5759         if (IS_ERR(trans)) {
5760                 ret = PTR_ERR(trans);
5761                 err |= ret;
5762                 goto out;
5763         }
5764
5765         btrfs_release_path(path);
5766         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5767         if (ret > 0)
5768                 ret = -ENOENT;
5769         if (ret) {
5770                 err |= ret;
5771                 goto fail;
5772         }
5773
5774         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5775                             struct btrfs_inode_item);
5776         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5777         btrfs_mark_buffer_dirty(path->nodes[0]);
5778 fail:
5779         btrfs_commit_transaction(trans, root);
5780 out:
5781         if (ret)
5782                 error("failed to set nbytes in inode %llu root %llu",
5783                       ino, root->root_key.objectid);
5784         else
5785                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5786                        root->root_key.objectid, nbytes);
5787
5788         /* research path */
5789         btrfs_release_path(path);
5790         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5791         err |= ret;
5792
5793         return err;
5794 }
5795
5796 /*
5797  * Set directory inode isize to @isize.
5798  *
5799  * Returns 0     on success.
5800  * Returns != 0  on error.
5801  */
5802 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5803                                    struct btrfs_path *path,
5804                                    u64 ino, u64 isize)
5805 {
5806         struct btrfs_trans_handle *trans;
5807         struct btrfs_inode_item *ii;
5808         struct btrfs_key key;
5809         struct btrfs_key research_key;
5810         int ret;
5811         int err = 0;
5812
5813         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5814
5815         key.objectid = ino;
5816         key.type = BTRFS_INODE_ITEM_KEY;
5817         key.offset = 0;
5818
5819         trans = btrfs_start_transaction(root, 1);
5820         if (IS_ERR(trans)) {
5821                 ret = PTR_ERR(trans);
5822                 err |= ret;
5823                 goto out;
5824         }
5825
5826         btrfs_release_path(path);
5827         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5828         if (ret > 0)
5829                 ret = -ENOENT;
5830         if (ret) {
5831                 err |= ret;
5832                 goto fail;
5833         }
5834
5835         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5836                             struct btrfs_inode_item);
5837         btrfs_set_inode_size(path->nodes[0], ii, isize);
5838         btrfs_mark_buffer_dirty(path->nodes[0]);
5839 fail:
5840         btrfs_commit_transaction(trans, root);
5841 out:
5842         if (ret)
5843                 error("failed to set isize in inode %llu root %llu",
5844                       ino, root->root_key.objectid);
5845         else
5846                 printf("Set isize in inode %llu root %llu to %llu\n",
5847                        ino, root->root_key.objectid, isize);
5848
5849         btrfs_release_path(path);
5850         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5851         err |= ret;
5852
5853         return err;
5854 }
5855
5856 /*
5857  * Wrapper function for btrfs_add_orphan_item().
5858  *
5859  * Returns 0     on success.
5860  * Returns != 0  on error.
5861  */
5862 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5863                                            struct btrfs_path *path, u64 ino)
5864 {
5865         struct btrfs_trans_handle *trans;
5866         struct btrfs_key research_key;
5867         int ret;
5868         int err = 0;
5869
5870         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5871
5872         trans = btrfs_start_transaction(root, 1);
5873         if (IS_ERR(trans)) {
5874                 ret = PTR_ERR(trans);
5875                 err |= ret;
5876                 goto out;
5877         }
5878
5879         btrfs_release_path(path);
5880         ret = btrfs_add_orphan_item(trans, root, path, ino);
5881         err |= ret;
5882         btrfs_commit_transaction(trans, root);
5883 out:
5884         if (ret)
5885                 error("failed to add inode %llu as orphan item root %llu",
5886                       ino, root->root_key.objectid);
5887         else
5888                 printf("Added inode %llu as orphan item root %llu\n",
5889                        ino, root->root_key.objectid);
5890
5891         btrfs_release_path(path);
5892         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5893         err |= ret;
5894
5895         return err;
5896 }
5897
5898 /* Set inode_item nlink to @ref_count.
5899  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5900  *
5901  * Returns 0 on success
5902  */
5903 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5904                                       struct btrfs_path *path, u64 ino,
5905                                       const char *name, u32 namelen,
5906                                       u64 ref_count, u8 filetype, u64 *nlink)
5907 {
5908         struct btrfs_trans_handle *trans;
5909         struct btrfs_inode_item *ii;
5910         struct btrfs_key key;
5911         struct btrfs_key old_key;
5912         char namebuf[BTRFS_NAME_LEN] = {0};
5913         int name_len;
5914         int ret;
5915         int ret2;
5916
5917         /* save the key */
5918         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5919
5920         if (name && namelen) {
5921                 ASSERT(namelen <= BTRFS_NAME_LEN);
5922                 memcpy(namebuf, name, namelen);
5923                 name_len = namelen;
5924         } else {
5925                 sprintf(namebuf, "%llu", ino);
5926                 name_len = count_digits(ino);
5927                 printf("Can't find file name for inode %llu, use %s instead\n",
5928                        ino, namebuf);
5929         }
5930
5931         trans = btrfs_start_transaction(root, 1);
5932         if (IS_ERR(trans)) {
5933                 ret = PTR_ERR(trans);
5934                 goto out;
5935         }
5936
5937         btrfs_release_path(path);
5938         /* if refs is 0, put it into lostfound */
5939         if (ref_count == 0) {
5940                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5941                                               name_len, filetype, &ref_count);
5942                 if (ret)
5943                         goto fail;
5944         }
5945
5946         /* reset inode_item's nlink to ref_count */
5947         key.objectid = ino;
5948         key.type = BTRFS_INODE_ITEM_KEY;
5949         key.offset = 0;
5950
5951         btrfs_release_path(path);
5952         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5953         if (ret > 0)
5954                 ret = -ENOENT;
5955         if (ret)
5956                 goto fail;
5957
5958         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5959                             struct btrfs_inode_item);
5960         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5961         btrfs_mark_buffer_dirty(path->nodes[0]);
5962
5963         if (nlink)
5964                 *nlink = ref_count;
5965 fail:
5966         btrfs_commit_transaction(trans, root);
5967 out:
5968         if (ret)
5969                 error(
5970         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5971                        root->objectid, ino, namebuf, filetype);
5972         else
5973                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5974                        root->objectid, ino, namebuf, filetype);
5975
5976         /* research */
5977         btrfs_release_path(path);
5978         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5979         if (ret2 < 0)
5980                 return ret |= ret2;
5981         return ret;
5982 }
5983
5984 /*
5985  * Check INODE_ITEM and related ITEMs (the same inode number)
5986  * 1. check link count
5987  * 2. check inode ref/extref
5988  * 3. check dir item/index
5989  *
5990  * @ext_ref:    the EXTENDED_IREF feature
5991  *
5992  * Return 0 if no error occurred.
5993  * Return >0 for error or hit the traversal is done(by error bitmap)
5994  */
5995 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5996                             unsigned int ext_ref)
5997 {
5998         struct extent_buffer *node;
5999         struct btrfs_inode_item *ii;
6000         struct btrfs_key key;
6001         struct btrfs_key last_key;
6002         u64 inode_id;
6003         u32 mode;
6004         u64 nlink;
6005         u64 nbytes;
6006         u64 isize;
6007         u64 size = 0;
6008         u64 refs = 0;
6009         u64 extent_end = 0;
6010         u64 extent_size = 0;
6011         unsigned int dir;
6012         unsigned int nodatasum;
6013         int slot;
6014         int ret;
6015         int err = 0;
6016         char namebuf[BTRFS_NAME_LEN] = {0};
6017         u32 name_len = 0;
6018
6019         node = path->nodes[0];
6020         slot = path->slots[0];
6021
6022         btrfs_item_key_to_cpu(node, &key, slot);
6023         inode_id = key.objectid;
6024
6025         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6026                 ret = btrfs_next_item(root, path);
6027                 if (ret > 0)
6028                         err |= LAST_ITEM;
6029                 return err;
6030         }
6031
6032         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6033         isize = btrfs_inode_size(node, ii);
6034         nbytes = btrfs_inode_nbytes(node, ii);
6035         mode = btrfs_inode_mode(node, ii);
6036         dir = imode_to_type(mode) == BTRFS_FT_DIR;
6037         nlink = btrfs_inode_nlink(node, ii);
6038         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
6039
6040         while (1) {
6041                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6042                 ret = btrfs_next_item(root, path);
6043                 if (ret < 0) {
6044                         /* out will fill 'err' rusing current statistics */
6045                         goto out;
6046                 } else if (ret > 0) {
6047                         err |= LAST_ITEM;
6048                         goto out;
6049                 }
6050
6051                 node = path->nodes[0];
6052                 slot = path->slots[0];
6053                 btrfs_item_key_to_cpu(node, &key, slot);
6054                 if (key.objectid != inode_id)
6055                         goto out;
6056
6057                 switch (key.type) {
6058                 case BTRFS_INODE_REF_KEY:
6059                         ret = check_inode_ref(root, &key, path, namebuf,
6060                                               &name_len, &refs, mode);
6061                         err |= ret;
6062                         break;
6063                 case BTRFS_INODE_EXTREF_KEY:
6064                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6065                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
6066                                         root->objectid, key.objectid,
6067                                         key.offset);
6068                         ret = check_inode_extref(root, &key, node, slot, &refs,
6069                                                  mode);
6070                         err |= ret;
6071                         break;
6072                 case BTRFS_DIR_ITEM_KEY:
6073                 case BTRFS_DIR_INDEX_KEY:
6074                         if (!dir) {
6075                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6076                                         root->objectid, inode_id,
6077                                         imode_to_type(mode), key.objectid,
6078                                         key.offset);
6079                         }
6080                         ret = check_dir_item(root, &key, path, &size, ext_ref);
6081                         err |= ret;
6082                         break;
6083                 case BTRFS_EXTENT_DATA_KEY:
6084                         if (dir) {
6085                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6086                                         root->objectid, inode_id, key.objectid,
6087                                         key.offset);
6088                         }
6089                         ret = check_file_extent(root, &key, node, slot,
6090                                                 nodatasum, &extent_size,
6091                                                 &extent_end);
6092                         err |= ret;
6093                         break;
6094                 case BTRFS_XATTR_ITEM_KEY:
6095                         break;
6096                 default:
6097                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6098                               key.objectid, key.type, key.offset);
6099                 }
6100         }
6101
6102 out:
6103         if (err & LAST_ITEM) {
6104                 btrfs_release_path(path);
6105                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6106                 if (ret)
6107                         return err;
6108         }
6109
6110         /* verify INODE_ITEM nlink/isize/nbytes */
6111         if (dir) {
6112                 if (repair && (err & DIR_COUNT_AGAIN)) {
6113                         err &= ~DIR_COUNT_AGAIN;
6114                         count_dir_isize(root, inode_id, &size);
6115                 }
6116
6117                 if ((nlink != 1 || refs != 1) && repair) {
6118                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6119                                 namebuf, name_len, refs, imode_to_type(mode),
6120                                 &nlink);
6121                 }
6122
6123                 if (nlink != 1) {
6124                         err |= LINK_COUNT_ERROR;
6125                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6126                               root->objectid, inode_id, nlink);
6127                 }
6128
6129                 /*
6130                  * Just a warning, as dir inode nbytes is just an
6131                  * instructive value.
6132                  */
6133                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6134                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6135                                 root->objectid, inode_id,
6136                                 root->fs_info->nodesize);
6137                 }
6138
6139                 if (isize != size) {
6140                         if (repair)
6141                                 ret = repair_dir_isize_lowmem(root, path,
6142                                                               inode_id, size);
6143                         if (!repair || ret) {
6144                                 err |= ISIZE_ERROR;
6145                                 error(
6146                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6147                                       root->objectid, inode_id, isize, size);
6148                         }
6149                 }
6150         } else {
6151                 if (nlink != refs) {
6152                         if (repair)
6153                                 ret = repair_inode_nlinks_lowmem(root, path,
6154                                          inode_id, namebuf, name_len, refs,
6155                                          imode_to_type(mode), &nlink);
6156                         if (!repair || ret) {
6157                                 err |= LINK_COUNT_ERROR;
6158                                 error(
6159                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6160                                       root->objectid, inode_id, nlink, refs);
6161                         }
6162                 } else if (!nlink) {
6163                         if (repair)
6164                                 ret = repair_inode_orphan_item_lowmem(root,
6165                                                               path, inode_id);
6166                         if (!repair || ret) {
6167                                 err |= ORPHAN_ITEM;
6168                                 error("root %llu INODE[%llu] is orphan item",
6169                                       root->objectid, inode_id);
6170                         }
6171                 }
6172
6173                 if (!nbytes && !no_holes && extent_end < isize) {
6174                         if (repair)
6175                                 ret = punch_extent_hole(root, inode_id,
6176                                                 extent_end, isize - extent_end);
6177                         if (!repair || ret) {
6178                                 err |= NBYTES_ERROR;
6179                                 error(
6180         "root %llu INODE[%llu] size %llu should have a file extent hole",
6181                                       root->objectid, inode_id, isize);
6182                         }
6183                 }
6184
6185                 if (nbytes != extent_size) {
6186                         if (repair)
6187                                 ret = repair_inode_nbytes_lowmem(root, path,
6188                                                          inode_id, extent_size);
6189                         if (!repair || ret) {
6190                                 err |= NBYTES_ERROR;
6191                                 error(
6192         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6193                                       root->objectid, inode_id, nbytes,
6194                                       extent_size);
6195                         }
6196                 }
6197         }
6198
6199         if (err & LAST_ITEM)
6200                 btrfs_next_item(root, path);
6201         return err;
6202 }
6203
6204 /*
6205  * Insert the missing inode item and inode ref.
6206  *
6207  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6208  * Root dir should be handled specially because root dir is the root of fs.
6209  *
6210  * returns err (>0 or 0) after repair
6211  */
6212 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6213 {
6214         struct btrfs_trans_handle *trans;
6215         struct btrfs_key key;
6216         struct btrfs_path path;
6217         int filetype = BTRFS_FT_DIR;
6218         int ret = 0;
6219
6220         btrfs_init_path(&path);
6221
6222         if (err & INODE_REF_MISSING) {
6223                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6224                 key.type = BTRFS_INODE_REF_KEY;
6225                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6226
6227                 trans = btrfs_start_transaction(root, 1);
6228                 if (IS_ERR(trans)) {
6229                         ret = PTR_ERR(trans);
6230                         goto out;
6231                 }
6232
6233                 btrfs_release_path(&path);
6234                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6235                 if (ret)
6236                         goto trans_fail;
6237
6238                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6239                                              BTRFS_FIRST_FREE_OBJECTID,
6240                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6241                 if (ret)
6242                         goto trans_fail;
6243
6244                 printf("Add INODE_REF[%llu %llu] name %s\n",
6245                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6246                        "..");
6247                 err &= ~INODE_REF_MISSING;
6248 trans_fail:
6249                 if (ret)
6250                         error("fail to insert first inode's ref");
6251                 btrfs_commit_transaction(trans, root);
6252         }
6253
6254         if (err & INODE_ITEM_MISSING) {
6255                 ret = repair_inode_item_missing(root,
6256                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6257                 if (ret)
6258                         goto out;
6259                 err &= ~INODE_ITEM_MISSING;
6260         }
6261 out:
6262         if (ret)
6263                 error("fail to repair first inode");
6264         btrfs_release_path(&path);
6265         return err;
6266 }
6267
6268 /*
6269  * check first root dir's inode_item and inode_ref
6270  *
6271  * returns 0 means no error
6272  * returns >0 means error
6273  * returns <0 means fatal error
6274  */
6275 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6276 {
6277         struct btrfs_path path;
6278         struct btrfs_key key;
6279         struct btrfs_inode_item *ii;
6280         u64 index;
6281         u32 mode;
6282         int err = 0;
6283         int ret;
6284
6285         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6286         key.type = BTRFS_INODE_ITEM_KEY;
6287         key.offset = 0;
6288
6289         /* For root being dropped, we don't need to check first inode */
6290         if (btrfs_root_refs(&root->root_item) == 0 &&
6291             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6292             BTRFS_FIRST_FREE_OBJECTID)
6293                 return 0;
6294
6295         btrfs_init_path(&path);
6296         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6297         if (ret < 0)
6298                 goto out;
6299         if (ret > 0) {
6300                 ret = 0;
6301                 err |= INODE_ITEM_MISSING;
6302         } else {
6303                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6304                                     struct btrfs_inode_item);
6305                 mode = btrfs_inode_mode(path.nodes[0], ii);
6306                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6307                         err |= INODE_ITEM_MISMATCH;
6308         }
6309
6310         /* lookup first inode ref */
6311         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6312         key.type = BTRFS_INODE_REF_KEY;
6313         /* special index value */
6314         index = 0;
6315
6316         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6317         if (ret < 0)
6318                 goto out;
6319         err |= ret;
6320
6321 out:
6322         btrfs_release_path(&path);
6323
6324         if (err && repair)
6325                 err = repair_fs_first_inode(root, err);
6326
6327         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6328                 error("root dir INODE_ITEM is %s",
6329                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6330         if (err & INODE_REF_MISSING)
6331                 error("root dir INODE_REF is missing");
6332
6333         return ret < 0 ? ret : err;
6334 }
6335
6336 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6337                                                 u64 parent, u64 root)
6338 {
6339         struct rb_node *node;
6340         struct tree_backref *back = NULL;
6341         struct tree_backref match = {
6342                 .node = {
6343                         .is_data = 0,
6344                 },
6345         };
6346
6347         if (parent) {
6348                 match.parent = parent;
6349                 match.node.full_backref = 1;
6350         } else {
6351                 match.root = root;
6352         }
6353
6354         node = rb_search(&rec->backref_tree, &match.node.node,
6355                          (rb_compare_keys)compare_extent_backref, NULL);
6356         if (node)
6357                 back = to_tree_backref(rb_node_to_extent_backref(node));
6358
6359         return back;
6360 }
6361
6362 static struct data_backref *find_data_backref(struct extent_record *rec,
6363                                                 u64 parent, u64 root,
6364                                                 u64 owner, u64 offset,
6365                                                 int found_ref,
6366                                                 u64 disk_bytenr, u64 bytes)
6367 {
6368         struct rb_node *node;
6369         struct data_backref *back = NULL;
6370         struct data_backref match = {
6371                 .node = {
6372                         .is_data = 1,
6373                 },
6374                 .owner = owner,
6375                 .offset = offset,
6376                 .bytes = bytes,
6377                 .found_ref = found_ref,
6378                 .disk_bytenr = disk_bytenr,
6379         };
6380
6381         if (parent) {
6382                 match.parent = parent;
6383                 match.node.full_backref = 1;
6384         } else {
6385                 match.root = root;
6386         }
6387
6388         node = rb_search(&rec->backref_tree, &match.node.node,
6389                          (rb_compare_keys)compare_extent_backref, NULL);
6390         if (node)
6391                 back = to_data_backref(rb_node_to_extent_backref(node));
6392
6393         return back;
6394 }
6395 /*
6396  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6397  * blocks and integrity of fs tree items.
6398  *
6399  * @root:         the root of the tree to be checked.
6400  * @ext_ref       feature EXTENDED_IREF is enable or not.
6401  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6402  *                otherwise means check fs tree(s) items relationship and
6403  *                @root MUST be a fs tree root.
6404  * Returns 0      represents OK.
6405  * Returns not 0  represents error.
6406  */
6407 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6408                             struct btrfs_root *root, unsigned int ext_ref,
6409                             int check_all)
6410
6411 {
6412         struct btrfs_path path;
6413         struct node_refs nrefs;
6414         struct btrfs_root_item *root_item = &root->root_item;
6415         int ret;
6416         int level;
6417         int err = 0;
6418
6419         memset(&nrefs, 0, sizeof(nrefs));
6420         if (!check_all) {
6421                 /*
6422                  * We need to manually check the first inode item (256)
6423                  * As the following traversal function will only start from
6424                  * the first inode item in the leaf, if inode item (256) is
6425                  * missing we will skip it forever.
6426                  */
6427                 ret = check_fs_first_inode(root, ext_ref);
6428                 if (ret < 0)
6429                         return ret;
6430         }
6431
6432
6433         level = btrfs_header_level(root->node);
6434         btrfs_init_path(&path);
6435
6436         if (btrfs_root_refs(root_item) > 0 ||
6437             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6438                 path.nodes[level] = root->node;
6439                 path.slots[level] = 0;
6440                 extent_buffer_get(root->node);
6441         } else {
6442                 struct btrfs_key key;
6443
6444                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6445                 level = root_item->drop_level;
6446                 path.lowest_level = level;
6447                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6448                 if (ret < 0)
6449                         goto out;
6450                 ret = 0;
6451         }
6452
6453         while (1) {
6454                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6455                                         ext_ref, check_all);
6456
6457                 err |= !!ret;
6458
6459                 /* if ret is negative, walk shall stop */
6460                 if (ret < 0) {
6461                         ret = err;
6462                         break;
6463                 }
6464
6465                 ret = walk_up_tree_v2(root, &path, &level);
6466                 if (ret != 0) {
6467                         /* Normal exit, reset ret to err */
6468                         ret = err;
6469                         break;
6470                 }
6471         }
6472
6473 out:
6474         btrfs_release_path(&path);
6475         return ret;
6476 }
6477
6478 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
6479
6480 /*
6481  * Iterate all items in the tree and call check_inode_item() to check.
6482  *
6483  * @root:       the root of the tree to be checked.
6484  * @ext_ref:    the EXTENDED_IREF feature
6485  *
6486  * Return 0 if no error found.
6487  * Return <0 for error.
6488  */
6489 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6490 {
6491         reset_cached_block_groups(root->fs_info);
6492         return check_btrfs_root(NULL, root, ext_ref, 0);
6493 }
6494
6495 /*
6496  * Find the relative ref for root_ref and root_backref.
6497  *
6498  * @root:       the root of the root tree.
6499  * @ref_key:    the key of the root ref.
6500  *
6501  * Return 0 if no error occurred.
6502  */
6503 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6504                           struct extent_buffer *node, int slot)
6505 {
6506         struct btrfs_path path;
6507         struct btrfs_key key;
6508         struct btrfs_root_ref *ref;
6509         struct btrfs_root_ref *backref;
6510         char ref_name[BTRFS_NAME_LEN] = {0};
6511         char backref_name[BTRFS_NAME_LEN] = {0};
6512         u64 ref_dirid;
6513         u64 ref_seq;
6514         u32 ref_namelen;
6515         u64 backref_dirid;
6516         u64 backref_seq;
6517         u32 backref_namelen;
6518         u32 len;
6519         int ret;
6520         int err = 0;
6521
6522         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6523         ref_dirid = btrfs_root_ref_dirid(node, ref);
6524         ref_seq = btrfs_root_ref_sequence(node, ref);
6525         ref_namelen = btrfs_root_ref_name_len(node, ref);
6526
6527         if (ref_namelen <= BTRFS_NAME_LEN) {
6528                 len = ref_namelen;
6529         } else {
6530                 len = BTRFS_NAME_LEN;
6531                 warning("%s[%llu %llu] ref_name too long",
6532                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6533                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6534                         ref_key->offset);
6535         }
6536         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6537
6538         /* Find relative root_ref */
6539         key.objectid = ref_key->offset;
6540         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6541         key.offset = ref_key->objectid;
6542
6543         btrfs_init_path(&path);
6544         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6545         if (ret) {
6546                 err |= ROOT_REF_MISSING;
6547                 error("%s[%llu %llu] couldn't find relative ref",
6548                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6549                       "ROOT_REF" : "ROOT_BACKREF",
6550                       ref_key->objectid, ref_key->offset);
6551                 goto out;
6552         }
6553
6554         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6555                                  struct btrfs_root_ref);
6556         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6557         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6558         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6559
6560         if (backref_namelen <= BTRFS_NAME_LEN) {
6561                 len = backref_namelen;
6562         } else {
6563                 len = BTRFS_NAME_LEN;
6564                 warning("%s[%llu %llu] ref_name too long",
6565                         key.type == BTRFS_ROOT_REF_KEY ?
6566                         "ROOT_REF" : "ROOT_BACKREF",
6567                         key.objectid, key.offset);
6568         }
6569         read_extent_buffer(path.nodes[0], backref_name,
6570                            (unsigned long)(backref + 1), len);
6571
6572         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6573             ref_namelen != backref_namelen ||
6574             strncmp(ref_name, backref_name, len)) {
6575                 err |= ROOT_REF_MISMATCH;
6576                 error("%s[%llu %llu] mismatch relative ref",
6577                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6578                       "ROOT_REF" : "ROOT_BACKREF",
6579                       ref_key->objectid, ref_key->offset);
6580         }
6581 out:
6582         btrfs_release_path(&path);
6583         return err;
6584 }
6585
6586 /*
6587  * Check all fs/file tree in low_memory mode.
6588  *
6589  * 1. for fs tree root item, call check_fs_root_v2()
6590  * 2. for fs tree root ref/backref, call check_root_ref()
6591  *
6592  * Return 0 if no error occurred.
6593  */
6594 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6595 {
6596         struct btrfs_root *tree_root = fs_info->tree_root;
6597         struct btrfs_root *cur_root = NULL;
6598         struct btrfs_path path;
6599         struct btrfs_key key;
6600         struct extent_buffer *node;
6601         unsigned int ext_ref;
6602         int slot;
6603         int ret;
6604         int err = 0;
6605
6606         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6607
6608         btrfs_init_path(&path);
6609         key.objectid = BTRFS_FS_TREE_OBJECTID;
6610         key.offset = 0;
6611         key.type = BTRFS_ROOT_ITEM_KEY;
6612
6613         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6614         if (ret < 0) {
6615                 err = ret;
6616                 goto out;
6617         } else if (ret > 0) {
6618                 err = -ENOENT;
6619                 goto out;
6620         }
6621
6622         while (1) {
6623                 node = path.nodes[0];
6624                 slot = path.slots[0];
6625                 btrfs_item_key_to_cpu(node, &key, slot);
6626                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6627                         goto out;
6628                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6629                     fs_root_objectid(key.objectid)) {
6630                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6631                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6632                                                                        &key);
6633                         } else {
6634                                 key.offset = (u64)-1;
6635                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6636                         }
6637
6638                         if (IS_ERR(cur_root)) {
6639                                 error("Fail to read fs/subvol tree: %lld",
6640                                       key.objectid);
6641                                 err = -EIO;
6642                                 goto next;
6643                         }
6644
6645                         ret = check_fs_root_v2(cur_root, ext_ref);
6646                         err |= ret;
6647
6648                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6649                                 btrfs_free_fs_root(cur_root);
6650                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6651                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6652                         ret = check_root_ref(tree_root, &key, node, slot);
6653                         err |= ret;
6654                 }
6655 next:
6656                 ret = btrfs_next_item(tree_root, &path);
6657                 if (ret > 0)
6658                         goto out;
6659                 if (ret < 0) {
6660                         err = ret;
6661                         goto out;
6662                 }
6663         }
6664
6665 out:
6666         btrfs_release_path(&path);
6667         return err;
6668 }
6669
6670 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6671                           struct cache_tree *root_cache)
6672 {
6673         int ret;
6674
6675         if (!ctx.progress_enabled)
6676                 fprintf(stderr, "checking fs roots\n");
6677         if (check_mode == CHECK_MODE_LOWMEM)
6678                 ret = check_fs_roots_v2(fs_info);
6679         else
6680                 ret = check_fs_roots(fs_info, root_cache);
6681
6682         return ret;
6683 }
6684
6685 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6686 {
6687         struct extent_backref *back, *tmp;
6688         struct tree_backref *tback;
6689         struct data_backref *dback;
6690         u64 found = 0;
6691         int err = 0;
6692
6693         rbtree_postorder_for_each_entry_safe(back, tmp,
6694                                              &rec->backref_tree, node) {
6695                 if (!back->found_extent_tree) {
6696                         err = 1;
6697                         if (!print_errs)
6698                                 goto out;
6699                         if (back->is_data) {
6700                                 dback = to_data_backref(back);
6701                                 fprintf(stderr, "Data backref %llu %s %llu"
6702                                         " owner %llu offset %llu num_refs %lu"
6703                                         " not found in extent tree\n",
6704                                         (unsigned long long)rec->start,
6705                                         back->full_backref ?
6706                                         "parent" : "root",
6707                                         back->full_backref ?
6708                                         (unsigned long long)dback->parent:
6709                                         (unsigned long long)dback->root,
6710                                         (unsigned long long)dback->owner,
6711                                         (unsigned long long)dback->offset,
6712                                         (unsigned long)dback->num_refs);
6713                         } else {
6714                                 tback = to_tree_backref(back);
6715                                 fprintf(stderr, "Tree backref %llu parent %llu"
6716                                         " root %llu not found in extent tree\n",
6717                                         (unsigned long long)rec->start,
6718                                         (unsigned long long)tback->parent,
6719                                         (unsigned long long)tback->root);
6720                         }
6721                 }
6722                 if (!back->is_data && !back->found_ref) {
6723                         err = 1;
6724                         if (!print_errs)
6725                                 goto out;
6726                         tback = to_tree_backref(back);
6727                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6728                                 (unsigned long long)rec->start,
6729                                 back->full_backref ? "parent" : "root",
6730                                 back->full_backref ?
6731                                 (unsigned long long)tback->parent :
6732                                 (unsigned long long)tback->root, back);
6733                 }
6734                 if (back->is_data) {
6735                         dback = to_data_backref(back);
6736                         if (dback->found_ref != dback->num_refs) {
6737                                 err = 1;
6738                                 if (!print_errs)
6739                                         goto out;
6740                                 fprintf(stderr, "Incorrect local backref count"
6741                                         " on %llu %s %llu owner %llu"
6742                                         " offset %llu found %u wanted %u back %p\n",
6743                                         (unsigned long long)rec->start,
6744                                         back->full_backref ?
6745                                         "parent" : "root",
6746                                         back->full_backref ?
6747                                         (unsigned long long)dback->parent:
6748                                         (unsigned long long)dback->root,
6749                                         (unsigned long long)dback->owner,
6750                                         (unsigned long long)dback->offset,
6751                                         dback->found_ref, dback->num_refs, back);
6752                         }
6753                         if (dback->disk_bytenr != rec->start) {
6754                                 err = 1;
6755                                 if (!print_errs)
6756                                         goto out;
6757                                 fprintf(stderr, "Backref disk bytenr does not"
6758                                         " match extent record, bytenr=%llu, "
6759                                         "ref bytenr=%llu\n",
6760                                         (unsigned long long)rec->start,
6761                                         (unsigned long long)dback->disk_bytenr);
6762                         }
6763
6764                         if (dback->bytes != rec->nr) {
6765                                 err = 1;
6766                                 if (!print_errs)
6767                                         goto out;
6768                                 fprintf(stderr, "Backref bytes do not match "
6769                                         "extent backref, bytenr=%llu, ref "
6770                                         "bytes=%llu, backref bytes=%llu\n",
6771                                         (unsigned long long)rec->start,
6772                                         (unsigned long long)rec->nr,
6773                                         (unsigned long long)dback->bytes);
6774                         }
6775                 }
6776                 if (!back->is_data) {
6777                         found += 1;
6778                 } else {
6779                         dback = to_data_backref(back);
6780                         found += dback->found_ref;
6781                 }
6782         }
6783         if (found != rec->refs) {
6784                 err = 1;
6785                 if (!print_errs)
6786                         goto out;
6787                 fprintf(stderr, "Incorrect global backref count "
6788                         "on %llu found %llu wanted %llu\n",
6789                         (unsigned long long)rec->start,
6790                         (unsigned long long)found,
6791                         (unsigned long long)rec->refs);
6792         }
6793 out:
6794         return err;
6795 }
6796
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
6803
6804 static void free_all_extent_backrefs(struct extent_record *rec)
6805 {
6806         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6807 }
6808
6809 static void free_extent_record_cache(struct cache_tree *extent_cache)
6810 {
6811         struct cache_extent *cache;
6812         struct extent_record *rec;
6813
6814         while (1) {
6815                 cache = first_cache_extent(extent_cache);
6816                 if (!cache)
6817                         break;
6818                 rec = container_of(cache, struct extent_record, cache);
6819                 remove_cache_extent(extent_cache, cache);
6820                 free_all_extent_backrefs(rec);
6821                 free(rec);
6822         }
6823 }
6824
6825 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6826                                  struct extent_record *rec)
6827 {
6828         if (rec->content_checked && rec->owner_ref_checked &&
6829             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6830             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6831             !rec->bad_full_backref && !rec->crossing_stripes &&
6832             !rec->wrong_chunk_type) {
6833                 remove_cache_extent(extent_cache, &rec->cache);
6834                 free_all_extent_backrefs(rec);
6835                 list_del_init(&rec->list);
6836                 free(rec);
6837         }
6838         return 0;
6839 }
6840
6841 static int check_owner_ref(struct btrfs_root *root,
6842                             struct extent_record *rec,
6843                             struct extent_buffer *buf)
6844 {
6845         struct extent_backref *node, *tmp;
6846         struct tree_backref *back;
6847         struct btrfs_root *ref_root;
6848         struct btrfs_key key;
6849         struct btrfs_path path;
6850         struct extent_buffer *parent;
6851         int level;
6852         int found = 0;
6853         int ret;
6854
6855         rbtree_postorder_for_each_entry_safe(node, tmp,
6856                                              &rec->backref_tree, node) {
6857                 if (node->is_data)
6858                         continue;
6859                 if (!node->found_ref)
6860                         continue;
6861                 if (node->full_backref)
6862                         continue;
6863                 back = to_tree_backref(node);
6864                 if (btrfs_header_owner(buf) == back->root)
6865                         return 0;
6866         }
6867         BUG_ON(rec->is_root);
6868
6869         /* try to find the block by search corresponding fs tree */
6870         key.objectid = btrfs_header_owner(buf);
6871         key.type = BTRFS_ROOT_ITEM_KEY;
6872         key.offset = (u64)-1;
6873
6874         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6875         if (IS_ERR(ref_root))
6876                 return 1;
6877
6878         level = btrfs_header_level(buf);
6879         if (level == 0)
6880                 btrfs_item_key_to_cpu(buf, &key, 0);
6881         else
6882                 btrfs_node_key_to_cpu(buf, &key, 0);
6883
6884         btrfs_init_path(&path);
6885         path.lowest_level = level + 1;
6886         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6887         if (ret < 0)
6888                 return 0;
6889
6890         parent = path.nodes[level + 1];
6891         if (parent && buf->start == btrfs_node_blockptr(parent,
6892                                                         path.slots[level + 1]))
6893                 found = 1;
6894
6895         btrfs_release_path(&path);
6896         return found ? 0 : 1;
6897 }
6898
6899 static int is_extent_tree_record(struct extent_record *rec)
6900 {
6901         struct extent_backref *node, *tmp;
6902         struct tree_backref *back;
6903         int is_extent = 0;
6904
6905         rbtree_postorder_for_each_entry_safe(node, tmp,
6906                                              &rec->backref_tree, node) {
6907                 if (node->is_data)
6908                         return 0;
6909                 back = to_tree_backref(node);
6910                 if (node->full_backref)
6911                         return 0;
6912                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6913                         is_extent = 1;
6914         }
6915         return is_extent;
6916 }
6917
6918
6919 static int record_bad_block_io(struct btrfs_fs_info *info,
6920                                struct cache_tree *extent_cache,
6921                                u64 start, u64 len)
6922 {
6923         struct extent_record *rec;
6924         struct cache_extent *cache;
6925         struct btrfs_key key;
6926
6927         cache = lookup_cache_extent(extent_cache, start, len);
6928         if (!cache)
6929                 return 0;
6930
6931         rec = container_of(cache, struct extent_record, cache);
6932         if (!is_extent_tree_record(rec))
6933                 return 0;
6934
6935         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6936         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6937 }
6938
6939 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6940                        struct extent_buffer *buf, int slot)
6941 {
6942         if (btrfs_header_level(buf)) {
6943                 struct btrfs_key_ptr ptr1, ptr2;
6944
6945                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6946                                    sizeof(struct btrfs_key_ptr));
6947                 read_extent_buffer(buf, &ptr2,
6948                                    btrfs_node_key_ptr_offset(slot + 1),
6949                                    sizeof(struct btrfs_key_ptr));
6950                 write_extent_buffer(buf, &ptr1,
6951                                     btrfs_node_key_ptr_offset(slot + 1),
6952                                     sizeof(struct btrfs_key_ptr));
6953                 write_extent_buffer(buf, &ptr2,
6954                                     btrfs_node_key_ptr_offset(slot),
6955                                     sizeof(struct btrfs_key_ptr));
6956                 if (slot == 0) {
6957                         struct btrfs_disk_key key;
6958                         btrfs_node_key(buf, &key, 0);
6959                         btrfs_fixup_low_keys(root, path, &key,
6960                                              btrfs_header_level(buf) + 1);
6961                 }
6962         } else {
6963                 struct btrfs_item *item1, *item2;
6964                 struct btrfs_key k1, k2;
6965                 char *item1_data, *item2_data;
6966                 u32 item1_offset, item2_offset, item1_size, item2_size;
6967
6968                 item1 = btrfs_item_nr(slot);
6969                 item2 = btrfs_item_nr(slot + 1);
6970                 btrfs_item_key_to_cpu(buf, &k1, slot);
6971                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6972                 item1_offset = btrfs_item_offset(buf, item1);
6973                 item2_offset = btrfs_item_offset(buf, item2);
6974                 item1_size = btrfs_item_size(buf, item1);
6975                 item2_size = btrfs_item_size(buf, item2);
6976
6977                 item1_data = malloc(item1_size);
6978                 if (!item1_data)
6979                         return -ENOMEM;
6980                 item2_data = malloc(item2_size);
6981                 if (!item2_data) {
6982                         free(item1_data);
6983                         return -ENOMEM;
6984                 }
6985
6986                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6987                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6988
6989                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6990                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6991                 free(item1_data);
6992                 free(item2_data);
6993
6994                 btrfs_set_item_offset(buf, item1, item2_offset);
6995                 btrfs_set_item_offset(buf, item2, item1_offset);
6996                 btrfs_set_item_size(buf, item1, item2_size);
6997                 btrfs_set_item_size(buf, item2, item1_size);
6998
6999                 path->slots[0] = slot;
7000                 btrfs_set_item_key_unsafe(root, path, &k2);
7001                 path->slots[0] = slot + 1;
7002                 btrfs_set_item_key_unsafe(root, path, &k1);
7003         }
7004         return 0;
7005 }
7006
7007 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7008 {
7009         struct extent_buffer *buf;
7010         struct btrfs_key k1, k2;
7011         int i;
7012         int level = path->lowest_level;
7013         int ret = -EIO;
7014
7015         buf = path->nodes[level];
7016         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7017                 if (level) {
7018                         btrfs_node_key_to_cpu(buf, &k1, i);
7019                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
7020                 } else {
7021                         btrfs_item_key_to_cpu(buf, &k1, i);
7022                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
7023                 }
7024                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7025                         continue;
7026                 ret = swap_values(root, path, buf, i);
7027                 if (ret)
7028                         break;
7029                 btrfs_mark_buffer_dirty(buf);
7030                 i = 0;
7031         }
7032         return ret;
7033 }
7034
7035 static int delete_bogus_item(struct btrfs_root *root,
7036                              struct btrfs_path *path,
7037                              struct extent_buffer *buf, int slot)
7038 {
7039         struct btrfs_key key;
7040         int nritems = btrfs_header_nritems(buf);
7041
7042         btrfs_item_key_to_cpu(buf, &key, slot);
7043
7044         /* These are all the keys we can deal with missing. */
7045         if (key.type != BTRFS_DIR_INDEX_KEY &&
7046             key.type != BTRFS_EXTENT_ITEM_KEY &&
7047             key.type != BTRFS_METADATA_ITEM_KEY &&
7048             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7049             key.type != BTRFS_EXTENT_DATA_REF_KEY)
7050                 return -1;
7051
7052         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7053                (unsigned long long)key.objectid, key.type,
7054                (unsigned long long)key.offset, slot, buf->start);
7055         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7056                               btrfs_item_nr_offset(slot + 1),
7057                               sizeof(struct btrfs_item) *
7058                               (nritems - slot - 1));
7059         btrfs_set_header_nritems(buf, nritems - 1);
7060         if (slot == 0) {
7061                 struct btrfs_disk_key disk_key;
7062
7063                 btrfs_item_key(buf, &disk_key, 0);
7064                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7065         }
7066         btrfs_mark_buffer_dirty(buf);
7067         return 0;
7068 }
7069
7070 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7071 {
7072         struct extent_buffer *buf;
7073         int i;
7074         int ret = 0;
7075
7076         /* We should only get this for leaves */
7077         BUG_ON(path->lowest_level);
7078         buf = path->nodes[0];
7079 again:
7080         for (i = 0; i < btrfs_header_nritems(buf); i++) {
7081                 unsigned int shift = 0, offset;
7082
7083                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7084                     BTRFS_LEAF_DATA_SIZE(root)) {
7085                         if (btrfs_item_end_nr(buf, i) >
7086                             BTRFS_LEAF_DATA_SIZE(root)) {
7087                                 ret = delete_bogus_item(root, path, buf, i);
7088                                 if (!ret)
7089                                         goto again;
7090                                 fprintf(stderr, "item is off the end of the "
7091                                         "leaf, can't fix\n");
7092                                 ret = -EIO;
7093                                 break;
7094                         }
7095                         shift = BTRFS_LEAF_DATA_SIZE(root) -
7096                                 btrfs_item_end_nr(buf, i);
7097                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7098                            btrfs_item_offset_nr(buf, i - 1)) {
7099                         if (btrfs_item_end_nr(buf, i) >
7100                             btrfs_item_offset_nr(buf, i - 1)) {
7101                                 ret = delete_bogus_item(root, path, buf, i);
7102                                 if (!ret)
7103                                         goto again;
7104                                 fprintf(stderr, "items overlap, can't fix\n");
7105                                 ret = -EIO;
7106                                 break;
7107                         }
7108                         shift = btrfs_item_offset_nr(buf, i - 1) -
7109                                 btrfs_item_end_nr(buf, i);
7110                 }
7111                 if (!shift)
7112                         continue;
7113
7114                 printf("Shifting item nr %d by %u bytes in block %llu\n",
7115                        i, shift, (unsigned long long)buf->start);
7116                 offset = btrfs_item_offset_nr(buf, i);
7117                 memmove_extent_buffer(buf,
7118                                       btrfs_leaf_data(buf) + offset + shift,
7119                                       btrfs_leaf_data(buf) + offset,
7120                                       btrfs_item_size_nr(buf, i));
7121                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7122                                       offset + shift);
7123                 btrfs_mark_buffer_dirty(buf);
7124         }
7125
7126         /*
7127          * We may have moved things, in which case we want to exit so we don't
7128          * write those changes out.  Once we have proper abort functionality in
7129          * progs this can be changed to something nicer.
7130          */
7131         BUG_ON(ret);
7132         return ret;
7133 }
7134
7135 /*
7136  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
7137  * then just return -EIO.
7138  */
7139 static int try_to_fix_bad_block(struct btrfs_root *root,
7140                                 struct extent_buffer *buf,
7141                                 enum btrfs_tree_block_status status)
7142 {
7143         struct btrfs_trans_handle *trans;
7144         struct ulist *roots;
7145         struct ulist_node *node;
7146         struct btrfs_root *search_root;
7147         struct btrfs_path path;
7148         struct ulist_iterator iter;
7149         struct btrfs_key root_key, key;
7150         int ret;
7151
7152         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7153             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7154                 return -EIO;
7155
7156         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7157         if (ret)
7158                 return -EIO;
7159
7160         btrfs_init_path(&path);
7161         ULIST_ITER_INIT(&iter);
7162         while ((node = ulist_next(roots, &iter))) {
7163                 root_key.objectid = node->val;
7164                 root_key.type = BTRFS_ROOT_ITEM_KEY;
7165                 root_key.offset = (u64)-1;
7166
7167                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7168                 if (IS_ERR(root)) {
7169                         ret = -EIO;
7170                         break;
7171                 }
7172
7173
7174                 trans = btrfs_start_transaction(search_root, 0);
7175                 if (IS_ERR(trans)) {
7176                         ret = PTR_ERR(trans);
7177                         break;
7178                 }
7179
7180                 path.lowest_level = btrfs_header_level(buf);
7181                 path.skip_check_block = 1;
7182                 if (path.lowest_level)
7183                         btrfs_node_key_to_cpu(buf, &key, 0);
7184                 else
7185                         btrfs_item_key_to_cpu(buf, &key, 0);
7186                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7187                 if (ret) {
7188                         ret = -EIO;
7189                         btrfs_commit_transaction(trans, search_root);
7190                         break;
7191                 }
7192                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7193                         ret = fix_key_order(search_root, &path);
7194                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7195                         ret = fix_item_offset(search_root, &path);
7196                 if (ret) {
7197                         btrfs_commit_transaction(trans, search_root);
7198                         break;
7199                 }
7200                 btrfs_release_path(&path);
7201                 btrfs_commit_transaction(trans, search_root);
7202         }
7203         ulist_free(roots);
7204         btrfs_release_path(&path);
7205         return ret;
7206 }
7207
7208 static int check_block(struct btrfs_root *root,
7209                        struct cache_tree *extent_cache,
7210                        struct extent_buffer *buf, u64 flags)
7211 {
7212         struct extent_record *rec;
7213         struct cache_extent *cache;
7214         struct btrfs_key key;
7215         enum btrfs_tree_block_status status;
7216         int ret = 0;
7217         int level;
7218
7219         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7220         if (!cache)
7221                 return 1;
7222         rec = container_of(cache, struct extent_record, cache);
7223         rec->generation = btrfs_header_generation(buf);
7224
7225         level = btrfs_header_level(buf);
7226         if (btrfs_header_nritems(buf) > 0) {
7227
7228                 if (level == 0)
7229                         btrfs_item_key_to_cpu(buf, &key, 0);
7230                 else
7231                         btrfs_node_key_to_cpu(buf, &key, 0);
7232
7233                 rec->info_objectid = key.objectid;
7234         }
7235         rec->info_level = level;
7236
7237         if (btrfs_is_leaf(buf))
7238                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7239         else
7240                 status = btrfs_check_node(root, &rec->parent_key, buf);
7241
7242         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7243                 if (repair)
7244                         status = try_to_fix_bad_block(root, buf, status);
7245                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7246                         ret = -EIO;
7247                         fprintf(stderr, "bad block %llu\n",
7248                                 (unsigned long long)buf->start);
7249                 } else {
7250                         /*
7251                          * Signal to callers we need to start the scan over
7252                          * again since we'll have cowed blocks.
7253                          */
7254                         ret = -EAGAIN;
7255                 }
7256         } else {
7257                 rec->content_checked = 1;
7258                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7259                         rec->owner_ref_checked = 1;
7260                 else {
7261                         ret = check_owner_ref(root, rec, buf);
7262                         if (!ret)
7263                                 rec->owner_ref_checked = 1;
7264                 }
7265         }
7266         if (!ret)
7267                 maybe_free_extent_rec(extent_cache, rec);
7268         return ret;
7269 }
7270
#if 0
/*
 * Compiled-out variant that walks the rec->backrefs list.
 *
 * With @parent set, return the full backref whose parent matches; otherwise
 * return the non-full backref whose root matches.  NULL if none is found.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct tree_backref *back;

                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                } else if (!node->full_backref && back->root == root) {
                        return back;
                }
        }
        return NULL;
}
#endif
7300
7301 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7302                                                 u64 parent, u64 root)
7303 {
7304         struct tree_backref *ref = malloc(sizeof(*ref));
7305
7306         if (!ref)
7307                 return NULL;
7308         memset(&ref->node, 0, sizeof(ref->node));
7309         if (parent > 0) {
7310                 ref->parent = parent;
7311                 ref->node.full_backref = 1;
7312         } else {
7313                 ref->root = root;
7314                 ref->node.full_backref = 0;
7315         }
7316
7317         return ref;
7318 }
7319
#if 0
/*
 * Compiled-out variant that walks the rec->backrefs list.
 *
 * With @parent set, return the full data backref whose parent matches.
 * Otherwise match on (@root, @owner, @offset); when both the caller and the
 * candidate have found_ref set, the candidate must also agree on @bytes and
 * @disk_bytenr.  Returns NULL if nothing matches.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct data_backref *back;

                if (!node->is_data)
                        continue;
                back = to_data_backref(node);

                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                        continue;
                }

                if (node->full_backref)
                        continue;
                if (back->root != root || back->owner != owner ||
                    back->offset != offset)
                        continue;
                if (found_ref && node->found_ref &&
                    (back->bytes != bytes ||
                    back->disk_bytenr != disk_bytenr))
                        continue;
                return back;
        }
        return NULL;
}
#endif
7358
7359 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7360                                                 u64 parent, u64 root,
7361                                                 u64 owner, u64 offset,
7362                                                 u64 max_size)
7363 {
7364         struct data_backref *ref = malloc(sizeof(*ref));
7365
7366         if (!ref)
7367                 return NULL;
7368         memset(&ref->node, 0, sizeof(ref->node));
7369         ref->node.is_data = 1;
7370
7371         if (parent > 0) {
7372                 ref->parent = parent;
7373                 ref->owner = 0;
7374                 ref->offset = 0;
7375                 ref->node.full_backref = 1;
7376         } else {
7377                 ref->root = root;
7378                 ref->owner = owner;
7379                 ref->offset = offset;
7380                 ref->node.full_backref = 0;
7381         }
7382         ref->bytes = max_size;
7383         ref->found_ref = 0;
7384         ref->num_refs = 0;
7385         if (max_size > rec->max_size)
7386                 rec->max_size = max_size;
7387         return ref;
7388 }
7389
7390 /* Check if the type of extent matches with its chunk */
7391 static void check_extent_type(struct extent_record *rec)
7392 {
7393         struct btrfs_block_group_cache *bg_cache;
7394
7395         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7396         if (!bg_cache)
7397                 return;
7398
7399         /* data extent, check chunk directly*/
7400         if (!rec->metadata) {
7401                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7402                         rec->wrong_chunk_type = 1;
7403                 return;
7404         }
7405
7406         /* metadata extent, check the obvious case first */
7407         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7408                                  BTRFS_BLOCK_GROUP_METADATA))) {
7409                 rec->wrong_chunk_type = 1;
7410                 return;
7411         }
7412
7413         /*
7414          * Check SYSTEM extent, as it's also marked as metadata, we can only
7415          * make sure it's a SYSTEM extent by its backref
7416          */
7417         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7418                 struct extent_backref *node;
7419                 struct tree_backref *tback;
7420                 u64 bg_type;
7421
7422                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7423                 if (node->is_data) {
7424                         /* tree block shouldn't have data backref */
7425                         rec->wrong_chunk_type = 1;
7426                         return;
7427                 }
7428                 tback = container_of(node, struct tree_backref, node);
7429
7430                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7431                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7432                 else
7433                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7434                 if (!(bg_cache->flags & bg_type))
7435                         rec->wrong_chunk_type = 1;
7436         }
7437 }
7438
7439 /*
7440  * Allocate a new extent record, fill default values from @tmpl and insert int
7441  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7442  * the cache, otherwise it fails.
7443  */
7444 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7445                 struct extent_record *tmpl)
7446 {
7447         struct extent_record *rec;
7448         int ret = 0;
7449
7450         BUG_ON(tmpl->max_size == 0);
7451         rec = malloc(sizeof(*rec));
7452         if (!rec)
7453                 return -ENOMEM;
7454         rec->start = tmpl->start;
7455         rec->max_size = tmpl->max_size;
7456         rec->nr = max(tmpl->nr, tmpl->max_size);
7457         rec->found_rec = tmpl->found_rec;
7458         rec->content_checked = tmpl->content_checked;
7459         rec->owner_ref_checked = tmpl->owner_ref_checked;
7460         rec->num_duplicates = 0;
7461         rec->metadata = tmpl->metadata;
7462         rec->flag_block_full_backref = FLAG_UNSET;
7463         rec->bad_full_backref = 0;
7464         rec->crossing_stripes = 0;
7465         rec->wrong_chunk_type = 0;
7466         rec->is_root = tmpl->is_root;
7467         rec->refs = tmpl->refs;
7468         rec->extent_item_refs = tmpl->extent_item_refs;
7469         rec->parent_generation = tmpl->parent_generation;
7470         INIT_LIST_HEAD(&rec->backrefs);
7471         INIT_LIST_HEAD(&rec->dups);
7472         INIT_LIST_HEAD(&rec->list);
7473         rec->backref_tree = RB_ROOT;
7474         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7475         rec->cache.start = tmpl->start;
7476         rec->cache.size = tmpl->nr;
7477         ret = insert_cache_extent(extent_cache, &rec->cache);
7478         if (ret) {
7479                 free(rec);
7480                 return ret;
7481         }
7482         bytes_used += rec->nr;
7483
7484         if (tmpl->metadata)
7485                 rec->crossing_stripes = check_crossing_stripes(global_info,
7486                                 rec->start, global_info->nodesize);
7487         check_extent_type(rec);
7488         return ret;
7489 }
7490
7491 /*
7492  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7493  * some are hints:
7494  * - refs              - if found, increase refs
7495  * - is_root           - if found, set
7496  * - content_checked   - if found, set
7497  * - owner_ref_checked - if found, set
7498  *
7499  * If not found, create a new one, initialize and insert.
7500  */
7501 static int add_extent_rec(struct cache_tree *extent_cache,
7502                 struct extent_record *tmpl)
7503 {
7504         struct extent_record *rec;
7505         struct cache_extent *cache;
7506         int ret = 0;
7507         int dup = 0;
7508
7509         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7510         if (cache) {
7511                 rec = container_of(cache, struct extent_record, cache);
7512                 if (tmpl->refs)
7513                         rec->refs++;
7514                 if (rec->nr == 1)
7515                         rec->nr = max(tmpl->nr, tmpl->max_size);
7516
7517                 /*
7518                  * We need to make sure to reset nr to whatever the extent
7519                  * record says was the real size, this way we can compare it to
7520                  * the backrefs.
7521                  */
7522                 if (tmpl->found_rec) {
7523                         if (tmpl->start != rec->start || rec->found_rec) {
7524                                 struct extent_record *tmp;
7525
7526                                 dup = 1;
7527                                 if (list_empty(&rec->list))
7528                                         list_add_tail(&rec->list,
7529                                                       &duplicate_extents);
7530
7531                                 /*
7532                                  * We have to do this song and dance in case we
7533                                  * find an extent record that falls inside of
7534                                  * our current extent record but does not have
7535                                  * the same objectid.
7536                                  */
7537                                 tmp = malloc(sizeof(*tmp));
7538                                 if (!tmp)
7539                                         return -ENOMEM;
7540                                 tmp->start = tmpl->start;
7541                                 tmp->max_size = tmpl->max_size;
7542                                 tmp->nr = tmpl->nr;
7543                                 tmp->found_rec = 1;
7544                                 tmp->metadata = tmpl->metadata;
7545                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7546                                 INIT_LIST_HEAD(&tmp->list);
7547                                 list_add_tail(&tmp->list, &rec->dups);
7548                                 rec->num_duplicates++;
7549                         } else {
7550                                 rec->nr = tmpl->nr;
7551                                 rec->found_rec = 1;
7552                         }
7553                 }
7554
7555                 if (tmpl->extent_item_refs && !dup) {
7556                         if (rec->extent_item_refs) {
7557                                 fprintf(stderr, "block %llu rec "
7558                                         "extent_item_refs %llu, passed %llu\n",
7559                                         (unsigned long long)tmpl->start,
7560                                         (unsigned long long)
7561                                                         rec->extent_item_refs,
7562                                         (unsigned long long)tmpl->extent_item_refs);
7563                         }
7564                         rec->extent_item_refs = tmpl->extent_item_refs;
7565                 }
7566                 if (tmpl->is_root)
7567                         rec->is_root = 1;
7568                 if (tmpl->content_checked)
7569                         rec->content_checked = 1;
7570                 if (tmpl->owner_ref_checked)
7571                         rec->owner_ref_checked = 1;
7572                 memcpy(&rec->parent_key, &tmpl->parent_key,
7573                                 sizeof(tmpl->parent_key));
7574                 if (tmpl->parent_generation)
7575                         rec->parent_generation = tmpl->parent_generation;
7576                 if (rec->max_size < tmpl->max_size)
7577                         rec->max_size = tmpl->max_size;
7578
7579                 /*
7580                  * A metadata extent can't cross stripe_len boundary, otherwise
7581                  * kernel scrub won't be able to handle it.
7582                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7583                  * it.
7584                  */
7585                 if (tmpl->metadata)
7586                         rec->crossing_stripes = check_crossing_stripes(
7587                                         global_info, rec->start,
7588                                         global_info->nodesize);
7589                 check_extent_type(rec);
7590                 maybe_free_extent_rec(extent_cache, rec);
7591                 return ret;
7592         }
7593
7594         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7595
7596         return ret;
7597 }
7598
7599 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7600                             u64 parent, u64 root, int found_ref)
7601 {
7602         struct extent_record *rec;
7603         struct tree_backref *back;
7604         struct cache_extent *cache;
7605         int ret;
7606         bool insert = false;
7607
7608         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7609         if (!cache) {
7610                 struct extent_record tmpl;
7611
7612                 memset(&tmpl, 0, sizeof(tmpl));
7613                 tmpl.start = bytenr;
7614                 tmpl.nr = 1;
7615                 tmpl.metadata = 1;
7616                 tmpl.max_size = 1;
7617
7618                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7619                 if (ret)
7620                         return ret;
7621
7622                 /* really a bug in cache_extent implement now */
7623                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7624                 if (!cache)
7625                         return -ENOENT;
7626         }
7627
7628         rec = container_of(cache, struct extent_record, cache);
7629         if (rec->start != bytenr) {
7630                 /*
7631                  * Several cause, from unaligned bytenr to over lapping extents
7632                  */
7633                 return -EEXIST;
7634         }
7635
7636         back = find_tree_backref(rec, parent, root);
7637         if (!back) {
7638                 back = alloc_tree_backref(rec, parent, root);
7639                 if (!back)
7640                         return -ENOMEM;
7641                 insert = true;
7642         }
7643
7644         if (found_ref) {
7645                 if (back->node.found_ref) {
7646                         fprintf(stderr, "Extent back ref already exists "
7647                                 "for %llu parent %llu root %llu \n",
7648                                 (unsigned long long)bytenr,
7649                                 (unsigned long long)parent,
7650                                 (unsigned long long)root);
7651                 }
7652                 back->node.found_ref = 1;
7653         } else {
7654                 if (back->node.found_extent_tree) {
7655                         fprintf(stderr, "Extent back ref already exists "
7656                                 "for %llu parent %llu root %llu \n",
7657                                 (unsigned long long)bytenr,
7658                                 (unsigned long long)parent,
7659                                 (unsigned long long)root);
7660                 }
7661                 back->node.found_extent_tree = 1;
7662         }
7663         if (insert)
7664                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7665                         compare_extent_backref));
7666         check_extent_type(rec);
7667         maybe_free_extent_rec(extent_cache, rec);
7668         return 0;
7669 }
7670
7671 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7672                             u64 parent, u64 root, u64 owner, u64 offset,
7673                             u32 num_refs, int found_ref, u64 max_size)
7674 {
7675         struct extent_record *rec;
7676         struct data_backref *back;
7677         struct cache_extent *cache;
7678         int ret;
7679         bool insert = false;
7680
7681         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7682         if (!cache) {
7683                 struct extent_record tmpl;
7684
7685                 memset(&tmpl, 0, sizeof(tmpl));
7686                 tmpl.start = bytenr;
7687                 tmpl.nr = 1;
7688                 tmpl.max_size = max_size;
7689
7690                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7691                 if (ret)
7692                         return ret;
7693
7694                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7695                 if (!cache)
7696                         abort();
7697         }
7698
7699         rec = container_of(cache, struct extent_record, cache);
7700         if (rec->max_size < max_size)
7701                 rec->max_size = max_size;
7702
7703         /*
7704          * If found_ref is set then max_size is the real size and must match the
7705          * existing refs.  So if we have already found a ref then we need to
7706          * make sure that this ref matches the existing one, otherwise we need
7707          * to add a new backref so we can notice that the backrefs don't match
7708          * and we need to figure out who is telling the truth.  This is to
7709          * account for that awful fsync bug I introduced where we'd end up with
7710          * a btrfs_file_extent_item that would have its length include multiple
7711          * prealloc extents or point inside of a prealloc extent.
7712          */
7713         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7714                                  bytenr, max_size);
7715         if (!back) {
7716                 back = alloc_data_backref(rec, parent, root, owner, offset,
7717                                           max_size);
7718                 BUG_ON(!back);
7719                 insert = true;
7720         }
7721
7722         if (found_ref) {
7723                 BUG_ON(num_refs != 1);
7724                 if (back->node.found_ref)
7725                         BUG_ON(back->bytes != max_size);
7726                 back->node.found_ref = 1;
7727                 back->found_ref += 1;
7728                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7729                         back->bytes = max_size;
7730                         back->disk_bytenr = bytenr;
7731
7732                         /* Need to reinsert if not already in the tree */
7733                         if (!insert) {
7734                                 rb_erase(&back->node.node, &rec->backref_tree);
7735                                 insert = true;
7736                         }
7737                 }
7738                 rec->refs += 1;
7739                 rec->content_checked = 1;
7740                 rec->owner_ref_checked = 1;
7741         } else {
7742                 if (back->node.found_extent_tree) {
7743                         fprintf(stderr, "Extent back ref already exists "
7744                                 "for %llu parent %llu root %llu "
7745                                 "owner %llu offset %llu num_refs %lu\n",
7746                                 (unsigned long long)bytenr,
7747                                 (unsigned long long)parent,
7748                                 (unsigned long long)root,
7749                                 (unsigned long long)owner,
7750                                 (unsigned long long)offset,
7751                                 (unsigned long)num_refs);
7752                 }
7753                 back->num_refs = num_refs;
7754                 back->node.found_extent_tree = 1;
7755         }
7756         if (insert)
7757                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7758                         compare_extent_backref));
7759
7760         maybe_free_extent_rec(extent_cache, rec);
7761         return 0;
7762 }
7763
7764 static int add_pending(struct cache_tree *pending,
7765                        struct cache_tree *seen, u64 bytenr, u32 size)
7766 {
7767         int ret;
7768         ret = add_cache_extent(seen, bytenr, size);
7769         if (ret)
7770                 return ret;
7771         add_cache_extent(pending, bytenr, size);
7772         return 0;
7773 }
7774
7775 static int pick_next_pending(struct cache_tree *pending,
7776                         struct cache_tree *reada,
7777                         struct cache_tree *nodes,
7778                         u64 last, struct block_info *bits, int bits_nr,
7779                         int *reada_bits)
7780 {
7781         unsigned long node_start = last;
7782         struct cache_extent *cache;
7783         int ret;
7784
7785         cache = search_cache_extent(reada, 0);
7786         if (cache) {
7787                 bits[0].start = cache->start;
7788                 bits[0].size = cache->size;
7789                 *reada_bits = 1;
7790                 return 1;
7791         }
7792         *reada_bits = 0;
7793         if (node_start > 32768)
7794                 node_start -= 32768;
7795
7796         cache = search_cache_extent(nodes, node_start);
7797         if (!cache)
7798                 cache = search_cache_extent(nodes, 0);
7799
7800         if (!cache) {
7801                  cache = search_cache_extent(pending, 0);
7802                  if (!cache)
7803                          return 0;
7804                  ret = 0;
7805                  do {
7806                          bits[ret].start = cache->start;
7807                          bits[ret].size = cache->size;
7808                          cache = next_cache_extent(cache);
7809                          ret++;
7810                  } while (cache && ret < bits_nr);
7811                  return ret;
7812         }
7813
7814         ret = 0;
7815         do {
7816                 bits[ret].start = cache->start;
7817                 bits[ret].size = cache->size;
7818                 cache = next_cache_extent(cache);
7819                 ret++;
7820         } while (cache && ret < bits_nr);
7821
7822         if (bits_nr - ret > 8) {
7823                 u64 lookup = bits[0].start + bits[0].size;
7824                 struct cache_extent *next;
7825                 next = search_cache_extent(pending, lookup);
7826                 while(next) {
7827                         if (next->start - lookup > 32768)
7828                                 break;
7829                         bits[ret].start = next->start;
7830                         bits[ret].size = next->size;
7831                         lookup = next->start + next->size;
7832                         ret++;
7833                         if (ret == bits_nr)
7834                                 break;
7835                         next = next_cache_extent(next);
7836                         if (!next)
7837                                 break;
7838                 }
7839         }
7840         return ret;
7841 }
7842
7843 static void free_chunk_record(struct cache_extent *cache)
7844 {
7845         struct chunk_record *rec;
7846
7847         rec = container_of(cache, struct chunk_record, cache);
7848         list_del_init(&rec->list);
7849         list_del_init(&rec->dextents);
7850         free(rec);
7851 }
7852
/*
 * Tear down @chunk_cache through cache_tree_free_extents(), passing
 * free_chunk_record() as the callback that releases each entry.
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
7857
7858 static void free_device_record(struct rb_node *node)
7859 {
7860         struct device_record *rec;
7861
7862         rec = container_of(node, struct device_record, node);
7863         free(rec);
7864 }
7865
7866 FREE_RB_BASED_TREE(device_cache, free_device_record);
7867
7868 int insert_block_group_record(struct block_group_tree *tree,
7869                               struct block_group_record *bg_rec)
7870 {
7871         int ret;
7872
7873         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7874         if (ret)
7875                 return ret;
7876
7877         list_add_tail(&bg_rec->list, &tree->block_groups);
7878         return 0;
7879 }
7880
7881 static void free_block_group_record(struct cache_extent *cache)
7882 {
7883         struct block_group_record *rec;
7884
7885         rec = container_of(cache, struct block_group_record, cache);
7886         list_del_init(&rec->list);
7887         free(rec);
7888 }
7889
/*
 * Tear down the extent cache of @tree through cache_tree_free_extents(),
 * passing free_block_group_record() as the callback that releases each
 * entry.
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
7894
7895 int insert_device_extent_record(struct device_extent_tree *tree,
7896                                 struct device_extent_record *de_rec)
7897 {
7898         int ret;
7899
7900         /*
7901          * Device extent is a bit different from the other extents, because
7902          * the extents which belong to the different devices may have the
7903          * same start and size, so we need use the special extent cache
7904          * search/insert functions.
7905          */
7906         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7907         if (ret)
7908                 return ret;
7909
7910         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7911         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7912         return 0;
7913 }
7914
7915 static void free_device_extent_record(struct cache_extent *cache)
7916 {
7917         struct device_extent_record *rec;
7918
7919         rec = container_of(cache, struct device_extent_record, cache);
7920         if (!list_empty(&rec->chunk_list))
7921                 list_del_init(&rec->chunk_list);
7922         if (!list_empty(&rec->device_list))
7923                 list_del_init(&rec->device_list);
7924         free(rec);
7925 }
7926
/*
 * Tear down the extent cache of @tree through cache_tree_free_extents(),
 * passing free_device_extent_record() as the callback that releases each
 * entry.
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
7931
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item found at @slot of @leaf.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as
 * tree backrefs; all others are recorded as data backrefs carrying the ref
 * count stored in the item.  In both cases key.objectid is the extent
 * bytenr and key.offset is passed as the parent.
 *
 * Returns whatever add_tree_backref() / add_data_backref() returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
7952
7953 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7954                                             struct btrfs_key *key,
7955                                             int slot)
7956 {
7957         struct btrfs_chunk *ptr;
7958         struct chunk_record *rec;
7959         int num_stripes, i;
7960
7961         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7962         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7963
7964         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7965         if (!rec) {
7966                 fprintf(stderr, "memory allocation failed\n");
7967                 exit(-1);
7968         }
7969
7970         INIT_LIST_HEAD(&rec->list);
7971         INIT_LIST_HEAD(&rec->dextents);
7972         rec->bg_rec = NULL;
7973
7974         rec->cache.start = key->offset;
7975         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7976
7977         rec->generation = btrfs_header_generation(leaf);
7978
7979         rec->objectid = key->objectid;
7980         rec->type = key->type;
7981         rec->offset = key->offset;
7982
7983         rec->length = rec->cache.size;
7984         rec->owner = btrfs_chunk_owner(leaf, ptr);
7985         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7986         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7987         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7988         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7989         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7990         rec->num_stripes = num_stripes;
7991         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7992
7993         for (i = 0; i < rec->num_stripes; ++i) {
7994                 rec->stripes[i].devid =
7995                         btrfs_stripe_devid_nr(leaf, ptr, i);
7996                 rec->stripes[i].offset =
7997                         btrfs_stripe_offset_nr(leaf, ptr, i);
7998                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7999                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
8000                                 BTRFS_UUID_SIZE);
8001         }
8002
8003         return rec;
8004 }
8005
8006 static int process_chunk_item(struct cache_tree *chunk_cache,
8007                               struct btrfs_key *key, struct extent_buffer *eb,
8008                               int slot)
8009 {
8010         struct chunk_record *rec;
8011         struct btrfs_chunk *chunk;
8012         int ret = 0;
8013
8014         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8015         /*
8016          * Do extra check for this chunk item,
8017          *
8018          * It's still possible one can craft a leaf with CHUNK_ITEM, with
8019          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8020          * and owner<->key_type check.
8021          */
8022         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8023                                       key->offset);
8024         if (ret < 0) {
8025                 error("chunk(%llu, %llu) is not valid, ignore it",
8026                       key->offset, btrfs_chunk_length(eb, chunk));
8027                 return 0;
8028         }
8029         rec = btrfs_new_chunk_record(eb, key, slot);
8030         ret = insert_cache_extent(chunk_cache, &rec->cache);
8031         if (ret) {
8032                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8033                         rec->offset, rec->length);
8034                 free(rec);
8035         }
8036
8037         return ret;
8038 }
8039
8040 static int process_device_item(struct rb_root *dev_cache,
8041                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8042 {
8043         struct btrfs_dev_item *ptr;
8044         struct device_record *rec;
8045         int ret = 0;
8046
8047         ptr = btrfs_item_ptr(eb,
8048                 slot, struct btrfs_dev_item);
8049
8050         rec = malloc(sizeof(*rec));
8051         if (!rec) {
8052                 fprintf(stderr, "memory allocation failed\n");
8053                 return -ENOMEM;
8054         }
8055
8056         rec->devid = key->offset;
8057         rec->generation = btrfs_header_generation(eb);
8058
8059         rec->objectid = key->objectid;
8060         rec->type = key->type;
8061         rec->offset = key->offset;
8062
8063         rec->devid = btrfs_device_id(eb, ptr);
8064         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8065         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8066
8067         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8068         if (ret) {
8069                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
8070                 free(rec);
8071         }
8072
8073         return ret;
8074 }
8075
8076 struct block_group_record *
8077 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8078                              int slot)
8079 {
8080         struct btrfs_block_group_item *ptr;
8081         struct block_group_record *rec;
8082
8083         rec = calloc(1, sizeof(*rec));
8084         if (!rec) {
8085                 fprintf(stderr, "memory allocation failed\n");
8086                 exit(-1);
8087         }
8088
8089         rec->cache.start = key->objectid;
8090         rec->cache.size = key->offset;
8091
8092         rec->generation = btrfs_header_generation(leaf);
8093
8094         rec->objectid = key->objectid;
8095         rec->type = key->type;
8096         rec->offset = key->offset;
8097
8098         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8099         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8100
8101         INIT_LIST_HEAD(&rec->list);
8102
8103         return rec;
8104 }
8105
8106 static int process_block_group_item(struct block_group_tree *block_group_cache,
8107                                     struct btrfs_key *key,
8108                                     struct extent_buffer *eb, int slot)
8109 {
8110         struct block_group_record *rec;
8111         int ret = 0;
8112
8113         rec = btrfs_new_block_group_record(eb, key, slot);
8114         ret = insert_block_group_record(block_group_cache, rec);
8115         if (ret) {
8116                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8117                         rec->objectid, rec->offset);
8118                 free(rec);
8119         }
8120
8121         return ret;
8122 }
8123
8124 struct device_extent_record *
8125 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8126                                struct btrfs_key *key, int slot)
8127 {
8128         struct device_extent_record *rec;
8129         struct btrfs_dev_extent *ptr;
8130
8131         rec = calloc(1, sizeof(*rec));
8132         if (!rec) {
8133                 fprintf(stderr, "memory allocation failed\n");
8134                 exit(-1);
8135         }
8136
8137         rec->cache.objectid = key->objectid;
8138         rec->cache.start = key->offset;
8139
8140         rec->generation = btrfs_header_generation(leaf);
8141
8142         rec->objectid = key->objectid;
8143         rec->type = key->type;
8144         rec->offset = key->offset;
8145
8146         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8147         rec->chunk_objecteid =
8148                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8149         rec->chunk_offset =
8150                 btrfs_dev_extent_chunk_offset(leaf, ptr);
8151         rec->length = btrfs_dev_extent_length(leaf, ptr);
8152         rec->cache.size = rec->length;
8153
8154         INIT_LIST_HEAD(&rec->chunk_list);
8155         INIT_LIST_HEAD(&rec->device_list);
8156
8157         return rec;
8158 }
8159
8160 static int
8161 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8162                            struct btrfs_key *key, struct extent_buffer *eb,
8163                            int slot)
8164 {
8165         struct device_extent_record *rec;
8166         int ret;
8167
8168         rec = btrfs_new_device_extent_record(eb, key, slot);
8169         ret = insert_device_extent_record(dev_extent_cache, rec);
8170         if (ret) {
8171                 fprintf(stderr,
8172                         "Device extent[%llu, %llu, %llu] existed.\n",
8173                         rec->objectid, rec->offset, rec->length);
8174                 free(rec);
8175         }
8176
8177         return ret;
8178 }
8179
/*
 * Record the extent item (EXTENT_ITEM or METADATA_ITEM) at @slot of @eb in
 * @extent_cache, together with every inline backref stored in the item.
 *
 * key.objectid is the extent bytenr.  For METADATA_ITEM keys the length is
 * the filesystem nodesize, otherwise it is key.offset.
 *
 * Returns:
 *   -EIO  if the bytenr is not sector aligned, if a tree block extent does
 *         not have nodesize length, or if a data extent length is not
 *         sector aligned;
 *   the result of add_extent_rec() for an item smaller than
 *         struct btrfs_extent_item (v0 format);
 *   0     in every other case, including when adding a backref failed or
 *         an unknown inline ref type stopped the walk.
 *
 * NOTE(review): the results of add_extent_rec() (current-format path) and
 * add_data_backref() are not checked, and add_tree_backref() failures are
 * only logged.  Confirm with the callers whether this is intentional
 * best-effort behaviour.
 */
static int process_extent_item(struct btrfs_root *root,
                               struct cache_tree *extent_cache,
                               struct extent_buffer *eb, int slot)
{
        struct btrfs_extent_item *ei;
        struct btrfs_extent_inline_ref *iref;
        struct btrfs_extent_data_ref *dref;
        struct btrfs_shared_data_ref *sref;
        struct btrfs_key key;
        struct extent_record tmpl;
        unsigned long end;
        unsigned long ptr;
        int ret;
        int type;
        u32 item_size = btrfs_item_size_nr(eb, slot);
        u64 refs = 0;
        u64 offset;
        u64 num_bytes;
        int metadata = 0;

        btrfs_item_key_to_cpu(eb, &key, slot);

        /* Skinny metadata items do not carry a length in the key */
        if (key.type == BTRFS_METADATA_ITEM_KEY) {
                metadata = 1;
                num_bytes = root->fs_info->nodesize;
        } else {
                num_bytes = key.offset;
        }

        if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
                error("ignoring invalid extent, bytenr %llu is not aligned to %u",
                      key.objectid, root->fs_info->sectorsize);
                return -EIO;
        }
        /*
         * An item too small for struct btrfs_extent_item is treated as the
         * v0 format: only the ref count is available, there are no inline
         * refs to walk.  Without v0 support compiled in this is a BUG().
         */
        if (item_size < sizeof(*ei)) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                struct btrfs_extent_item_v0 *ei0;
                BUG_ON(item_size != sizeof(*ei0));
                ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
                refs = btrfs_extent_refs_v0(eb, ei0);
#else
                BUG();
#endif
                memset(&tmpl, 0, sizeof(tmpl));
                tmpl.start = key.objectid;
                tmpl.nr = num_bytes;
                tmpl.extent_item_refs = refs;
                tmpl.metadata = metadata;
                tmpl.found_rec = 1;
                tmpl.max_size = num_bytes;

                return add_extent_rec(extent_cache, &tmpl);
        }

        ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
        refs = btrfs_extent_refs(eb, ei);
        /* From here on the item's flags, not the key type, decide "metadata" */
        if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                metadata = 1;
        else
                metadata = 0;
        if (metadata && num_bytes != root->fs_info->nodesize) {
                error("ignore invalid metadata extent, length %llu does not equal to %u",
                      num_bytes, root->fs_info->nodesize);
                return -EIO;
        }
        if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
                error("ignore invalid data extent, length %llu is not aligned to %u",
                      num_bytes, root->fs_info->sectorsize);
                return -EIO;
        }

        memset(&tmpl, 0, sizeof(tmpl));
        tmpl.start = key.objectid;
        tmpl.nr = num_bytes;
        tmpl.extent_item_refs = refs;
        tmpl.metadata = metadata;
        tmpl.found_rec = 1;
        tmpl.max_size = num_bytes;
        add_extent_rec(extent_cache, &tmpl);

        /*
         * Inline refs start right after the extent item.  For a tree block
         * stored under an EXTENT_ITEM key a btrfs_tree_block_info sits in
         * between and is skipped.  ptr and end are only compared with each
         * other and handed to the btrfs_* accessors together with @eb.
         */
        ptr = (unsigned long)(ei + 1);
        if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
            key.type == BTRFS_EXTENT_ITEM_KEY)
                ptr += sizeof(struct btrfs_tree_block_info);

        end = (unsigned long)ei + item_size;
        while (ptr < end) {
                iref = (struct btrfs_extent_inline_ref *)ptr;
                type = btrfs_extent_inline_ref_type(eb, iref);
                offset = btrfs_extent_inline_ref_offset(eb, iref);
                switch (type) {
                case BTRFS_TREE_BLOCK_REF_KEY:
                        /* the inline offset is passed as the root argument */
                        ret = add_tree_backref(extent_cache, key.objectid,
                                        0, offset, 0);
                        if (ret < 0)
                                error(
                        "add_tree_backref failed (extent items tree block): %s",
                                      strerror(-ret));
                        break;
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        /* the inline offset is passed as the parent argument */
                        ret = add_tree_backref(extent_cache, key.objectid,
                                        offset, 0, 0);
                        if (ret < 0)
                                error(
                        "add_tree_backref failed (extent items shared block): %s",
                                      strerror(-ret));
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY:
                        /* the data ref overlays the inline ref's offset field */
                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                        add_data_backref(extent_cache, key.objectid, 0,
                                        btrfs_extent_data_ref_root(eb, dref),
                                        btrfs_extent_data_ref_objectid(eb,
                                                                       dref),
                                        btrfs_extent_data_ref_offset(eb, dref),
                                        btrfs_extent_data_ref_count(eb, dref),
                                        0, num_bytes);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY:
                        /* the shared data ref follows the inline ref header */
                        sref = (struct btrfs_shared_data_ref *)(iref + 1);
                        add_data_backref(extent_cache, key.objectid, offset,
                                        0, 0, 0,
                                        btrfs_shared_data_ref_count(eb, sref),
                                        0, num_bytes);
                        break;
                default:
                        /* unknown type: its size is unknown, stop walking */
                        fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
                                key.objectid, key.type, num_bytes);
                        goto out;
                }
                ptr += btrfs_extent_inline_ref_size(type);
        }
        /* the last inline ref must end exactly at the end of the item */
        WARN_ON(ptr > end);
out:
        return 0;
}
8315
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by exactly one entry in the block group's free space ctl, and
 * consume that entry.
 *
 * Before the lookup, the range is trimmed around the addresses that
 * btrfs_rmap_block() reports for each super block mirror offset
 * (presumably the logical ranges backing the super block copies, which
 * are not expected in the free space cache -- confirm against
 * btrfs_rmap_block()).  When such a stripe lands in the middle of the
 * range, the part to its left is checked recursively and the loop carries
 * on with the part to its right.
 *
 * Returns 0 if the range is fully covered by a reported stripe or matches
 * a free space entry exactly (the entry is then unlinked and freed), the
 * error from btrfs_rmap_block() or from the recursive call, or -EINVAL
 * when no entry is found or its offset/length differ.
 */
static int check_cache_range(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache,
                             u64 offset, u64 bytes)
{
        struct btrfs_free_space *entry;
        u64 *logical;
        u64 bytenr;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                /* on success logical[] holds nr addresses and is freed below */
                ret = btrfs_rmap_block(root->fs_info,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                while (nr--) {
                        /* stripe entirely before the range */
                        if (logical[nr] + stripe_len <= offset)
                                continue;
                        /* stripe entirely after the range */
                        if (offset + bytes <= logical[nr])
                                continue;
                        if (logical[nr] == offset) {
                                /* stripe starts with the range: drop the front */
                                if (stripe_len >= bytes) {
                                        free(logical);
                                        return 0;
                                }
                                bytes -= stripe_len;
                                offset += stripe_len;
                        } else if (logical[nr] < offset) {
                                /* stripe overlaps the front of the range */
                                if (logical[nr] + stripe_len >=
                                    offset + bytes) {
                                        free(logical);
                                        return 0;
                                }
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        } else {
                                /*
                                 * Could be tricky, the super may land in the
                                 * middle of the area we're checking.  First
                                 * check the easiest case, it's at the end.
                                 */
                                if (logical[nr] + stripe_len >=
                                    bytes + offset) {
                                        bytes = logical[nr] - offset;
                                        continue;
                                }

                                /* Check the left side */
                                ret = check_cache_range(root, cache,
                                                        offset,
                                                        logical[nr] - offset);
                                if (ret) {
                                        free(logical);
                                        return ret;
                                }

                                /* Now we continue with the right side */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        }
                }

                free(logical);
        }

        /* What is left of the range must match one free space entry exactly */
        entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
        if (!entry) {
                fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
                        offset, offset+bytes);
                return -EINVAL;
        }

        if (entry->offset != offset) {
                fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
                        entry->offset);
                return -EINVAL;
        }

        if (entry->bytes != bytes) {
                fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
                        bytes, entry->bytes, offset);
                return -EINVAL;
        }

        /*
         * Consume the matched entry; verify_space_cache() later treats any
         * entry still left in the ctl as an error.
         */
        unlink_free_space(cache->free_space_ctl, entry);
        free(entry);
        return 0;
}
8409
8410 static int verify_space_cache(struct btrfs_root *root,
8411                               struct btrfs_block_group_cache *cache)
8412 {
8413         struct btrfs_path path;
8414         struct extent_buffer *leaf;
8415         struct btrfs_key key;
8416         u64 last;
8417         int ret = 0;
8418
8419         root = root->fs_info->extent_root;
8420
8421         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8422
8423         btrfs_init_path(&path);
8424         key.objectid = last;
8425         key.offset = 0;
8426         key.type = BTRFS_EXTENT_ITEM_KEY;
8427         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8428         if (ret < 0)
8429                 goto out;
8430         ret = 0;
8431         while (1) {
8432                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8433                         ret = btrfs_next_leaf(root, &path);
8434                         if (ret < 0)
8435                                 goto out;
8436                         if (ret > 0) {
8437                                 ret = 0;
8438                                 break;
8439                         }
8440                 }
8441                 leaf = path.nodes[0];
8442                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8443                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8444                         break;
8445                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8446                     key.type != BTRFS_METADATA_ITEM_KEY) {
8447                         path.slots[0]++;
8448                         continue;
8449                 }
8450
8451                 if (last == key.objectid) {
8452                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8453                                 last = key.objectid + key.offset;
8454                         else
8455                                 last = key.objectid + root->fs_info->nodesize;
8456                         path.slots[0]++;
8457                         continue;
8458                 }
8459
8460                 ret = check_cache_range(root, cache, last,
8461                                         key.objectid - last);
8462                 if (ret)
8463                         break;
8464                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8465                         last = key.objectid + key.offset;
8466                 else
8467                         last = key.objectid + root->fs_info->nodesize;
8468                 path.slots[0]++;
8469         }
8470
8471         if (last < cache->key.objectid + cache->key.offset)
8472                 ret = check_cache_range(root, cache, last,
8473                                         cache->key.objectid +
8474                                         cache->key.offset - last);
8475
8476 out:
8477         btrfs_release_path(&path);
8478
8479         if (!ret &&
8480             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8481                 fprintf(stderr, "There are still entries left in the space "
8482                         "cache\n");
8483                 ret = -EINVAL;
8484         }
8485
8486         return ret;
8487 }
8488
8489 static int check_space_cache(struct btrfs_root *root)
8490 {
8491         struct btrfs_block_group_cache *cache;
8492         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8493         int ret;
8494         int error = 0;
8495
8496         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8497             btrfs_super_generation(root->fs_info->super_copy) !=
8498             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8499                 printf("cache and super generation don't match, space cache "
8500                        "will be invalidated\n");
8501                 return 0;
8502         }
8503
8504         if (ctx.progress_enabled) {
8505                 ctx.tp = TASK_FREE_SPACE;
8506                 task_start(ctx.info);
8507         }
8508
8509         while (1) {
8510                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8511                 if (!cache)
8512                         break;
8513
8514                 start = cache->key.objectid + cache->key.offset;
8515                 if (!cache->free_space_ctl) {
8516                         if (btrfs_init_free_space_ctl(cache,
8517                                                 root->fs_info->sectorsize)) {
8518                                 ret = -ENOMEM;
8519                                 break;
8520                         }
8521                 } else {
8522                         btrfs_remove_free_space_cache(cache);
8523                 }
8524
8525                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8526                         ret = exclude_super_stripes(root, cache);
8527                         if (ret) {
8528                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8529                                         strerror(-ret));
8530                                 error++;
8531                                 continue;
8532                         }
8533                         ret = load_free_space_tree(root->fs_info, cache);
8534                         free_excluded_extents(root, cache);
8535                         if (ret < 0) {
8536                                 fprintf(stderr, "could not load free space tree: %s\n",
8537                                         strerror(-ret));
8538                                 error++;
8539                                 continue;
8540                         }
8541                         error += ret;
8542                 } else {
8543                         ret = load_free_space_cache(root->fs_info, cache);
8544                         if (!ret)
8545                                 continue;
8546                 }
8547
8548                 ret = verify_space_cache(root, cache);
8549                 if (ret) {
8550                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8551                                 cache->key.objectid);
8552                         error++;
8553                 }
8554         }
8555
8556         task_stop(ctx.info);
8557
8558         return error ? -EINVAL : 0;
8559 }
8560
8561 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8562                         u64 num_bytes, unsigned long leaf_offset,
8563                         struct extent_buffer *eb) {
8564
8565         struct btrfs_fs_info *fs_info = root->fs_info;
8566         u64 offset = 0;
8567         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8568         char *data;
8569         unsigned long csum_offset;
8570         u32 csum;
8571         u32 csum_expected;
8572         u64 read_len;
8573         u64 data_checked = 0;
8574         u64 tmp;
8575         int ret = 0;
8576         int mirror;
8577         int num_copies;
8578
8579         if (num_bytes % fs_info->sectorsize)
8580                 return -EINVAL;
8581
8582         data = malloc(num_bytes);
8583         if (!data)
8584                 return -ENOMEM;
8585
8586         while (offset < num_bytes) {
8587                 mirror = 0;
8588 again:
8589                 read_len = num_bytes - offset;
8590                 /* read as much space once a time */
8591                 ret = read_extent_data(fs_info, data + offset,
8592                                 bytenr + offset, &read_len, mirror);
8593                 if (ret)
8594                         goto out;
8595                 data_checked = 0;
8596                 /* verify every 4k data's checksum */
8597                 while (data_checked < read_len) {
8598                         csum = ~(u32)0;
8599                         tmp = offset + data_checked;
8600
8601                         csum = btrfs_csum_data((char *)data + tmp,
8602                                                csum, fs_info->sectorsize);
8603                         btrfs_csum_final(csum, (u8 *)&csum);
8604
8605                         csum_offset = leaf_offset +
8606                                  tmp / fs_info->sectorsize * csum_size;
8607                         read_extent_buffer(eb, (char *)&csum_expected,
8608                                            csum_offset, csum_size);
8609                         /* try another mirror */
8610                         if (csum != csum_expected) {
8611                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8612                                                 mirror, bytenr + tmp,
8613                                                 csum, csum_expected);
8614                                 num_copies = btrfs_num_copies(root->fs_info,
8615                                                 bytenr, num_bytes);
8616                                 if (mirror < num_copies - 1) {
8617                                         mirror += 1;
8618                                         goto again;
8619                                 }
8620                         }
8621                         data_checked += fs_info->sectorsize;
8622                 }
8623                 offset += read_len;
8624         }
8625 out:
8626         free(data);
8627         return ret;
8628 }
8629
8630 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8631                                u64 num_bytes)
8632 {
8633         struct btrfs_path path;
8634         struct extent_buffer *leaf;
8635         struct btrfs_key key;
8636         int ret;
8637
8638         btrfs_init_path(&path);
8639         key.objectid = bytenr;
8640         key.type = BTRFS_EXTENT_ITEM_KEY;
8641         key.offset = (u64)-1;
8642
8643 again:
8644         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8645                                 0, 0);
8646         if (ret < 0) {
8647                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8648                 btrfs_release_path(&path);
8649                 return ret;
8650         } else if (ret) {
8651                 if (path.slots[0] > 0) {
8652                         path.slots[0]--;
8653                 } else {
8654                         ret = btrfs_prev_leaf(root, &path);
8655                         if (ret < 0) {
8656                                 goto out;
8657                         } else if (ret > 0) {
8658                                 ret = 0;
8659                                 goto out;
8660                         }
8661                 }
8662         }
8663
8664         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8665
8666         /*
8667          * Block group items come before extent items if they have the same
8668          * bytenr, so walk back one more just in case.  Dear future traveller,
8669          * first congrats on mastering time travel.  Now if it's not too much
8670          * trouble could you go back to 2006 and tell Chris to make the
8671          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8672          * EXTENT_ITEM_KEY please?
8673          */
8674         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8675                 if (path.slots[0] > 0) {
8676                         path.slots[0]--;
8677                 } else {
8678                         ret = btrfs_prev_leaf(root, &path);
8679                         if (ret < 0) {
8680                                 goto out;
8681                         } else if (ret > 0) {
8682                                 ret = 0;
8683                                 goto out;
8684                         }
8685                 }
8686                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8687         }
8688
8689         while (num_bytes) {
8690                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8691                         ret = btrfs_next_leaf(root, &path);
8692                         if (ret < 0) {
8693                                 fprintf(stderr, "Error going to next leaf "
8694                                         "%d\n", ret);
8695                                 btrfs_release_path(&path);
8696                                 return ret;
8697                         } else if (ret) {
8698                                 break;
8699                         }
8700                 }
8701                 leaf = path.nodes[0];
8702                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8703                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8704                         path.slots[0]++;
8705                         continue;
8706                 }
8707                 if (key.objectid + key.offset < bytenr) {
8708                         path.slots[0]++;
8709                         continue;
8710                 }
8711                 if (key.objectid > bytenr + num_bytes)
8712                         break;
8713
8714                 if (key.objectid == bytenr) {
8715                         if (key.offset >= num_bytes) {
8716                                 num_bytes = 0;
8717                                 break;
8718                         }
8719                         num_bytes -= key.offset;
8720                         bytenr += key.offset;
8721                 } else if (key.objectid < bytenr) {
8722                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8723                                 num_bytes = 0;
8724                                 break;
8725                         }
8726                         num_bytes = (bytenr + num_bytes) -
8727                                 (key.objectid + key.offset);
8728                         bytenr = key.objectid + key.offset;
8729                 } else {
8730                         if (key.objectid + key.offset < bytenr + num_bytes) {
8731                                 u64 new_start = key.objectid + key.offset;
8732                                 u64 new_bytes = bytenr + num_bytes - new_start;
8733
8734                                 /*
8735                                  * Weird case, the extent is in the middle of
8736                                  * our range, we'll have to search one side
8737                                  * and then the other.  Not sure if this happens
8738                                  * in real life, but no harm in coding it up
8739                                  * anyway just in case.
8740                                  */
8741                                 btrfs_release_path(&path);
8742                                 ret = check_extent_exists(root, new_start,
8743                                                           new_bytes);
8744                                 if (ret) {
8745                                         fprintf(stderr, "Right section didn't "
8746                                                 "have a record\n");
8747                                         break;
8748                                 }
8749                                 num_bytes = key.objectid - bytenr;
8750                                 goto again;
8751                         }
8752                         num_bytes = key.objectid - bytenr;
8753                 }
8754                 path.slots[0]++;
8755         }
8756         ret = 0;
8757
8758 out:
8759         if (num_bytes && !ret) {
8760                 fprintf(stderr, "There are no extents for csum range "
8761                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8762                 ret = 1;
8763         }
8764
8765         btrfs_release_path(&path);
8766         return ret;
8767 }
8768
8769 static int check_csums(struct btrfs_root *root)
8770 {
8771         struct btrfs_path path;
8772         struct extent_buffer *leaf;
8773         struct btrfs_key key;
8774         u64 offset = 0, num_bytes = 0;
8775         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8776         int errors = 0;
8777         int ret;
8778         u64 data_len;
8779         unsigned long leaf_offset;
8780
8781         root = root->fs_info->csum_root;
8782         if (!extent_buffer_uptodate(root->node)) {
8783                 fprintf(stderr, "No valid csum tree found\n");
8784                 return -ENOENT;
8785         }
8786
8787         btrfs_init_path(&path);
8788         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8789         key.type = BTRFS_EXTENT_CSUM_KEY;
8790         key.offset = 0;
8791         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8792         if (ret < 0) {
8793                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8794                 btrfs_release_path(&path);
8795                 return ret;
8796         }
8797
8798         if (ret > 0 && path.slots[0])
8799                 path.slots[0]--;
8800         ret = 0;
8801
8802         while (1) {
8803                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8804                         ret = btrfs_next_leaf(root, &path);
8805                         if (ret < 0) {
8806                                 fprintf(stderr, "Error going to next leaf "
8807                                         "%d\n", ret);
8808                                 break;
8809                         }
8810                         if (ret)
8811                                 break;
8812                 }
8813                 leaf = path.nodes[0];
8814
8815                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8816                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8817                         path.slots[0]++;
8818                         continue;
8819                 }
8820
8821                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8822                               csum_size) * root->fs_info->sectorsize;
8823                 if (!check_data_csum)
8824                         goto skip_csum_check;
8825                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8826                 ret = check_extent_csums(root, key.offset, data_len,
8827                                          leaf_offset, leaf);
8828                 if (ret)
8829                         break;
8830 skip_csum_check:
8831                 if (!num_bytes) {
8832                         offset = key.offset;
8833                 } else if (key.offset != offset + num_bytes) {
8834                         ret = check_extent_exists(root, offset, num_bytes);
8835                         if (ret) {
8836                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8837                                         "there is no extent record\n",
8838                                         offset, offset+num_bytes);
8839                                 errors++;
8840                         }
8841                         offset = key.offset;
8842                         num_bytes = 0;
8843                 }
8844                 num_bytes += data_len;
8845                 path.slots[0]++;
8846         }
8847
8848         btrfs_release_path(&path);
8849         return errors;
8850 }
8851
8852 static int is_dropped_key(struct btrfs_key *key,
8853                           struct btrfs_key *drop_key) {
8854         if (key->objectid < drop_key->objectid)
8855                 return 1;
8856         else if (key->objectid == drop_key->objectid) {
8857                 if (key->type < drop_key->type)
8858                         return 1;
8859                 else if (key->type == drop_key->type) {
8860                         if (key->offset < drop_key->offset)
8861                                 return 1;
8862                 }
8863         }
8864         return 0;
8865 }
8866
8867 /*
8868  * Here are the rules for FULL_BACKREF.
8869  *
8870  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8871  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8872  *      FULL_BACKREF set.
8873  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8874  *    if it happened after the relocation occurred since we'll have dropped the
8875  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8876  *    have no real way to know for sure.
8877  *
8878  * We process the blocks one root at a time, and we start from the lowest root
8879  * objectid and go to the highest.  So we can just lookup the owner backref for
8880  * the record and if we don't find it then we know it doesn't exist and we have
8881  * a FULL BACKREF.
8882  *
8883  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8884  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8885  * be set or not and then we can check later once we've gathered all the refs.
8886  */
8887 static int calc_extent_flag(struct cache_tree *extent_cache,
8888                            struct extent_buffer *buf,
8889                            struct root_item_record *ri,
8890                            u64 *flags)
8891 {
8892         struct extent_record *rec;
8893         struct cache_extent *cache;
8894         struct tree_backref *tback;
8895         u64 owner = 0;
8896
8897         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8898         /* we have added this extent before */
8899         if (!cache)
8900                 return -ENOENT;
8901
8902         rec = container_of(cache, struct extent_record, cache);
8903
8904         /*
8905          * Except file/reloc tree, we can not have
8906          * FULL BACKREF MODE
8907          */
8908         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8909                 goto normal;
8910         /*
8911          * root node
8912          */
8913         if (buf->start == ri->bytenr)
8914                 goto normal;
8915
8916         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8917                 goto full_backref;
8918
8919         owner = btrfs_header_owner(buf);
8920         if (owner == ri->objectid)
8921                 goto normal;
8922
8923         tback = find_tree_backref(rec, 0, owner);
8924         if (!tback)
8925                 goto full_backref;
8926 normal:
8927         *flags = 0;
8928         if (rec->flag_block_full_backref != FLAG_UNSET &&
8929             rec->flag_block_full_backref != 0)
8930                 rec->bad_full_backref = 1;
8931         return 0;
8932 full_backref:
8933         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8934         if (rec->flag_block_full_backref != FLAG_UNSET &&
8935             rec->flag_block_full_backref != 1)
8936                 rec->bad_full_backref = 1;
8937         return 0;
8938 }
8939
8940 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8941 {
8942         fprintf(stderr, "Invalid key type(");
8943         print_key_type(stderr, 0, key_type);
8944         fprintf(stderr, ") found in root(");
8945         print_objectid(stderr, rootid, 0);
8946         fprintf(stderr, ")\n");
8947 }
8948
8949 /*
8950  * Check if the key is valid with its extent buffer.
8951  *
8952  * This is a early check in case invalid key exists in a extent buffer
8953  * This is not comprehensive yet, but should prevent wrong key/item passed
8954  * further
8955  */
8956 static int check_type_with_root(u64 rootid, u8 key_type)
8957 {
8958         switch (key_type) {
8959         /* Only valid in chunk tree */
8960         case BTRFS_DEV_ITEM_KEY:
8961         case BTRFS_CHUNK_ITEM_KEY:
8962                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8963                         goto err;
8964                 break;
8965         /* valid in csum and log tree */
8966         case BTRFS_CSUM_TREE_OBJECTID:
8967                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8968                       is_fstree(rootid)))
8969                         goto err;
8970                 break;
8971         case BTRFS_EXTENT_ITEM_KEY:
8972         case BTRFS_METADATA_ITEM_KEY:
8973         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8974                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8975                         goto err;
8976                 break;
8977         case BTRFS_ROOT_ITEM_KEY:
8978                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8979                         goto err;
8980                 break;
8981         case BTRFS_DEV_EXTENT_KEY:
8982                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8983                         goto err;
8984                 break;
8985         }
8986         return 0;
8987 err:
8988         report_mismatch_key_root(key_type, rootid);
8989         return -EINVAL;
8990 }
8991
8992 static int run_next_block(struct btrfs_root *root,
8993                           struct block_info *bits,
8994                           int bits_nr,
8995                           u64 *last,
8996                           struct cache_tree *pending,
8997                           struct cache_tree *seen,
8998                           struct cache_tree *reada,
8999                           struct cache_tree *nodes,
9000                           struct cache_tree *extent_cache,
9001                           struct cache_tree *chunk_cache,
9002                           struct rb_root *dev_cache,
9003                           struct block_group_tree *block_group_cache,
9004                           struct device_extent_tree *dev_extent_cache,
9005                           struct root_item_record *ri)
9006 {
9007         struct btrfs_fs_info *fs_info = root->fs_info;
9008         struct extent_buffer *buf;
9009         struct extent_record *rec = NULL;
9010         u64 bytenr;
9011         u32 size;
9012         u64 parent;
9013         u64 owner;
9014         u64 flags;
9015         u64 ptr;
9016         u64 gen = 0;
9017         int ret = 0;
9018         int i;
9019         int nritems;
9020         struct btrfs_key key;
9021         struct cache_extent *cache;
9022         int reada_bits;
9023
9024         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9025                                     bits_nr, &reada_bits);
9026         if (nritems == 0)
9027                 return 1;
9028
9029         if (!reada_bits) {
9030                 for(i = 0; i < nritems; i++) {
9031                         ret = add_cache_extent(reada, bits[i].start,
9032                                                bits[i].size);
9033                         if (ret == -EEXIST)
9034                                 continue;
9035
9036                         /* fixme, get the parent transid */
9037                         readahead_tree_block(fs_info, bits[i].start, 0);
9038                 }
9039         }
9040         *last = bits[0].start;
9041         bytenr = bits[0].start;
9042         size = bits[0].size;
9043
9044         cache = lookup_cache_extent(pending, bytenr, size);
9045         if (cache) {
9046                 remove_cache_extent(pending, cache);
9047                 free(cache);
9048         }
9049         cache = lookup_cache_extent(reada, bytenr, size);
9050         if (cache) {
9051                 remove_cache_extent(reada, cache);
9052                 free(cache);
9053         }
9054         cache = lookup_cache_extent(nodes, bytenr, size);
9055         if (cache) {
9056                 remove_cache_extent(nodes, cache);
9057                 free(cache);
9058         }
9059         cache = lookup_cache_extent(extent_cache, bytenr, size);
9060         if (cache) {
9061                 rec = container_of(cache, struct extent_record, cache);
9062                 gen = rec->parent_generation;
9063         }
9064
9065         /* fixme, get the real parent transid */
9066         buf = read_tree_block(root->fs_info, bytenr, gen);
9067         if (!extent_buffer_uptodate(buf)) {
9068                 record_bad_block_io(root->fs_info,
9069                                     extent_cache, bytenr, size);
9070                 goto out;
9071         }
9072
9073         nritems = btrfs_header_nritems(buf);
9074
9075         flags = 0;
9076         if (!init_extent_tree) {
9077                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9078                                        btrfs_header_level(buf), 1, NULL,
9079                                        &flags);
9080                 if (ret < 0) {
9081                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9082                         if (ret < 0) {
9083                                 fprintf(stderr, "Couldn't calc extent flags\n");
9084                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9085                         }
9086                 }
9087         } else {
9088                 flags = 0;
9089                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9090                 if (ret < 0) {
9091                         fprintf(stderr, "Couldn't calc extent flags\n");
9092                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9093                 }
9094         }
9095
9096         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9097                 if (ri != NULL &&
9098                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9099                     ri->objectid == btrfs_header_owner(buf)) {
9100                         /*
9101                          * Ok we got to this block from it's original owner and
9102                          * we have FULL_BACKREF set.  Relocation can leave
9103                          * converted blocks over so this is altogether possible,
9104                          * however it's not possible if the generation > the
9105                          * last snapshot, so check for this case.
9106                          */
9107                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9108                             btrfs_header_generation(buf) > ri->last_snapshot) {
9109                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9110                                 rec->bad_full_backref = 1;
9111                         }
9112                 }
9113         } else {
9114                 if (ri != NULL &&
9115                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9116                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9117                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9118                         rec->bad_full_backref = 1;
9119                 }
9120         }
9121
9122         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9123                 rec->flag_block_full_backref = 1;
9124                 parent = bytenr;
9125                 owner = 0;
9126         } else {
9127                 rec->flag_block_full_backref = 0;
9128                 parent = 0;
9129                 owner = btrfs_header_owner(buf);
9130         }
9131
9132         ret = check_block(root, extent_cache, buf, flags);
9133         if (ret)
9134                 goto out;
9135
9136         if (btrfs_is_leaf(buf)) {
9137                 btree_space_waste += btrfs_leaf_free_space(root, buf);
9138                 for (i = 0; i < nritems; i++) {
9139                         struct btrfs_file_extent_item *fi;
9140                         btrfs_item_key_to_cpu(buf, &key, i);
9141                         /*
9142                          * Check key type against the leaf owner.
9143                          * Could filter quite a lot of early error if
9144                          * owner is correct
9145                          */
9146                         if (check_type_with_root(btrfs_header_owner(buf),
9147                                                  key.type)) {
9148                                 fprintf(stderr, "ignoring invalid key\n");
9149                                 continue;
9150                         }
9151                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9152                                 process_extent_item(root, extent_cache, buf,
9153                                                     i);
9154                                 continue;
9155                         }
9156                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9157                                 process_extent_item(root, extent_cache, buf,
9158                                                     i);
9159                                 continue;
9160                         }
9161                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9162                                 total_csum_bytes +=
9163                                         btrfs_item_size_nr(buf, i);
9164                                 continue;
9165                         }
9166                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9167                                 process_chunk_item(chunk_cache, &key, buf, i);
9168                                 continue;
9169                         }
9170                         if (key.type == BTRFS_DEV_ITEM_KEY) {
9171                                 process_device_item(dev_cache, &key, buf, i);
9172                                 continue;
9173                         }
9174                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9175                                 process_block_group_item(block_group_cache,
9176                                         &key, buf, i);
9177                                 continue;
9178                         }
9179                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
9180                                 process_device_extent_item(dev_extent_cache,
9181                                         &key, buf, i);
9182                                 continue;
9183
9184                         }
9185                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9186 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9187                                 process_extent_ref_v0(extent_cache, buf, i);
9188 #else
9189                                 BUG();
9190 #endif
9191                                 continue;
9192                         }
9193
9194                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9195                                 ret = add_tree_backref(extent_cache,
9196                                                 key.objectid, 0, key.offset, 0);
9197                                 if (ret < 0)
9198                                         error(
9199                                 "add_tree_backref failed (leaf tree block): %s",
9200                                               strerror(-ret));
9201                                 continue;
9202                         }
9203                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9204                                 ret = add_tree_backref(extent_cache,
9205                                                 key.objectid, key.offset, 0, 0);
9206                                 if (ret < 0)
9207                                         error(
9208                                 "add_tree_backref failed (leaf shared block): %s",
9209                                               strerror(-ret));
9210                                 continue;
9211                         }
9212                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9213                                 struct btrfs_extent_data_ref *ref;
9214                                 ref = btrfs_item_ptr(buf, i,
9215                                                 struct btrfs_extent_data_ref);
9216                                 add_data_backref(extent_cache,
9217                                         key.objectid, 0,
9218                                         btrfs_extent_data_ref_root(buf, ref),
9219                                         btrfs_extent_data_ref_objectid(buf,
9220                                                                        ref),
9221                                         btrfs_extent_data_ref_offset(buf, ref),
9222                                         btrfs_extent_data_ref_count(buf, ref),
9223                                         0, root->fs_info->sectorsize);
9224                                 continue;
9225                         }
9226                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9227                                 struct btrfs_shared_data_ref *ref;
9228                                 ref = btrfs_item_ptr(buf, i,
9229                                                 struct btrfs_shared_data_ref);
9230                                 add_data_backref(extent_cache,
9231                                         key.objectid, key.offset, 0, 0, 0,
9232                                         btrfs_shared_data_ref_count(buf, ref),
9233                                         0, root->fs_info->sectorsize);
9234                                 continue;
9235                         }
9236                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9237                                 struct bad_item *bad;
9238
9239                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9240                                         continue;
9241                                 if (!owner)
9242                                         continue;
9243                                 bad = malloc(sizeof(struct bad_item));
9244                                 if (!bad)
9245                                         continue;
9246                                 INIT_LIST_HEAD(&bad->list);
9247                                 memcpy(&bad->key, &key,
9248                                        sizeof(struct btrfs_key));
9249                                 bad->root_id = owner;
9250                                 list_add_tail(&bad->list, &delete_items);
9251                                 continue;
9252                         }
9253                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9254                                 continue;
9255                         fi = btrfs_item_ptr(buf, i,
9256                                             struct btrfs_file_extent_item);
9257                         if (btrfs_file_extent_type(buf, fi) ==
9258                             BTRFS_FILE_EXTENT_INLINE)
9259                                 continue;
9260                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9261                                 continue;
9262
9263                         data_bytes_allocated +=
9264                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9265                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9266                                 abort();
9267                         }
9268                         data_bytes_referenced +=
9269                                 btrfs_file_extent_num_bytes(buf, fi);
9270                         add_data_backref(extent_cache,
9271                                 btrfs_file_extent_disk_bytenr(buf, fi),
9272                                 parent, owner, key.objectid, key.offset -
9273                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9274                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9275                 }
9276         } else {
9277                 int level;
9278                 struct btrfs_key first_key;
9279
9280                 first_key.objectid = 0;
9281
9282                 if (nritems > 0)
9283                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9284                 level = btrfs_header_level(buf);
9285                 for (i = 0; i < nritems; i++) {
9286                         struct extent_record tmpl;
9287
9288                         ptr = btrfs_node_blockptr(buf, i);
9289                         size = root->fs_info->nodesize;
9290                         btrfs_node_key_to_cpu(buf, &key, i);
9291                         if (ri != NULL) {
9292                                 if ((level == ri->drop_level)
9293                                     && is_dropped_key(&key, &ri->drop_key)) {
9294                                         continue;
9295                                 }
9296                         }
9297
9298                         memset(&tmpl, 0, sizeof(tmpl));
9299                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9300                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9301                         tmpl.start = ptr;
9302                         tmpl.nr = size;
9303                         tmpl.refs = 1;
9304                         tmpl.metadata = 1;
9305                         tmpl.max_size = size;
9306                         ret = add_extent_rec(extent_cache, &tmpl);
9307                         if (ret < 0)
9308                                 goto out;
9309
9310                         ret = add_tree_backref(extent_cache, ptr, parent,
9311                                         owner, 1);
9312                         if (ret < 0) {
9313                                 error(
9314                                 "add_tree_backref failed (non-leaf block): %s",
9315                                       strerror(-ret));
9316                                 continue;
9317                         }
9318
9319                         if (level > 1) {
9320                                 add_pending(nodes, seen, ptr, size);
9321                         } else {
9322                                 add_pending(pending, seen, ptr, size);
9323                         }
9324                 }
9325                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9326                                       nritems) * sizeof(struct btrfs_key_ptr);
9327         }
9328         total_btree_bytes += buf->len;
9329         if (fs_root_objectid(btrfs_header_owner(buf)))
9330                 total_fs_tree_bytes += buf->len;
9331         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9332                 total_extent_tree_bytes += buf->len;
9333 out:
9334         free_extent_buffer(buf);
9335         return ret;
9336 }
9337
9338 static int add_root_to_pending(struct extent_buffer *buf,
9339                                struct cache_tree *extent_cache,
9340                                struct cache_tree *pending,
9341                                struct cache_tree *seen,
9342                                struct cache_tree *nodes,
9343                                u64 objectid)
9344 {
9345         struct extent_record tmpl;
9346         int ret;
9347
9348         if (btrfs_header_level(buf) > 0)
9349                 add_pending(nodes, seen, buf->start, buf->len);
9350         else
9351                 add_pending(pending, seen, buf->start, buf->len);
9352
9353         memset(&tmpl, 0, sizeof(tmpl));
9354         tmpl.start = buf->start;
9355         tmpl.nr = buf->len;
9356         tmpl.is_root = 1;
9357         tmpl.refs = 1;
9358         tmpl.metadata = 1;
9359         tmpl.max_size = buf->len;
9360         add_extent_rec(extent_cache, &tmpl);
9361
9362         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9363             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9364                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9365                                 0, 1);
9366         else
9367                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9368                                 1);
9369         return ret;
9370 }
9371
9372 /* as we fix the tree, we might be deleting blocks that
9373  * we're tracking for repair.  This hook makes sure we
9374  * remove any backrefs for blocks as we are fixing them.
9375  */
9376 static int free_extent_hook(struct btrfs_trans_handle *trans,
9377                             struct btrfs_root *root,
9378                             u64 bytenr, u64 num_bytes, u64 parent,
9379                             u64 root_objectid, u64 owner, u64 offset,
9380                             int refs_to_drop)
9381 {
9382         struct extent_record *rec;
9383         struct cache_extent *cache;
9384         int is_data;
9385         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9386
9387         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9388         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9389         if (!cache)
9390                 return 0;
9391
9392         rec = container_of(cache, struct extent_record, cache);
9393         if (is_data) {
9394                 struct data_backref *back;
9395                 back = find_data_backref(rec, parent, root_objectid, owner,
9396                                          offset, 1, bytenr, num_bytes);
9397                 if (!back)
9398                         goto out;
9399                 if (back->node.found_ref) {
9400                         back->found_ref -= refs_to_drop;
9401                         if (rec->refs)
9402                                 rec->refs -= refs_to_drop;
9403                 }
9404                 if (back->node.found_extent_tree) {
9405                         back->num_refs -= refs_to_drop;
9406                         if (rec->extent_item_refs)
9407                                 rec->extent_item_refs -= refs_to_drop;
9408                 }
9409                 if (back->found_ref == 0)
9410                         back->node.found_ref = 0;
9411                 if (back->num_refs == 0)
9412                         back->node.found_extent_tree = 0;
9413
9414                 if (!back->node.found_extent_tree && back->node.found_ref) {
9415                         rb_erase(&back->node.node, &rec->backref_tree);
9416                         free(back);
9417                 }
9418         } else {
9419                 struct tree_backref *back;
9420                 back = find_tree_backref(rec, parent, root_objectid);
9421                 if (!back)
9422                         goto out;
9423                 if (back->node.found_ref) {
9424                         if (rec->refs)
9425                                 rec->refs--;
9426                         back->node.found_ref = 0;
9427                 }
9428                 if (back->node.found_extent_tree) {
9429                         if (rec->extent_item_refs)
9430                                 rec->extent_item_refs--;
9431                         back->node.found_extent_tree = 0;
9432                 }
9433                 if (!back->node.found_extent_tree && back->node.found_ref) {
9434                         rb_erase(&back->node.node, &rec->backref_tree);
9435                         free(back);
9436                 }
9437         }
9438         maybe_free_extent_rec(extent_cache, rec);
9439 out:
9440         return 0;
9441 }
9442
9443 static int delete_extent_records(struct btrfs_trans_handle *trans,
9444                                  struct btrfs_root *root,
9445                                  struct btrfs_path *path,
9446                                  u64 bytenr)
9447 {
9448         struct btrfs_key key;
9449         struct btrfs_key found_key;
9450         struct extent_buffer *leaf;
9451         int ret;
9452         int slot;
9453
9454
9455         key.objectid = bytenr;
9456         key.type = (u8)-1;
9457         key.offset = (u64)-1;
9458
9459         while(1) {
9460                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9461                                         &key, path, 0, 1);
9462                 if (ret < 0)
9463                         break;
9464
9465                 if (ret > 0) {
9466                         ret = 0;
9467                         if (path->slots[0] == 0)
9468                                 break;
9469                         path->slots[0]--;
9470                 }
9471                 ret = 0;
9472
9473                 leaf = path->nodes[0];
9474                 slot = path->slots[0];
9475
9476                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9477                 if (found_key.objectid != bytenr)
9478                         break;
9479
9480                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9481                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9482                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9483                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9484                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9485                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9486                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9487                         btrfs_release_path(path);
9488                         if (found_key.type == 0) {
9489                                 if (found_key.offset == 0)
9490                                         break;
9491                                 key.offset = found_key.offset - 1;
9492                                 key.type = found_key.type;
9493                         }
9494                         key.type = found_key.type - 1;
9495                         key.offset = (u64)-1;
9496                         continue;
9497                 }
9498
9499                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9500                         found_key.objectid, found_key.type, found_key.offset);
9501
9502                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9503                 if (ret)
9504                         break;
9505                 btrfs_release_path(path);
9506
9507                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9508                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9509                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9510                                 found_key.offset : root->fs_info->nodesize;
9511
9512                         ret = btrfs_update_block_group(trans, root, bytenr,
9513                                                        bytes, 0, 0);
9514                         if (ret)
9515                                 break;
9516                 }
9517         }
9518
9519         btrfs_release_path(path);
9520         return ret;
9521 }
9522
9523 /*
9524  * for a single backref, this will allocate a new extent
9525  * and add the backref to it.
9526  */
9527 static int record_extent(struct btrfs_trans_handle *trans,
9528                          struct btrfs_fs_info *info,
9529                          struct btrfs_path *path,
9530                          struct extent_record *rec,
9531                          struct extent_backref *back,
9532                          int allocated, u64 flags)
9533 {
9534         int ret = 0;
9535         struct btrfs_root *extent_root = info->extent_root;
9536         struct extent_buffer *leaf;
9537         struct btrfs_key ins_key;
9538         struct btrfs_extent_item *ei;
9539         struct data_backref *dback;
9540         struct btrfs_tree_block_info *bi;
9541
9542         if (!back->is_data)
9543                 rec->max_size = max_t(u64, rec->max_size,
9544                                     info->nodesize);
9545
9546         if (!allocated) {
9547                 u32 item_size = sizeof(*ei);
9548
9549                 if (!back->is_data)
9550                         item_size += sizeof(*bi);
9551
9552                 ins_key.objectid = rec->start;
9553                 ins_key.offset = rec->max_size;
9554                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9555
9556                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9557                                         &ins_key, item_size);
9558                 if (ret)
9559                         goto fail;
9560
9561                 leaf = path->nodes[0];
9562                 ei = btrfs_item_ptr(leaf, path->slots[0],
9563                                     struct btrfs_extent_item);
9564
9565                 btrfs_set_extent_refs(leaf, ei, 0);
9566                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9567
9568                 if (back->is_data) {
9569                         btrfs_set_extent_flags(leaf, ei,
9570                                                BTRFS_EXTENT_FLAG_DATA);
9571                 } else {
9572                         struct btrfs_disk_key copy_key;;
9573
9574                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9575                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9576                                              sizeof(*bi));
9577
9578                         btrfs_set_disk_key_objectid(&copy_key,
9579                                                     rec->info_objectid);
9580                         btrfs_set_disk_key_type(&copy_key, 0);
9581                         btrfs_set_disk_key_offset(&copy_key, 0);
9582
9583                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9584                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9585
9586                         btrfs_set_extent_flags(leaf, ei,
9587                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9588                 }
9589
9590                 btrfs_mark_buffer_dirty(leaf);
9591                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9592                                                rec->max_size, 1, 0);
9593                 if (ret)
9594                         goto fail;
9595                 btrfs_release_path(path);
9596         }
9597
9598         if (back->is_data) {
9599                 u64 parent;
9600                 int i;
9601
9602                 dback = to_data_backref(back);
9603                 if (back->full_backref)
9604                         parent = dback->parent;
9605                 else
9606                         parent = 0;
9607
9608                 for (i = 0; i < dback->found_ref; i++) {
9609                         /* if parent != 0, we're doing a full backref
9610                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9611                          * just makes the backref allocator create a data
9612                          * backref
9613                          */
9614                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9615                                                    rec->start, rec->max_size,
9616                                                    parent,
9617                                                    dback->root,
9618                                                    parent ?
9619                                                    BTRFS_FIRST_FREE_OBJECTID :
9620                                                    dback->owner,
9621                                                    dback->offset);
9622                         if (ret)
9623                                 break;
9624                 }
9625                 fprintf(stderr, "adding new data backref"
9626                                 " on %llu %s %llu owner %llu"
9627                                 " offset %llu found %d\n",
9628                                 (unsigned long long)rec->start,
9629                                 back->full_backref ?
9630                                 "parent" : "root",
9631                                 back->full_backref ?
9632                                 (unsigned long long)parent :
9633                                 (unsigned long long)dback->root,
9634                                 (unsigned long long)dback->owner,
9635                                 (unsigned long long)dback->offset,
9636                                 dback->found_ref);
9637         } else {
9638                 u64 parent;
9639                 struct tree_backref *tback;
9640
9641                 tback = to_tree_backref(back);
9642                 if (back->full_backref)
9643                         parent = tback->parent;
9644                 else
9645                         parent = 0;
9646
9647                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9648                                            rec->start, rec->max_size,
9649                                            parent, tback->root, 0, 0);
9650                 fprintf(stderr, "adding new tree backref on "
9651                         "start %llu len %llu parent %llu root %llu\n",
9652                         rec->start, rec->max_size, parent, tback->root);
9653         }
9654 fail:
9655         btrfs_release_path(path);
9656         return ret;
9657 }
9658
9659 static struct extent_entry *find_entry(struct list_head *entries,
9660                                        u64 bytenr, u64 bytes)
9661 {
9662         struct extent_entry *entry = NULL;
9663
9664         list_for_each_entry(entry, entries, list) {
9665                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9666                         return entry;
9667         }
9668
9669         return NULL;
9670 }
9671
9672 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9673 {
9674         struct extent_entry *entry, *best = NULL, *prev = NULL;
9675
9676         list_for_each_entry(entry, entries, list) {
9677                 /*
9678                  * If there are as many broken entries as entries then we know
9679                  * not to trust this particular entry.
9680                  */
9681                 if (entry->broken == entry->count)
9682                         continue;
9683
9684                 /*
9685                  * Special case, when there are only two entries and 'best' is
9686                  * the first one
9687                  */
9688                 if (!prev) {
9689                         best = entry;
9690                         prev = entry;
9691                         continue;
9692                 }
9693
9694                 /*
9695                  * If our current entry == best then we can't be sure our best
9696                  * is really the best, so we need to keep searching.
9697                  */
9698                 if (best && best->count == entry->count) {
9699                         prev = entry;
9700                         best = NULL;
9701                         continue;
9702                 }
9703
9704                 /* Prev == entry, not good enough, have to keep searching */
9705                 if (!prev->broken && prev->count == entry->count)
9706                         continue;
9707
9708                 if (!best)
9709                         best = (prev->count > entry->count) ? prev : entry;
9710                 else if (best->count < entry->count)
9711                         best = entry;
9712                 prev = entry;
9713         }
9714
9715         return best;
9716 }
9717
9718 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9719                       struct data_backref *dback, struct extent_entry *entry)
9720 {
9721         struct btrfs_trans_handle *trans;
9722         struct btrfs_root *root;
9723         struct btrfs_file_extent_item *fi;
9724         struct extent_buffer *leaf;
9725         struct btrfs_key key;
9726         u64 bytenr, bytes;
9727         int ret, err;
9728
9729         key.objectid = dback->root;
9730         key.type = BTRFS_ROOT_ITEM_KEY;
9731         key.offset = (u64)-1;
9732         root = btrfs_read_fs_root(info, &key);
9733         if (IS_ERR(root)) {
9734                 fprintf(stderr, "Couldn't find root for our ref\n");
9735                 return -EINVAL;
9736         }
9737
9738         /*
9739          * The backref points to the original offset of the extent if it was
9740          * split, so we need to search down to the offset we have and then walk
9741          * forward until we find the backref we're looking for.
9742          */
9743         key.objectid = dback->owner;
9744         key.type = BTRFS_EXTENT_DATA_KEY;
9745         key.offset = dback->offset;
9746         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9747         if (ret < 0) {
9748                 fprintf(stderr, "Error looking up ref %d\n", ret);
9749                 return ret;
9750         }
9751
9752         while (1) {
9753                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9754                         ret = btrfs_next_leaf(root, path);
9755                         if (ret) {
9756                                 fprintf(stderr, "Couldn't find our ref, next\n");
9757                                 return -EINVAL;
9758                         }
9759                 }
9760                 leaf = path->nodes[0];
9761                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9762                 if (key.objectid != dback->owner ||
9763                     key.type != BTRFS_EXTENT_DATA_KEY) {
9764                         fprintf(stderr, "Couldn't find our ref, search\n");
9765                         return -EINVAL;
9766                 }
9767                 fi = btrfs_item_ptr(leaf, path->slots[0],
9768                                     struct btrfs_file_extent_item);
9769                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9770                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9771
9772                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9773                         break;
9774                 path->slots[0]++;
9775         }
9776
9777         btrfs_release_path(path);
9778
9779         trans = btrfs_start_transaction(root, 1);
9780         if (IS_ERR(trans))
9781                 return PTR_ERR(trans);
9782
9783         /*
9784          * Ok we have the key of the file extent we want to fix, now we can cow
9785          * down to the thing and fix it.
9786          */
9787         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9788         if (ret < 0) {
9789                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9790                         key.objectid, key.type, key.offset, ret);
9791                 goto out;
9792         }
9793         if (ret > 0) {
9794                 fprintf(stderr, "Well that's odd, we just found this key "
9795                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9796                         key.offset);
9797                 ret = -EINVAL;
9798                 goto out;
9799         }
9800         leaf = path->nodes[0];
9801         fi = btrfs_item_ptr(leaf, path->slots[0],
9802                             struct btrfs_file_extent_item);
9803
9804         if (btrfs_file_extent_compression(leaf, fi) &&
9805             dback->disk_bytenr != entry->bytenr) {
9806                 fprintf(stderr, "Ref doesn't match the record start and is "
9807                         "compressed, please take a btrfs-image of this file "
9808                         "system and send it to a btrfs developer so they can "
9809                         "complete this functionality for bytenr %Lu\n",
9810                         dback->disk_bytenr);
9811                 ret = -EINVAL;
9812                 goto out;
9813         }
9814
9815         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9816                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9817         } else if (dback->disk_bytenr > entry->bytenr) {
9818                 u64 off_diff, offset;
9819
9820                 off_diff = dback->disk_bytenr - entry->bytenr;
9821                 offset = btrfs_file_extent_offset(leaf, fi);
9822                 if (dback->disk_bytenr + offset +
9823                     btrfs_file_extent_num_bytes(leaf, fi) >
9824                     entry->bytenr + entry->bytes) {
9825                         fprintf(stderr, "Ref is past the entry end, please "
9826                                 "take a btrfs-image of this file system and "
9827                                 "send it to a btrfs developer, ref %Lu\n",
9828                                 dback->disk_bytenr);
9829                         ret = -EINVAL;
9830                         goto out;
9831                 }
9832                 offset += off_diff;
9833                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9834                 btrfs_set_file_extent_offset(leaf, fi, offset);
9835         } else if (dback->disk_bytenr < entry->bytenr) {
9836                 u64 offset;
9837
9838                 offset = btrfs_file_extent_offset(leaf, fi);
9839                 if (dback->disk_bytenr + offset < entry->bytenr) {
9840                         fprintf(stderr, "Ref is before the entry start, please"
9841                                 " take a btrfs-image of this file system and "
9842                                 "send it to a btrfs developer, ref %Lu\n",
9843                                 dback->disk_bytenr);
9844                         ret = -EINVAL;
9845                         goto out;
9846                 }
9847
9848                 offset += dback->disk_bytenr;
9849                 offset -= entry->bytenr;
9850                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9851                 btrfs_set_file_extent_offset(leaf, fi, offset);
9852         }
9853
9854         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9855
9856         /*
9857          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9858          * only do this if we aren't using compression, otherwise it's a
9859          * trickier case.
9860          */
9861         if (!btrfs_file_extent_compression(leaf, fi))
9862                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9863         else
9864                 printf("ram bytes may be wrong?\n");
9865         btrfs_mark_buffer_dirty(leaf);
9866 out:
9867         err = btrfs_commit_transaction(trans, root);
9868         btrfs_release_path(path);
9869         return ret ? ret : err;
9870 }
9871
9872 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9873                            struct extent_record *rec)
9874 {
9875         struct extent_backref *back, *tmp;
9876         struct data_backref *dback;
9877         struct extent_entry *entry, *best = NULL;
9878         LIST_HEAD(entries);
9879         int nr_entries = 0;
9880         int broken_entries = 0;
9881         int ret = 0;
9882         short mismatch = 0;
9883
9884         /*
9885          * Metadata is easy and the backrefs should always agree on bytenr and
9886          * size, if not we've got bigger issues.
9887          */
9888         if (rec->metadata)
9889                 return 0;
9890
9891         rbtree_postorder_for_each_entry_safe(back, tmp,
9892                                              &rec->backref_tree, node) {
9893                 if (back->full_backref || !back->is_data)
9894                         continue;
9895
9896                 dback = to_data_backref(back);
9897
9898                 /*
9899                  * We only pay attention to backrefs that we found a real
9900                  * backref for.
9901                  */
9902                 if (dback->found_ref == 0)
9903                         continue;
9904
9905                 /*
9906                  * For now we only catch when the bytes don't match, not the
9907                  * bytenr.  We can easily do this at the same time, but I want
9908                  * to have a fs image to test on before we just add repair
9909                  * functionality willy-nilly so we know we won't screw up the
9910                  * repair.
9911                  */
9912
9913                 entry = find_entry(&entries, dback->disk_bytenr,
9914                                    dback->bytes);
9915                 if (!entry) {
9916                         entry = malloc(sizeof(struct extent_entry));
9917                         if (!entry) {
9918                                 ret = -ENOMEM;
9919                                 goto out;
9920                         }
9921                         memset(entry, 0, sizeof(*entry));
9922                         entry->bytenr = dback->disk_bytenr;
9923                         entry->bytes = dback->bytes;
9924                         list_add_tail(&entry->list, &entries);
9925                         nr_entries++;
9926                 }
9927
                /*
                 * If we only have one entry we may think the entries agree
                 * when in reality they don't, so we have to do some extra
                 * checking.
                 */
9932                 if (dback->disk_bytenr != rec->start ||
9933                     dback->bytes != rec->nr || back->broken)
9934                         mismatch = 1;
9935
9936                 if (back->broken) {
9937                         entry->broken++;
9938                         broken_entries++;
9939                 }
9940
9941                 entry->count++;
9942         }
9943
9944         /* Yay all the backrefs agree, carry on good sir */
9945         if (nr_entries <= 1 && !mismatch)
9946                 goto out;
9947
9948         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9949                 "%Lu\n", rec->start);
9950
9951         /*
9952          * First we want to see if the backrefs can agree amongst themselves who
9953          * is right, so figure out which one of the entries has the highest
9954          * count.
9955          */
9956         best = find_most_right_entry(&entries);
9957
9958         /*
9959          * Ok so we may have an even split between what the backrefs think, so
9960          * this is where we use the extent ref to see what it thinks.
9961          */
9962         if (!best) {
9963                 entry = find_entry(&entries, rec->start, rec->nr);
9964                 if (!entry && (!broken_entries || !rec->found_rec)) {
9965                         fprintf(stderr, "Backrefs don't agree with each other "
9966                                 "and extent record doesn't agree with anybody,"
9967                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9968                                 rec->start, rec->nr);
9969                         ret = -EINVAL;
9970                         goto out;
9971                 } else if (!entry) {
9972                         /*
9973                          * Ok our backrefs were broken, we'll assume this is the
9974                          * correct value and add an entry for this range.
9975                          */
9976                         entry = malloc(sizeof(struct extent_entry));
9977                         if (!entry) {
9978                                 ret = -ENOMEM;
9979                                 goto out;
9980                         }
9981                         memset(entry, 0, sizeof(*entry));
9982                         entry->bytenr = rec->start;
9983                         entry->bytes = rec->nr;
9984                         list_add_tail(&entry->list, &entries);
9985                         nr_entries++;
9986                 }
9987                 entry->count++;
9988                 best = find_most_right_entry(&entries);
9989                 if (!best) {
9990                         fprintf(stderr, "Backrefs and extent record evenly "
9991                                 "split on who is right, this is going to "
9992                                 "require user input to fix bytenr %Lu bytes "
9993                                 "%Lu\n", rec->start, rec->nr);
9994                         ret = -EINVAL;
9995                         goto out;
9996                 }
9997         }
9998
9999         /*
10000          * I don't think this can happen currently as we'll abort() if we catch
10001          * this case higher up, but in case somebody removes that we still can't
10002          * deal with it properly here yet, so just bail out if that's the case.
10003          */
10004         if (best->bytenr != rec->start) {
10005                 fprintf(stderr, "Extent start and backref starts don't match, "
10006                         "please use btrfs-image on this file system and send "
10007                         "it to a btrfs developer so they can make fsck fix "
10008                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
10009                         rec->start, rec->nr);
10010                 ret = -EINVAL;
10011                 goto out;
10012         }
10013
10014         /*
10015          * Ok great we all agreed on an extent record, let's go find the real
10016          * references and fix up the ones that don't match.
10017          */
10018         rbtree_postorder_for_each_entry_safe(back, tmp,
10019                                              &rec->backref_tree, node) {
10020                 if (back->full_backref || !back->is_data)
10021                         continue;
10022
10023                 dback = to_data_backref(back);
10024
10025                 /*
10026                  * Still ignoring backrefs that don't have a real ref attached
10027                  * to them.
10028                  */
10029                 if (dback->found_ref == 0)
10030                         continue;
10031
10032                 if (dback->bytes == best->bytes &&
10033                     dback->disk_bytenr == best->bytenr)
10034                         continue;
10035
10036                 ret = repair_ref(info, path, dback, best);
10037                 if (ret)
10038                         goto out;
10039         }
10040
10041         /*
10042          * Ok we messed with the actual refs, which means we need to drop our
10043          * entire cache and go back and rescan.  I know this is a huge pain and
10044          * adds a lot of extra work, but it's the only way to be safe.  Once all
10045          * the backrefs agree we may not need to do anything to the extent
10046          * record itself.
10047          */
10048         ret = -EAGAIN;
10049 out:
10050         while (!list_empty(&entries)) {
10051                 entry = list_entry(entries.next, struct extent_entry, list);
10052                 list_del_init(&entry->list);
10053                 free(entry);
10054         }
10055         return ret;
10056 }
10057
10058 static int process_duplicates(struct cache_tree *extent_cache,
10059                               struct extent_record *rec)
10060 {
10061         struct extent_record *good, *tmp;
10062         struct cache_extent *cache;
10063         int ret;
10064
10065         /*
10066          * If we found a extent record for this extent then return, or if we
10067          * have more than one duplicate we are likely going to need to delete
10068          * something.
10069          */
10070         if (rec->found_rec || rec->num_duplicates > 1)
10071                 return 0;
10072
10073         /* Shouldn't happen but just in case */
10074         BUG_ON(!rec->num_duplicates);
10075
10076         /*
10077          * So this happens if we end up with a backref that doesn't match the
10078          * actual extent entry.  So either the backref is bad or the extent
10079          * entry is bad.  Either way we want to have the extent_record actually
10080          * reflect what we found in the extent_tree, so we need to take the
10081          * duplicate out and use that as the extent_record since the only way we
10082          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10083          */
10084         remove_cache_extent(extent_cache, &rec->cache);
10085
10086         good = to_extent_record(rec->dups.next);
10087         list_del_init(&good->list);
10088         INIT_LIST_HEAD(&good->backrefs);
10089         INIT_LIST_HEAD(&good->dups);
10090         good->cache.start = good->start;
10091         good->cache.size = good->nr;
10092         good->content_checked = 0;
10093         good->owner_ref_checked = 0;
10094         good->num_duplicates = 0;
10095         good->refs = rec->refs;
10096         list_splice_init(&rec->backrefs, &good->backrefs);
10097         while (1) {
10098                 cache = lookup_cache_extent(extent_cache, good->start,
10099                                             good->nr);
10100                 if (!cache)
10101                         break;
10102                 tmp = container_of(cache, struct extent_record, cache);
10103
10104                 /*
10105                  * If we find another overlapping extent and it's found_rec is
10106                  * set then it's a duplicate and we need to try and delete
10107                  * something.
10108                  */
10109                 if (tmp->found_rec || tmp->num_duplicates > 0) {
10110                         if (list_empty(&good->list))
10111                                 list_add_tail(&good->list,
10112                                               &duplicate_extents);
10113                         good->num_duplicates += tmp->num_duplicates + 1;
10114                         list_splice_init(&tmp->dups, &good->dups);
10115                         list_del_init(&tmp->list);
10116                         list_add_tail(&tmp->list, &good->dups);
10117                         remove_cache_extent(extent_cache, &tmp->cache);
10118                         continue;
10119                 }
10120
10121                 /*
10122                  * Ok we have another non extent item backed extent rec, so lets
10123                  * just add it to this extent and carry on like we did above.
10124                  */
10125                 good->refs += tmp->refs;
10126                 list_splice_init(&tmp->backrefs, &good->backrefs);
10127                 remove_cache_extent(extent_cache, &tmp->cache);
10128                 free(tmp);
10129         }
10130         ret = insert_cache_extent(extent_cache, &good->cache);
10131         BUG_ON(ret);
10132         free(rec);
10133         return good->num_duplicates ? 0 : 1;
10134 }
10135
10136 static int delete_duplicate_records(struct btrfs_root *root,
10137                                     struct extent_record *rec)
10138 {
10139         struct btrfs_trans_handle *trans;
10140         LIST_HEAD(delete_list);
10141         struct btrfs_path path;
10142         struct extent_record *tmp, *good, *n;
10143         int nr_del = 0;
10144         int ret = 0, err;
10145         struct btrfs_key key;
10146
10147         btrfs_init_path(&path);
10148
10149         good = rec;
10150         /* Find the record that covers all of the duplicates. */
10151         list_for_each_entry(tmp, &rec->dups, list) {
10152                 if (good->start < tmp->start)
10153                         continue;
10154                 if (good->nr > tmp->nr)
10155                         continue;
10156
10157                 if (tmp->start + tmp->nr < good->start + good->nr) {
10158                         fprintf(stderr, "Ok we have overlapping extents that "
10159                                 "aren't completely covered by each other, this "
10160                                 "is going to require more careful thought.  "
10161                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10162                                 tmp->start, tmp->nr, good->start, good->nr);
10163                         abort();
10164                 }
10165                 good = tmp;
10166         }
10167
10168         if (good != rec)
10169                 list_add_tail(&rec->list, &delete_list);
10170
10171         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10172                 if (tmp == good)
10173                         continue;
10174                 list_move_tail(&tmp->list, &delete_list);
10175         }
10176
10177         root = root->fs_info->extent_root;
10178         trans = btrfs_start_transaction(root, 1);
10179         if (IS_ERR(trans)) {
10180                 ret = PTR_ERR(trans);
10181                 goto out;
10182         }
10183
10184         list_for_each_entry(tmp, &delete_list, list) {
10185                 if (tmp->found_rec == 0)
10186                         continue;
10187                 key.objectid = tmp->start;
10188                 key.type = BTRFS_EXTENT_ITEM_KEY;
10189                 key.offset = tmp->nr;
10190
10191                 /* Shouldn't happen but just in case */
10192                 if (tmp->metadata) {
10193                         fprintf(stderr, "Well this shouldn't happen, extent "
10194                                 "record overlaps but is metadata? "
10195                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10196                         abort();
10197                 }
10198
10199                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10200                 if (ret) {
10201                         if (ret > 0)
10202                                 ret = -EINVAL;
10203                         break;
10204                 }
10205                 ret = btrfs_del_item(trans, root, &path);
10206                 if (ret)
10207                         break;
10208                 btrfs_release_path(&path);
10209                 nr_del++;
10210         }
10211         err = btrfs_commit_transaction(trans, root);
10212         if (err && !ret)
10213                 ret = err;
10214 out:
10215         while (!list_empty(&delete_list)) {
10216                 tmp = to_extent_record(delete_list.next);
10217                 list_del_init(&tmp->list);
10218                 if (tmp == rec)
10219                         continue;
10220                 free(tmp);
10221         }
10222
10223         while (!list_empty(&rec->dups)) {
10224                 tmp = to_extent_record(rec->dups.next);
10225                 list_del_init(&tmp->list);
10226                 free(tmp);
10227         }
10228
10229         btrfs_release_path(&path);
10230
10231         if (!ret && !nr_del)
10232                 rec->num_duplicates = 0;
10233
10234         return ret ? ret : nr_del;
10235 }
10236
10237 static int find_possible_backrefs(struct btrfs_fs_info *info,
10238                                   struct btrfs_path *path,
10239                                   struct cache_tree *extent_cache,
10240                                   struct extent_record *rec)
10241 {
10242         struct btrfs_root *root;
10243         struct extent_backref *back, *tmp;
10244         struct data_backref *dback;
10245         struct cache_extent *cache;
10246         struct btrfs_file_extent_item *fi;
10247         struct btrfs_key key;
10248         u64 bytenr, bytes;
10249         int ret;
10250
10251         rbtree_postorder_for_each_entry_safe(back, tmp,
10252                                              &rec->backref_tree, node) {
10253                 /* Don't care about full backrefs (poor unloved backrefs) */
10254                 if (back->full_backref || !back->is_data)
10255                         continue;
10256
10257                 dback = to_data_backref(back);
10258
10259                 /* We found this one, we don't need to do a lookup */
10260                 if (dback->found_ref)
10261                         continue;
10262
10263                 key.objectid = dback->root;
10264                 key.type = BTRFS_ROOT_ITEM_KEY;
10265                 key.offset = (u64)-1;
10266
10267                 root = btrfs_read_fs_root(info, &key);
10268
10269                 /* No root, definitely a bad ref, skip */
10270                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10271                         continue;
10272                 /* Other err, exit */
10273                 if (IS_ERR(root))
10274                         return PTR_ERR(root);
10275
10276                 key.objectid = dback->owner;
10277                 key.type = BTRFS_EXTENT_DATA_KEY;
10278                 key.offset = dback->offset;
10279                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10280                 if (ret) {
10281                         btrfs_release_path(path);
10282                         if (ret < 0)
10283                                 return ret;
10284                         /* Didn't find it, we can carry on */
10285                         ret = 0;
10286                         continue;
10287                 }
10288
10289                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10290                                     struct btrfs_file_extent_item);
10291                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10292                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10293                 btrfs_release_path(path);
10294                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10295                 if (cache) {
10296                         struct extent_record *tmp;
10297                         tmp = container_of(cache, struct extent_record, cache);
10298
10299                         /*
10300                          * If we found an extent record for the bytenr for this
10301                          * particular backref then we can't add it to our
10302                          * current extent record.  We only want to add backrefs
10303                          * that don't have a corresponding extent item in the
10304                          * extent tree since they likely belong to this record
10305                          * and we need to fix it if it doesn't match bytenrs.
10306                          */
10307                         if  (tmp->found_rec)
10308                                 continue;
10309                 }
10310
10311                 dback->found_ref += 1;
10312                 dback->disk_bytenr = bytenr;
10313                 dback->bytes = bytes;
10314
10315                 /*
10316                  * Set this so the verify backref code knows not to trust the
10317                  * values in this backref.
10318                  */
10319                 back->broken = 1;
10320         }
10321
10322         return 0;
10323 }
10324
10325 /*
10326  * Record orphan data ref into corresponding root.
10327  *
10328  * Return 0 if the extent item contains data ref and recorded.
10329  * Return 1 if the extent item contains no useful data ref
10330  *   On that case, it may contains only shared_dataref or metadata backref
10331  *   or the file extent exists(this should be handled by the extent bytenr
10332  *   recovery routine)
10333  * Return <0 if something goes wrong.
10334  */
10335 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10336                                       struct extent_record *rec)
10337 {
10338         struct btrfs_key key;
10339         struct btrfs_root *dest_root;
10340         struct extent_backref *back, *tmp;
10341         struct data_backref *dback;
10342         struct orphan_data_extent *orphan;
10343         struct btrfs_path path;
10344         int recorded_data_ref = 0;
10345         int ret = 0;
10346
10347         if (rec->metadata)
10348                 return 1;
10349         btrfs_init_path(&path);
10350         rbtree_postorder_for_each_entry_safe(back, tmp,
10351                                              &rec->backref_tree, node) {
10352                 if (back->full_backref || !back->is_data ||
10353                     !back->found_extent_tree)
10354                         continue;
10355                 dback = to_data_backref(back);
10356                 if (dback->found_ref)
10357                         continue;
10358                 key.objectid = dback->root;
10359                 key.type = BTRFS_ROOT_ITEM_KEY;
10360                 key.offset = (u64)-1;
10361
10362                 dest_root = btrfs_read_fs_root(fs_info, &key);
10363
10364                 /* For non-exist root we just skip it */
10365                 if (IS_ERR(dest_root) || !dest_root)
10366                         continue;
10367
10368                 key.objectid = dback->owner;
10369                 key.type = BTRFS_EXTENT_DATA_KEY;
10370                 key.offset = dback->offset;
10371
10372                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10373                 btrfs_release_path(&path);
10374                 /*
10375                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10376                  * we need to record it for inode/file extent rebuild.
10377                  * For ret > 0, we record it only for file extent rebuild.
10378                  * For ret == 0, the file extent exists but only bytenr
10379                  * mismatch, let the original bytenr fix routine to handle,
10380                  * don't record it.
10381                  */
10382                 if (ret == 0)
10383                         continue;
10384                 ret = 0;
10385                 orphan = malloc(sizeof(*orphan));
10386                 if (!orphan) {
10387                         ret = -ENOMEM;
10388                         goto out;
10389                 }
10390                 INIT_LIST_HEAD(&orphan->list);
10391                 orphan->root = dback->root;
10392                 orphan->objectid = dback->owner;
10393                 orphan->offset = dback->offset;
10394                 orphan->disk_bytenr = rec->cache.start;
10395                 orphan->disk_len = rec->cache.size;
10396                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10397                 recorded_data_ref = 1;
10398         }
10399 out:
10400         btrfs_release_path(&path);
10401         if (!ret)
10402                 return !recorded_data_ref;
10403         else
10404                 return ret;
10405 }
10406
10407 /*
10408  * when an incorrect extent item is found, this will delete
10409  * all of the existing entries for it and recreate them
10410  * based on what the tree scan found.
10411  */
10412 static int fixup_extent_refs(struct btrfs_fs_info *info,
10413                              struct cache_tree *extent_cache,
10414                              struct extent_record *rec)
10415 {
10416         struct btrfs_trans_handle *trans = NULL;
10417         int ret;
10418         struct btrfs_path path;
10419         struct cache_extent *cache;
10420         struct extent_backref *back, *tmp;
10421         int allocated = 0;
10422         u64 flags = 0;
10423
10424         if (rec->flag_block_full_backref)
10425                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10426
10427         btrfs_init_path(&path);
10428         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10429                 /*
10430                  * Sometimes the backrefs themselves are so broken they don't
10431                  * get attached to any meaningful rec, so first go back and
10432                  * check any of our backrefs that we couldn't find and throw
10433                  * them into the list if we find the backref so that
10434                  * verify_backrefs can figure out what to do.
10435                  */
10436                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10437                 if (ret < 0)
10438                         goto out;
10439         }
10440
10441         /* step one, make sure all of the backrefs agree */
10442         ret = verify_backrefs(info, &path, rec);
10443         if (ret < 0)
10444                 goto out;
10445
10446         trans = btrfs_start_transaction(info->extent_root, 1);
10447         if (IS_ERR(trans)) {
10448                 ret = PTR_ERR(trans);
10449                 goto out;
10450         }
10451
10452         /* step two, delete all the existing records */
10453         ret = delete_extent_records(trans, info->extent_root, &path,
10454                                     rec->start);
10455
10456         if (ret < 0)
10457                 goto out;
10458
10459         /* was this block corrupt?  If so, don't add references to it */
10460         cache = lookup_cache_extent(info->corrupt_blocks,
10461                                     rec->start, rec->max_size);
10462         if (cache) {
10463                 ret = 0;
10464                 goto out;
10465         }
10466
10467         /* step three, recreate all the refs we did find */
10468         rbtree_postorder_for_each_entry_safe(back, tmp,
10469                                              &rec->backref_tree, node) {
10470                 /*
10471                  * if we didn't find any references, don't create a
10472                  * new extent record
10473                  */
10474                 if (!back->found_ref)
10475                         continue;
10476
10477                 rec->bad_full_backref = 0;
10478                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10479                 allocated = 1;
10480
10481                 if (ret)
10482                         goto out;
10483         }
10484 out:
10485         if (trans) {
10486                 int err = btrfs_commit_transaction(trans, info->extent_root);
10487                 if (!ret)
10488                         ret = err;
10489         }
10490
10491         if (!ret)
10492                 fprintf(stderr, "Repaired extent references for %llu\n",
10493                                 (unsigned long long)rec->start);
10494
10495         btrfs_release_path(&path);
10496         return ret;
10497 }
10498
10499 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10500                               struct extent_record *rec)
10501 {
10502         struct btrfs_trans_handle *trans;
10503         struct btrfs_root *root = fs_info->extent_root;
10504         struct btrfs_path path;
10505         struct btrfs_extent_item *ei;
10506         struct btrfs_key key;
10507         u64 flags;
10508         int ret = 0;
10509
10510         key.objectid = rec->start;
10511         if (rec->metadata) {
10512                 key.type = BTRFS_METADATA_ITEM_KEY;
10513                 key.offset = rec->info_level;
10514         } else {
10515                 key.type = BTRFS_EXTENT_ITEM_KEY;
10516                 key.offset = rec->max_size;
10517         }
10518
10519         trans = btrfs_start_transaction(root, 0);
10520         if (IS_ERR(trans))
10521                 return PTR_ERR(trans);
10522
10523         btrfs_init_path(&path);
10524         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10525         if (ret < 0) {
10526                 btrfs_release_path(&path);
10527                 btrfs_commit_transaction(trans, root);
10528                 return ret;
10529         } else if (ret) {
10530                 fprintf(stderr, "Didn't find extent for %llu\n",
10531                         (unsigned long long)rec->start);
10532                 btrfs_release_path(&path);
10533                 btrfs_commit_transaction(trans, root);
10534                 return -ENOENT;
10535         }
10536
10537         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10538                             struct btrfs_extent_item);
10539         flags = btrfs_extent_flags(path.nodes[0], ei);
10540         if (rec->flag_block_full_backref) {
10541                 fprintf(stderr, "setting full backref on %llu\n",
10542                         (unsigned long long)key.objectid);
10543                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10544         } else {
10545                 fprintf(stderr, "clearing full backref on %llu\n",
10546                         (unsigned long long)key.objectid);
10547                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10548         }
10549         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10550         btrfs_mark_buffer_dirty(path.nodes[0]);
10551         btrfs_release_path(&path);
10552         ret = btrfs_commit_transaction(trans, root);
10553         if (!ret)
10554                 fprintf(stderr, "Repaired extent flags for %llu\n",
10555                                 (unsigned long long)rec->start);
10556
10557         return ret;
10558 }
10559
10560 /* right now we only prune from the extent allocation tree */
10561 static int prune_one_block(struct btrfs_trans_handle *trans,
10562                            struct btrfs_fs_info *info,
10563                            struct btrfs_corrupt_block *corrupt)
10564 {
10565         int ret;
10566         struct btrfs_path path;
10567         struct extent_buffer *eb;
10568         u64 found;
10569         int slot;
10570         int nritems;
10571         int level = corrupt->level + 1;
10572
10573         btrfs_init_path(&path);
10574 again:
10575         /* we want to stop at the parent to our busted block */
10576         path.lowest_level = level;
10577
10578         ret = btrfs_search_slot(trans, info->extent_root,
10579                                 &corrupt->key, &path, -1, 1);
10580
10581         if (ret < 0)
10582                 goto out;
10583
10584         eb = path.nodes[level];
10585         if (!eb) {
10586                 ret = -ENOENT;
10587                 goto out;
10588         }
10589
10590         /*
10591          * hopefully the search gave us the block we want to prune,
10592          * lets try that first
10593          */
10594         slot = path.slots[level];
10595         found =  btrfs_node_blockptr(eb, slot);
10596         if (found == corrupt->cache.start)
10597                 goto del_ptr;
10598
10599         nritems = btrfs_header_nritems(eb);
10600
10601         /* the search failed, lets scan this node and hope we find it */
10602         for (slot = 0; slot < nritems; slot++) {
10603                 found =  btrfs_node_blockptr(eb, slot);
10604                 if (found == corrupt->cache.start)
10605                         goto del_ptr;
10606         }
10607         /*
10608          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10609          * to this block
10610          */
10611         if (eb == info->extent_root->node) {
10612                 ret = -ENOENT;
10613                 goto out;
10614         } else {
10615                 level++;
10616                 btrfs_release_path(&path);
10617                 goto again;
10618         }
10619
10620 del_ptr:
10621         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10622         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10623
10624 out:
10625         btrfs_release_path(&path);
10626         return ret;
10627 }
10628
10629 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10630 {
10631         struct btrfs_trans_handle *trans = NULL;
10632         struct cache_extent *cache;
10633         struct btrfs_corrupt_block *corrupt;
10634
10635         while (1) {
10636                 cache = search_cache_extent(info->corrupt_blocks, 0);
10637                 if (!cache)
10638                         break;
10639                 if (!trans) {
10640                         trans = btrfs_start_transaction(info->extent_root, 1);
10641                         if (IS_ERR(trans))
10642                                 return PTR_ERR(trans);
10643                 }
10644                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10645                 prune_one_block(trans, info, corrupt);
10646                 remove_cache_extent(info->corrupt_blocks, cache);
10647         }
10648         if (trans)
10649                 return btrfs_commit_transaction(trans, info->extent_root);
10650         return 0;
10651 }
10652
10653 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10654 {
10655         struct btrfs_block_group_cache *cache;
10656         u64 start, end;
10657         int ret;
10658
10659         while (1) {
10660                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10661                                             &start, &end, EXTENT_DIRTY);
10662                 if (ret)
10663                         break;
10664                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10665         }
10666
10667         start = 0;
10668         while (1) {
10669                 cache = btrfs_lookup_first_block_group(fs_info, start);
10670                 if (!cache)
10671                         break;
10672                 if (cache->cached)
10673                         cache->cached = 0;
10674                 start = cache->key.objectid + cache->key.offset;
10675         }
10676 }
10677
10678 static int check_extent_refs(struct btrfs_root *root,
10679                              struct cache_tree *extent_cache)
10680 {
10681         struct extent_record *rec;
10682         struct cache_extent *cache;
10683         int ret = 0;
10684         int had_dups = 0;
10685
10686         if (repair) {
10687                 /*
10688                  * if we're doing a repair, we have to make sure
10689                  * we don't allocate from the problem extents.
10690                  * In the worst case, this will be all the
10691                  * extents in the FS
10692                  */
10693                 cache = search_cache_extent(extent_cache, 0);
10694                 while(cache) {
10695                         rec = container_of(cache, struct extent_record, cache);
10696                         set_extent_dirty(root->fs_info->excluded_extents,
10697                                          rec->start,
10698                                          rec->start + rec->max_size - 1);
10699                         cache = next_cache_extent(cache);
10700                 }
10701
10702                 /* pin down all the corrupted blocks too */
10703                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10704                 while(cache) {
10705                         set_extent_dirty(root->fs_info->excluded_extents,
10706                                          cache->start,
10707                                          cache->start + cache->size - 1);
10708                         cache = next_cache_extent(cache);
10709                 }
10710                 prune_corrupt_blocks(root->fs_info);
10711                 reset_cached_block_groups(root->fs_info);
10712         }
10713
10714         reset_cached_block_groups(root->fs_info);
10715
10716         /*
10717          * We need to delete any duplicate entries we find first otherwise we
10718          * could mess up the extent tree when we have backrefs that actually
10719          * belong to a different extent item and not the weird duplicate one.
10720          */
10721         while (repair && !list_empty(&duplicate_extents)) {
10722                 rec = to_extent_record(duplicate_extents.next);
10723                 list_del_init(&rec->list);
10724
10725                 /* Sometimes we can find a backref before we find an actual
10726                  * extent, so we need to process it a little bit to see if there
10727                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10728                  * if this is a backref screwup.  If we need to delete stuff
10729                  * process_duplicates() will return 0, otherwise it will return
10730                  * 1 and we
10731                  */
10732                 if (process_duplicates(extent_cache, rec))
10733                         continue;
10734                 ret = delete_duplicate_records(root, rec);
10735                 if (ret < 0)
10736                         return ret;
10737                 /*
10738                  * delete_duplicate_records will return the number of entries
10739                  * deleted, so if it's greater than 0 then we know we actually
10740                  * did something and we need to remove.
10741                  */
10742                 if (ret)
10743                         had_dups = 1;
10744         }
10745
10746         if (had_dups)
10747                 return -EAGAIN;
10748
10749         while(1) {
10750                 int cur_err = 0;
10751                 int fix = 0;
10752
10753                 cache = search_cache_extent(extent_cache, 0);
10754                 if (!cache)
10755                         break;
10756                 rec = container_of(cache, struct extent_record, cache);
10757                 if (rec->num_duplicates) {
10758                         fprintf(stderr, "extent item %llu has multiple extent "
10759                                 "items\n", (unsigned long long)rec->start);
10760                         cur_err = 1;
10761                 }
10762
10763                 if (rec->refs != rec->extent_item_refs) {
10764                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10765                                 (unsigned long long)rec->start,
10766                                 (unsigned long long)rec->nr);
10767                         fprintf(stderr, "extent item %llu, found %llu\n",
10768                                 (unsigned long long)rec->extent_item_refs,
10769                                 (unsigned long long)rec->refs);
10770                         ret = record_orphan_data_extents(root->fs_info, rec);
10771                         if (ret < 0)
10772                                 goto repair_abort;
10773                         fix = ret;
10774                         cur_err = 1;
10775                 }
10776                 if (all_backpointers_checked(rec, 1)) {
10777                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10778                                 (unsigned long long)rec->start,
10779                                 (unsigned long long)rec->nr);
10780                         fix = 1;
10781                         cur_err = 1;
10782                 }
10783                 if (!rec->owner_ref_checked) {
10784                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10785                                 (unsigned long long)rec->start,
10786                                 (unsigned long long)rec->nr);
10787                         fix = 1;
10788                         cur_err = 1;
10789                 }
10790
10791                 if (repair && fix) {
10792                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10793                         if (ret)
10794                                 goto repair_abort;
10795                 }
10796
10797
10798                 if (rec->bad_full_backref) {
10799                         fprintf(stderr, "bad full backref, on [%llu]\n",
10800                                 (unsigned long long)rec->start);
10801                         if (repair) {
10802                                 ret = fixup_extent_flags(root->fs_info, rec);
10803                                 if (ret)
10804                                         goto repair_abort;
10805                                 fix = 1;
10806                         }
10807                         cur_err = 1;
10808                 }
10809                 /*
10810                  * Although it's not a extent ref's problem, we reuse this
10811                  * routine for error reporting.
10812                  * No repair function yet.
10813                  */
10814                 if (rec->crossing_stripes) {
10815                         fprintf(stderr,
10816                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10817                                 rec->start, rec->start + rec->max_size);
10818                         cur_err = 1;
10819                 }
10820
10821                 if (rec->wrong_chunk_type) {
10822                         fprintf(stderr,
10823                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10824                                 rec->start, rec->start + rec->max_size);
10825                         cur_err = 1;
10826                 }
10827
10828                 remove_cache_extent(extent_cache, cache);
10829                 free_all_extent_backrefs(rec);
10830                 if (!init_extent_tree && repair && (!cur_err || fix))
10831                         clear_extent_dirty(root->fs_info->excluded_extents,
10832                                            rec->start,
10833                                            rec->start + rec->max_size - 1);
10834                 free(rec);
10835         }
10836 repair_abort:
10837         if (repair) {
10838                 if (ret && ret != -EAGAIN) {
10839                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10840                         exit(1);
10841                 } else if (!ret) {
10842                         struct btrfs_trans_handle *trans;
10843
10844                         root = root->fs_info->extent_root;
10845                         trans = btrfs_start_transaction(root, 1);
10846                         if (IS_ERR(trans)) {
10847                                 ret = PTR_ERR(trans);
10848                                 goto repair_abort;
10849                         }
10850
10851                         ret = btrfs_fix_block_accounting(trans, root);
10852                         if (ret)
10853                                 goto repair_abort;
10854                         ret = btrfs_commit_transaction(trans, root);
10855                         if (ret)
10856                                 goto repair_abort;
10857                 }
10858                 return ret;
10859         }
10860         return 0;
10861 }
10862
10863 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10864 {
10865         u64 stripe_size;
10866
10867         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10868                 stripe_size = length;
10869                 stripe_size /= num_stripes;
10870         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10871                 stripe_size = length * 2;
10872                 stripe_size /= num_stripes;
10873         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10874                 stripe_size = length;
10875                 stripe_size /= (num_stripes - 1);
10876         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10877                 stripe_size = length;
10878                 stripe_size /= (num_stripes - 2);
10879         } else {
10880                 stripe_size = length;
10881         }
10882         return stripe_size;
10883 }
10884
10885 /*
10886  * Check the chunk with its block group/dev list ref:
10887  * Return 0 if all refs seems valid.
10888  * Return 1 if part of refs seems valid, need later check for rebuild ref
10889  * like missing block group and needs to search extent tree to rebuild them.
10890  * Return -1 if essential refs are missing and unable to rebuild.
10891  */
10892 static int check_chunk_refs(struct chunk_record *chunk_rec,
10893                             struct block_group_tree *block_group_cache,
10894                             struct device_extent_tree *dev_extent_cache,
10895                             int silent)
10896 {
10897         struct cache_extent *block_group_item;
10898         struct block_group_record *block_group_rec;
10899         struct cache_extent *dev_extent_item;
10900         struct device_extent_record *dev_extent_rec;
10901         u64 devid;
10902         u64 offset;
10903         u64 length;
10904         int metadump_v2 = 0;
10905         int i;
10906         int ret = 0;
10907
10908         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10909                                                chunk_rec->offset,
10910                                                chunk_rec->length);
10911         if (block_group_item) {
10912                 block_group_rec = container_of(block_group_item,
10913                                                struct block_group_record,
10914                                                cache);
10915                 if (chunk_rec->length != block_group_rec->offset ||
10916                     chunk_rec->offset != block_group_rec->objectid ||
10917                     (!metadump_v2 &&
10918                      chunk_rec->type_flags != block_group_rec->flags)) {
10919                         if (!silent)
10920                                 fprintf(stderr,
10921                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10922                                         chunk_rec->objectid,
10923                                         chunk_rec->type,
10924                                         chunk_rec->offset,
10925                                         chunk_rec->length,
10926                                         chunk_rec->offset,
10927                                         chunk_rec->type_flags,
10928                                         block_group_rec->objectid,
10929                                         block_group_rec->type,
10930                                         block_group_rec->offset,
10931                                         block_group_rec->offset,
10932                                         block_group_rec->objectid,
10933                                         block_group_rec->flags);
10934                         ret = -1;
10935                 } else {
10936                         list_del_init(&block_group_rec->list);
10937                         chunk_rec->bg_rec = block_group_rec;
10938                 }
10939         } else {
10940                 if (!silent)
10941                         fprintf(stderr,
10942                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10943                                 chunk_rec->objectid,
10944                                 chunk_rec->type,
10945                                 chunk_rec->offset,
10946                                 chunk_rec->length,
10947                                 chunk_rec->offset,
10948                                 chunk_rec->type_flags);
10949                 ret = 1;
10950         }
10951
10952         if (metadump_v2)
10953                 return ret;
10954
10955         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10956                                     chunk_rec->num_stripes);
10957         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10958                 devid = chunk_rec->stripes[i].devid;
10959                 offset = chunk_rec->stripes[i].offset;
10960                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10961                                                        devid, offset, length);
10962                 if (dev_extent_item) {
10963                         dev_extent_rec = container_of(dev_extent_item,
10964                                                 struct device_extent_record,
10965                                                 cache);
10966                         if (dev_extent_rec->objectid != devid ||
10967                             dev_extent_rec->offset != offset ||
10968                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10969                             dev_extent_rec->length != length) {
10970                                 if (!silent)
10971                                         fprintf(stderr,
10972                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10973                                                 chunk_rec->objectid,
10974                                                 chunk_rec->type,
10975                                                 chunk_rec->offset,
10976                                                 chunk_rec->stripes[i].devid,
10977                                                 chunk_rec->stripes[i].offset,
10978                                                 dev_extent_rec->objectid,
10979                                                 dev_extent_rec->offset,
10980                                                 dev_extent_rec->length);
10981                                 ret = -1;
10982                         } else {
10983                                 list_move(&dev_extent_rec->chunk_list,
10984                                           &chunk_rec->dextents);
10985                         }
10986                 } else {
10987                         if (!silent)
10988                                 fprintf(stderr,
10989                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10990                                         chunk_rec->objectid,
10991                                         chunk_rec->type,
10992                                         chunk_rec->offset,
10993                                         chunk_rec->stripes[i].devid,
10994                                         chunk_rec->stripes[i].offset);
10995                         ret = -1;
10996                 }
10997         }
10998         return ret;
10999 }
11000
11001 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
11002 int check_chunks(struct cache_tree *chunk_cache,
11003                  struct block_group_tree *block_group_cache,
11004                  struct device_extent_tree *dev_extent_cache,
11005                  struct list_head *good, struct list_head *bad,
11006                  struct list_head *rebuild, int silent)
11007 {
11008         struct cache_extent *chunk_item;
11009         struct chunk_record *chunk_rec;
11010         struct block_group_record *bg_rec;
11011         struct device_extent_record *dext_rec;
11012         int err;
11013         int ret = 0;
11014
11015         chunk_item = first_cache_extent(chunk_cache);
11016         while (chunk_item) {
11017                 chunk_rec = container_of(chunk_item, struct chunk_record,
11018                                          cache);
11019                 err = check_chunk_refs(chunk_rec, block_group_cache,
11020                                        dev_extent_cache, silent);
11021                 if (err < 0)
11022                         ret = err;
11023                 if (err == 0 && good)
11024                         list_add_tail(&chunk_rec->list, good);
11025                 if (err > 0 && rebuild)
11026                         list_add_tail(&chunk_rec->list, rebuild);
11027                 if (err < 0 && bad)
11028                         list_add_tail(&chunk_rec->list, bad);
11029                 chunk_item = next_cache_extent(chunk_item);
11030         }
11031
11032         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11033                 if (!silent)
11034                         fprintf(stderr,
11035                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11036                                 bg_rec->objectid,
11037                                 bg_rec->offset,
11038                                 bg_rec->flags);
11039                 if (!ret)
11040                         ret = 1;
11041         }
11042
11043         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11044                             chunk_list) {
11045                 if (!silent)
11046                         fprintf(stderr,
11047                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11048                                 dext_rec->objectid,
11049                                 dext_rec->offset,
11050                                 dext_rec->length);
11051                 if (!ret)
11052                         ret = 1;
11053         }
11054         return ret;
11055 }
11056
11057
11058 static int check_device_used(struct device_record *dev_rec,
11059                              struct device_extent_tree *dext_cache)
11060 {
11061         struct cache_extent *cache;
11062         struct device_extent_record *dev_extent_rec;
11063         u64 total_byte = 0;
11064
11065         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11066         while (cache) {
11067                 dev_extent_rec = container_of(cache,
11068                                               struct device_extent_record,
11069                                               cache);
11070                 if (dev_extent_rec->objectid != dev_rec->devid)
11071                         break;
11072
11073                 list_del_init(&dev_extent_rec->device_list);
11074                 total_byte += dev_extent_rec->length;
11075                 cache = next_cache_extent(cache);
11076         }
11077
11078         if (total_byte != dev_rec->byte_used) {
11079                 fprintf(stderr,
11080                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11081                         total_byte, dev_rec->byte_used, dev_rec->objectid,
11082                         dev_rec->type, dev_rec->offset);
11083                 return -1;
11084         } else {
11085                 return 0;
11086         }
11087 }
11088
11089 /* check btrfs_dev_item -> btrfs_dev_extent */
11090 static int check_devices(struct rb_root *dev_cache,
11091                          struct device_extent_tree *dev_extent_cache)
11092 {
11093         struct rb_node *dev_node;
11094         struct device_record *dev_rec;
11095         struct device_extent_record *dext_rec;
11096         int err;
11097         int ret = 0;
11098
11099         dev_node = rb_first(dev_cache);
11100         while (dev_node) {
11101                 dev_rec = container_of(dev_node, struct device_record, node);
11102                 err = check_device_used(dev_rec, dev_extent_cache);
11103                 if (err)
11104                         ret = err;
11105
11106                 dev_node = rb_next(dev_node);
11107         }
11108         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11109                             device_list) {
11110                 fprintf(stderr,
11111                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11112                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
11113                 if (!ret)
11114                         ret = 1;
11115         }
11116         return ret;
11117 }
11118
11119 static int add_root_item_to_list(struct list_head *head,
11120                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11121                                   u8 level, u8 drop_level,
11122                                   struct btrfs_key *drop_key)
11123 {
11124
11125         struct root_item_record *ri_rec;
11126         ri_rec = malloc(sizeof(*ri_rec));
11127         if (!ri_rec)
11128                 return -ENOMEM;
11129         ri_rec->bytenr = bytenr;
11130         ri_rec->objectid = objectid;
11131         ri_rec->level = level;
11132         ri_rec->drop_level = drop_level;
11133         ri_rec->last_snapshot = last_snapshot;
11134         if (drop_key)
11135                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11136         list_add_tail(&ri_rec->list, head);
11137
11138         return 0;
11139 }
11140
11141 static void free_root_item_list(struct list_head *list)
11142 {
11143         struct root_item_record *ri_rec;
11144
11145         while (!list_empty(list)) {
11146                 ri_rec = list_first_entry(list, struct root_item_record,
11147                                           list);
11148                 list_del_init(&ri_rec->list);
11149                 free(ri_rec);
11150         }
11151 }
11152
11153 static int deal_root_from_list(struct list_head *list,
11154                                struct btrfs_root *root,
11155                                struct block_info *bits,
11156                                int bits_nr,
11157                                struct cache_tree *pending,
11158                                struct cache_tree *seen,
11159                                struct cache_tree *reada,
11160                                struct cache_tree *nodes,
11161                                struct cache_tree *extent_cache,
11162                                struct cache_tree *chunk_cache,
11163                                struct rb_root *dev_cache,
11164                                struct block_group_tree *block_group_cache,
11165                                struct device_extent_tree *dev_extent_cache)
11166 {
11167         int ret = 0;
11168         u64 last;
11169
11170         while (!list_empty(list)) {
11171                 struct root_item_record *rec;
11172                 struct extent_buffer *buf;
11173                 rec = list_entry(list->next,
11174                                  struct root_item_record, list);
11175                 last = 0;
11176                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11177                 if (!extent_buffer_uptodate(buf)) {
11178                         free_extent_buffer(buf);
11179                         ret = -EIO;
11180                         break;
11181                 }
11182                 ret = add_root_to_pending(buf, extent_cache, pending,
11183                                     seen, nodes, rec->objectid);
11184                 if (ret < 0)
11185                         break;
11186                 /*
11187                  * To rebuild extent tree, we need deal with snapshot
11188                  * one by one, otherwise we deal with node firstly which
11189                  * can maximize readahead.
11190                  */
11191                 while (1) {
11192                         ret = run_next_block(root, bits, bits_nr, &last,
11193                                              pending, seen, reada, nodes,
11194                                              extent_cache, chunk_cache,
11195                                              dev_cache, block_group_cache,
11196                                              dev_extent_cache, rec);
11197                         if (ret != 0)
11198                                 break;
11199                 }
11200                 free_extent_buffer(buf);
11201                 list_del(&rec->list);
11202                 free(rec);
11203                 if (ret < 0)
11204                         break;
11205         }
11206         while (ret >= 0) {
11207                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11208                                      reada, nodes, extent_cache, chunk_cache,
11209                                      dev_cache, block_group_cache,
11210                                      dev_extent_cache, NULL);
11211                 if (ret != 0) {
11212                         if (ret > 0)
11213                                 ret = 0;
11214                         break;
11215                 }
11216         }
11217         return ret;
11218 }
11219
11220 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11221 {
11222         struct rb_root dev_cache;
11223         struct cache_tree chunk_cache;
11224         struct block_group_tree block_group_cache;
11225         struct device_extent_tree dev_extent_cache;
11226         struct cache_tree extent_cache;
11227         struct cache_tree seen;
11228         struct cache_tree pending;
11229         struct cache_tree reada;
11230         struct cache_tree nodes;
11231         struct extent_io_tree excluded_extents;
11232         struct cache_tree corrupt_blocks;
11233         struct btrfs_path path;
11234         struct btrfs_key key;
11235         struct btrfs_key found_key;
11236         int ret, err = 0;
11237         struct block_info *bits;
11238         int bits_nr;
11239         struct extent_buffer *leaf;
11240         int slot;
11241         struct btrfs_root_item ri;
11242         struct list_head dropping_trees;
11243         struct list_head normal_trees;
11244         struct btrfs_root *root1;
11245         struct btrfs_root *root;
11246         u64 objectid;
11247         u8 level;
11248
11249         root = fs_info->fs_root;
11250         dev_cache = RB_ROOT;
11251         cache_tree_init(&chunk_cache);
11252         block_group_tree_init(&block_group_cache);
11253         device_extent_tree_init(&dev_extent_cache);
11254
11255         cache_tree_init(&extent_cache);
11256         cache_tree_init(&seen);
11257         cache_tree_init(&pending);
11258         cache_tree_init(&nodes);
11259         cache_tree_init(&reada);
11260         cache_tree_init(&corrupt_blocks);
11261         extent_io_tree_init(&excluded_extents);
11262         INIT_LIST_HEAD(&dropping_trees);
11263         INIT_LIST_HEAD(&normal_trees);
11264
11265         if (repair) {
11266                 fs_info->excluded_extents = &excluded_extents;
11267                 fs_info->fsck_extent_cache = &extent_cache;
11268                 fs_info->free_extent_hook = free_extent_hook;
11269                 fs_info->corrupt_blocks = &corrupt_blocks;
11270         }
11271
11272         bits_nr = 1024;
11273         bits = malloc(bits_nr * sizeof(struct block_info));
11274         if (!bits) {
11275                 perror("malloc");
11276                 exit(1);
11277         }
11278
11279         if (ctx.progress_enabled) {
11280                 ctx.tp = TASK_EXTENTS;
11281                 task_start(ctx.info);
11282         }
11283
11284 again:
11285         root1 = fs_info->tree_root;
11286         level = btrfs_header_level(root1->node);
11287         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11288                                     root1->node->start, 0, level, 0, NULL);
11289         if (ret < 0)
11290                 goto out;
11291         root1 = fs_info->chunk_root;
11292         level = btrfs_header_level(root1->node);
11293         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11294                                     root1->node->start, 0, level, 0, NULL);
11295         if (ret < 0)
11296                 goto out;
11297         btrfs_init_path(&path);
11298         key.offset = 0;
11299         key.objectid = 0;
11300         key.type = BTRFS_ROOT_ITEM_KEY;
11301         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11302         if (ret < 0)
11303                 goto out;
11304         while(1) {
11305                 leaf = path.nodes[0];
11306                 slot = path.slots[0];
11307                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11308                         ret = btrfs_next_leaf(root, &path);
11309                         if (ret != 0)
11310                                 break;
11311                         leaf = path.nodes[0];
11312                         slot = path.slots[0];
11313                 }
11314                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11315                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11316                         unsigned long offset;
11317                         u64 last_snapshot;
11318
11319                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11320                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11321                         last_snapshot = btrfs_root_last_snapshot(&ri);
11322                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11323                                 level = btrfs_root_level(&ri);
11324                                 ret = add_root_item_to_list(&normal_trees,
11325                                                 found_key.objectid,
11326                                                 btrfs_root_bytenr(&ri),
11327                                                 last_snapshot, level,
11328                                                 0, NULL);
11329                                 if (ret < 0)
11330                                         goto out;
11331                         } else {
11332                                 level = btrfs_root_level(&ri);
11333                                 objectid = found_key.objectid;
11334                                 btrfs_disk_key_to_cpu(&found_key,
11335                                                       &ri.drop_progress);
11336                                 ret = add_root_item_to_list(&dropping_trees,
11337                                                 objectid,
11338                                                 btrfs_root_bytenr(&ri),
11339                                                 last_snapshot, level,
11340                                                 ri.drop_level, &found_key);
11341                                 if (ret < 0)
11342                                         goto out;
11343                         }
11344                 }
11345                 path.slots[0]++;
11346         }
11347         btrfs_release_path(&path);
11348
11349         /*
11350          * check_block can return -EAGAIN if it fixes something, please keep
11351          * this in mind when dealing with return values from these functions, if
11352          * we get -EAGAIN we want to fall through and restart the loop.
11353          */
11354         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11355                                   &seen, &reada, &nodes, &extent_cache,
11356                                   &chunk_cache, &dev_cache, &block_group_cache,
11357                                   &dev_extent_cache);
11358         if (ret < 0) {
11359                 if (ret == -EAGAIN)
11360                         goto loop;
11361                 goto out;
11362         }
11363         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11364                                   &pending, &seen, &reada, &nodes,
11365                                   &extent_cache, &chunk_cache, &dev_cache,
11366                                   &block_group_cache, &dev_extent_cache);
11367         if (ret < 0) {
11368                 if (ret == -EAGAIN)
11369                         goto loop;
11370                 goto out;
11371         }
11372
11373         ret = check_chunks(&chunk_cache, &block_group_cache,
11374                            &dev_extent_cache, NULL, NULL, NULL, 0);
11375         if (ret) {
11376                 if (ret == -EAGAIN)
11377                         goto loop;
11378                 err = ret;
11379         }
11380
11381         ret = check_extent_refs(root, &extent_cache);
11382         if (ret < 0) {
11383                 if (ret == -EAGAIN)
11384                         goto loop;
11385                 goto out;
11386         }
11387
11388         ret = check_devices(&dev_cache, &dev_extent_cache);
11389         if (ret && err)
11390                 ret = err;
11391
11392 out:
11393         task_stop(ctx.info);
11394         if (repair) {
11395                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11396                 extent_io_tree_cleanup(&excluded_extents);
11397                 fs_info->fsck_extent_cache = NULL;
11398                 fs_info->free_extent_hook = NULL;
11399                 fs_info->corrupt_blocks = NULL;
11400                 fs_info->excluded_extents = NULL;
11401         }
11402         free(bits);
11403         free_chunk_cache_tree(&chunk_cache);
11404         free_device_cache_tree(&dev_cache);
11405         free_block_group_tree(&block_group_cache);
11406         free_device_extent_tree(&dev_extent_cache);
11407         free_extent_cache_tree(&seen);
11408         free_extent_cache_tree(&pending);
11409         free_extent_cache_tree(&reada);
11410         free_extent_cache_tree(&nodes);
11411         free_root_item_list(&normal_trees);
11412         free_root_item_list(&dropping_trees);
11413         return ret;
11414 loop:
11415         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11416         free_extent_cache_tree(&seen);
11417         free_extent_cache_tree(&pending);
11418         free_extent_cache_tree(&reada);
11419         free_extent_cache_tree(&nodes);
11420         free_chunk_cache_tree(&chunk_cache);
11421         free_block_group_tree(&block_group_cache);
11422         free_device_cache_tree(&dev_cache);
11423         free_device_extent_tree(&dev_extent_cache);
11424         free_extent_record_cache(&extent_cache);
11425         free_root_item_list(&normal_trees);
11426         free_root_item_list(&dropping_trees);
11427         extent_io_tree_cleanup(&excluded_extents);
11428         goto again;
11429 }
11430
11431 /*
11432  * Check backrefs of a tree block given by @bytenr or @eb.
11433  *
11434  * @root:       the root containing the @bytenr or @eb
11435  * @eb:         tree block extent buffer, can be NULL
11436  * @bytenr:     bytenr of the tree block to search
11437  * @level:      tree level of the tree block
11438  * @owner:      owner of the tree block
11439  *
11440  * Return >0 for any error found and output error message
11441  * Return 0 for no error found
11442  */
11443 static int check_tree_block_ref(struct btrfs_root *root,
11444                                 struct extent_buffer *eb, u64 bytenr,
11445                                 int level, u64 owner, struct node_refs *nrefs)
11446 {
11447         struct btrfs_key key;
11448         struct btrfs_root *extent_root = root->fs_info->extent_root;
11449         struct btrfs_path path;
11450         struct btrfs_extent_item *ei;
11451         struct btrfs_extent_inline_ref *iref;
11452         struct extent_buffer *leaf;
11453         unsigned long end;
11454         unsigned long ptr;
11455         int slot;
11456         int skinny_level;
11457         int root_level = btrfs_header_level(root->node);
11458         int type;
11459         u32 nodesize = root->fs_info->nodesize;
11460         u32 item_size;
11461         u64 offset;
11462         int tree_reloc_root = 0;
11463         int found_ref = 0;
11464         int err = 0;
11465         int ret;
11466         int strict = 1;
11467         int parent = 0;
11468
11469         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11470             btrfs_header_bytenr(root->node) == bytenr)
11471                 tree_reloc_root = 1;
11472         btrfs_init_path(&path);
11473         key.objectid = bytenr;
11474         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11475                 key.type = BTRFS_METADATA_ITEM_KEY;
11476         else
11477                 key.type = BTRFS_EXTENT_ITEM_KEY;
11478         key.offset = (u64)-1;
11479
11480         /* Search for the backref in extent tree */
11481         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11482         if (ret < 0) {
11483                 err |= BACKREF_MISSING;
11484                 goto out;
11485         }
11486         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11487         if (ret) {
11488                 err |= BACKREF_MISSING;
11489                 goto out;
11490         }
11491
11492         leaf = path.nodes[0];
11493         slot = path.slots[0];
11494         btrfs_item_key_to_cpu(leaf, &key, slot);
11495
11496         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11497
11498         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11499                 skinny_level = (int)key.offset;
11500                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11501         } else {
11502                 struct btrfs_tree_block_info *info;
11503
11504                 info = (struct btrfs_tree_block_info *)(ei + 1);
11505                 skinny_level = btrfs_tree_block_level(leaf, info);
11506                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11507         }
11508
11509
11510         if (eb) {
11511                 u64 header_gen;
11512                 u64 extent_gen;
11513
11514                 /*
11515                  * Due to the feature of shared tree blocks, if the upper node
11516                  * is a fs root or shared node, the extent of checked node may
11517                  * not be updated until the next CoW.
11518                  */
11519                 if (nrefs)
11520                         strict = should_check_extent_strictly(root, nrefs,
11521                                         level);
11522                 if (!(btrfs_extent_flags(leaf, ei) &
11523                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11524                         error(
11525                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11526                                 key.objectid, nodesize,
11527                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11528                         err = BACKREF_MISMATCH;
11529                 }
11530                 header_gen = btrfs_header_generation(eb);
11531                 extent_gen = btrfs_extent_generation(leaf, ei);
11532                 if (header_gen != extent_gen) {
11533                         error(
11534         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11535                                 key.objectid, nodesize, header_gen,
11536                                 extent_gen);
11537                         err = BACKREF_MISMATCH;
11538                 }
11539                 if (level != skinny_level) {
11540                         error(
11541                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11542                                 key.objectid, nodesize, level, skinny_level);
11543                         err = BACKREF_MISMATCH;
11544                 }
11545                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11546                         error(
11547                         "extent[%llu %u] is referred by other roots than %llu",
11548                                 key.objectid, nodesize, root->objectid);
11549                         err = BACKREF_MISMATCH;
11550                 }
11551         }
11552
11553         /*
11554          * Iterate the extent/metadata item to find the exact backref
11555          */
11556         item_size = btrfs_item_size_nr(leaf, slot);
11557         ptr = (unsigned long)iref;
11558         end = (unsigned long)ei + item_size;
11559
11560         while (ptr < end) {
11561                 iref = (struct btrfs_extent_inline_ref *)ptr;
11562                 type = btrfs_extent_inline_ref_type(leaf, iref);
11563                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11564
11565                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11566                         if (offset == root->objectid)
11567                                 found_ref = 1;
11568                         if (!strict && owner == offset)
11569                                 found_ref = 1;
11570                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11571                         /*
11572                          * Backref of tree reloc root points to itself, no need
11573                          * to check backref any more.
11574                          */
11575                         if (tree_reloc_root) {
11576                                 found_ref = 1;
11577                         } else {
11578                                 /*
11579                                  * Check if the backref points to valid
11580                                  * referencer
11581                                  */
11582                                 found_ref = !check_tree_block_ref( root, NULL,
11583                                                 offset, level + 1, owner,
11584                                                 NULL);
11585                         }
11586                 }
11587
11588                 if (found_ref)
11589                         break;
11590                 ptr += btrfs_extent_inline_ref_size(type);
11591         }
11592
11593         /*
11594          * Inlined extent item doesn't have what we need, check
11595          * TREE_BLOCK_REF_KEY
11596          */
11597         if (!found_ref) {
11598                 btrfs_release_path(&path);
11599                 key.objectid = bytenr;
11600                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11601                 key.offset = root->objectid;
11602
11603                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11604                 if (!ret)
11605                         found_ref = 1;
11606         }
11607         if (!found_ref)
11608                 err |= BACKREF_MISSING;
11609 out:
11610         btrfs_release_path(&path);
11611         if (nrefs && strict &&
11612             level < root_level && nrefs->full_backref[level + 1])
11613                 parent = nrefs->bytenr[level + 1];
11614         if (eb && (err & BACKREF_MISSING))
11615                 error(
11616         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11617                       bytenr, nodesize, owner, level,
11618                       parent ? "parent" : "root",
11619                       parent ? parent : root->objectid);
11620         return err;
11621 }
11622
11623 /*
11624  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11625  *
11626  * Return >0 any error found and output error message
11627  * Return 0 for no error found
11628  */
11629 static int check_extent_data_item(struct btrfs_root *root,
11630                                   struct btrfs_path *pathp,
11631                                   struct node_refs *nrefs,  int account_bytes)
11632 {
11633         struct btrfs_file_extent_item *fi;
11634         struct extent_buffer *eb = pathp->nodes[0];
11635         struct btrfs_path path;
11636         struct btrfs_root *extent_root = root->fs_info->extent_root;
11637         struct btrfs_key fi_key;
11638         struct btrfs_key dbref_key;
11639         struct extent_buffer *leaf;
11640         struct btrfs_extent_item *ei;
11641         struct btrfs_extent_inline_ref *iref;
11642         struct btrfs_extent_data_ref *dref;
11643         u64 owner;
11644         u64 disk_bytenr;
11645         u64 disk_num_bytes;
11646         u64 extent_num_bytes;
11647         u64 extent_flags;
11648         u32 item_size;
11649         unsigned long end;
11650         unsigned long ptr;
11651         int type;
11652         u64 ref_root;
11653         int found_dbackref = 0;
11654         int slot = pathp->slots[0];
11655         int err = 0;
11656         int ret;
11657         int strict;
11658
11659         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11660         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11661
11662         /* Nothing to check for hole and inline data extents */
11663         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11664             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11665                 return 0;
11666
11667         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11668         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11669         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11670
11671         /* Check unaligned disk_num_bytes and num_bytes */
11672         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11673                 error(
11674 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11675                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11676                         root->fs_info->sectorsize);
11677                 err |= BYTES_UNALIGNED;
11678         } else if (account_bytes) {
11679                 data_bytes_allocated += disk_num_bytes;
11680         }
11681         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11682                 error(
11683 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11684                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11685                         root->fs_info->sectorsize);
11686                 err |= BYTES_UNALIGNED;
11687         } else if (account_bytes) {
11688                 data_bytes_referenced += extent_num_bytes;
11689         }
11690         owner = btrfs_header_owner(eb);
11691
11692         /* Check the extent item of the file extent in extent tree */
11693         btrfs_init_path(&path);
11694         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11695         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11696         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11697
11698         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11699         if (ret)
11700                 goto out;
11701
11702         leaf = path.nodes[0];
11703         slot = path.slots[0];
11704         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11705
11706         extent_flags = btrfs_extent_flags(leaf, ei);
11707
11708         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11709                 error(
11710                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11711                     disk_bytenr, disk_num_bytes,
11712                     BTRFS_EXTENT_FLAG_DATA);
11713                 err |= BACKREF_MISMATCH;
11714         }
11715
11716         /* Check data backref inside that extent item */
11717         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11718         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11719         ptr = (unsigned long)iref;
11720         end = (unsigned long)ei + item_size;
11721         strict = should_check_extent_strictly(root, nrefs, -1);
11722
11723         while (ptr < end) {
11724                 iref = (struct btrfs_extent_inline_ref *)ptr;
11725                 type = btrfs_extent_inline_ref_type(leaf, iref);
11726                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11727
11728                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11729                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11730                         if (ref_root == root->objectid)
11731                                 found_dbackref = 1;
11732                         else if (!strict && owner == ref_root)
11733                                 found_dbackref = 1;
11734                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11735                         found_dbackref = !check_tree_block_ref(root, NULL,
11736                                 btrfs_extent_inline_ref_offset(leaf, iref),
11737                                 0, owner, NULL);
11738                 }
11739
11740                 if (found_dbackref)
11741                         break;
11742                 ptr += btrfs_extent_inline_ref_size(type);
11743         }
11744
11745         if (!found_dbackref) {
11746                 btrfs_release_path(&path);
11747
11748                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11749                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11750                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11751                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11752                                 fi_key.objectid, fi_key.offset);
11753
11754                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11755                                         &dbref_key, &path, 0, 0);
11756                 if (!ret) {
11757                         found_dbackref = 1;
11758                         goto out;
11759                 }
11760
11761                 btrfs_release_path(&path);
11762
11763                 /*
11764                  * Neither inlined nor EXTENT_DATA_REF found, try
11765                  * SHARED_DATA_REF as last chance.
11766                  */
11767                 dbref_key.objectid = disk_bytenr;
11768                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11769                 dbref_key.offset = eb->start;
11770
11771                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11772                                         &dbref_key, &path, 0, 0);
11773                 if (!ret) {
11774                         found_dbackref = 1;
11775                         goto out;
11776                 }
11777         }
11778
11779 out:
11780         if (!found_dbackref)
11781                 err |= BACKREF_MISSING;
11782         btrfs_release_path(&path);
11783         if (err & BACKREF_MISSING) {
11784                 error("data extent[%llu %llu] backref lost",
11785                       disk_bytenr, disk_num_bytes);
11786         }
11787         return err;
11788 }
11789
11790 /*
11791  * Get real tree block level for the case like shared block
11792  * Return >= 0 as tree level
11793  * Return <0 for error
11794  */
11795 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11796 {
11797         struct extent_buffer *eb;
11798         struct btrfs_path path;
11799         struct btrfs_key key;
11800         struct btrfs_extent_item *ei;
11801         u64 flags;
11802         u64 transid;
11803         u8 backref_level;
11804         u8 header_level;
11805         int ret;
11806
11807         /* Search extent tree for extent generation and level */
11808         key.objectid = bytenr;
11809         key.type = BTRFS_METADATA_ITEM_KEY;
11810         key.offset = (u64)-1;
11811
11812         btrfs_init_path(&path);
11813         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11814         if (ret < 0)
11815                 goto release_out;
11816         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11817         if (ret < 0)
11818                 goto release_out;
11819         if (ret > 0) {
11820                 ret = -ENOENT;
11821                 goto release_out;
11822         }
11823
11824         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11825         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11826                             struct btrfs_extent_item);
11827         flags = btrfs_extent_flags(path.nodes[0], ei);
11828         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11829                 ret = -ENOENT;
11830                 goto release_out;
11831         }
11832
11833         /* Get transid for later read_tree_block() check */
11834         transid = btrfs_extent_generation(path.nodes[0], ei);
11835
11836         /* Get backref level as one source */
11837         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11838                 backref_level = key.offset;
11839         } else {
11840                 struct btrfs_tree_block_info *info;
11841
11842                 info = (struct btrfs_tree_block_info *)(ei + 1);
11843                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11844         }
11845         btrfs_release_path(&path);
11846
11847         /* Get level from tree block as an alternative source */
11848         eb = read_tree_block(fs_info, bytenr, transid);
11849         if (!extent_buffer_uptodate(eb)) {
11850                 free_extent_buffer(eb);
11851                 return -EIO;
11852         }
11853         header_level = btrfs_header_level(eb);
11854         free_extent_buffer(eb);
11855
11856         if (header_level != backref_level)
11857                 return -EIO;
11858         return header_level;
11859
11860 release_out:
11861         btrfs_release_path(&path);
11862         return ret;
11863 }
11864
11865 /*
11866  * Check if a tree block backref is valid (points to a valid tree block)
11867  * if level == -1, level will be resolved
11868  * Return >0 for any error found and print error message
11869  */
11870 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11871                                     u64 bytenr, int level)
11872 {
11873         struct btrfs_root *root;
11874         struct btrfs_key key;
11875         struct btrfs_path path;
11876         struct extent_buffer *eb;
11877         struct extent_buffer *node;
11878         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11879         int err = 0;
11880         int ret;
11881
11882         /* Query level for level == -1 special case */
11883         if (level == -1)
11884                 level = query_tree_block_level(fs_info, bytenr);
11885         if (level < 0) {
11886                 err |= REFERENCER_MISSING;
11887                 goto out;
11888         }
11889
11890         key.objectid = root_id;
11891         key.type = BTRFS_ROOT_ITEM_KEY;
11892         key.offset = (u64)-1;
11893
11894         root = btrfs_read_fs_root(fs_info, &key);
11895         if (IS_ERR(root)) {
11896                 err |= REFERENCER_MISSING;
11897                 goto out;
11898         }
11899
11900         /* Read out the tree block to get item/node key */
11901         eb = read_tree_block(fs_info, bytenr, 0);
11902         if (!extent_buffer_uptodate(eb)) {
11903                 err |= REFERENCER_MISSING;
11904                 free_extent_buffer(eb);
11905                 goto out;
11906         }
11907
11908         /* Empty tree, no need to check key */
11909         if (!btrfs_header_nritems(eb) && !level) {
11910                 free_extent_buffer(eb);
11911                 goto out;
11912         }
11913
11914         if (level)
11915                 btrfs_node_key_to_cpu(eb, &key, 0);
11916         else
11917                 btrfs_item_key_to_cpu(eb, &key, 0);
11918
11919         free_extent_buffer(eb);
11920
11921         btrfs_init_path(&path);
11922         path.lowest_level = level;
11923         /* Search with the first key, to ensure we can reach it */
11924         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11925         if (ret < 0) {
11926                 err |= REFERENCER_MISSING;
11927                 goto release_out;
11928         }
11929
11930         node = path.nodes[level];
11931         if (btrfs_header_bytenr(node) != bytenr) {
11932                 error(
11933         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11934                         bytenr, nodesize, bytenr,
11935                         btrfs_header_bytenr(node));
11936                 err |= REFERENCER_MISMATCH;
11937         }
11938         if (btrfs_header_level(node) != level) {
11939                 error(
11940         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11941                         bytenr, nodesize, level,
11942                         btrfs_header_level(node));
11943                 err |= REFERENCER_MISMATCH;
11944         }
11945
11946 release_out:
11947         btrfs_release_path(&path);
11948 out:
11949         if (err & REFERENCER_MISSING) {
11950                 if (level < 0)
11951                         error("extent [%llu %d] lost referencer (owner: %llu)",
11952                                 bytenr, nodesize, root_id);
11953                 else
11954                         error(
11955                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11956                                 bytenr, nodesize, root_id, level);
11957         }
11958
11959         return err;
11960 }
11961
11962 /*
11963  * Check if tree block @eb is tree reloc root.
11964  * Return 0 if it's not or any problem happens
11965  * Return 1 if it's a tree reloc root
11966  */
11967 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11968                                  struct extent_buffer *eb)
11969 {
11970         struct btrfs_root *tree_reloc_root;
11971         struct btrfs_key key;
11972         u64 bytenr = btrfs_header_bytenr(eb);
11973         u64 owner = btrfs_header_owner(eb);
11974         int ret = 0;
11975
11976         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11977         key.offset = owner;
11978         key.type = BTRFS_ROOT_ITEM_KEY;
11979
11980         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11981         if (IS_ERR(tree_reloc_root))
11982                 return 0;
11983
11984         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11985                 ret = 1;
11986         btrfs_free_fs_root(tree_reloc_root);
11987         return ret;
11988 }
11989
11990 /*
11991  * Check referencer for shared block backref
11992  * If level == -1, this function will resolve the level.
11993  */
11994 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11995                                      u64 parent, u64 bytenr, int level)
11996 {
11997         struct extent_buffer *eb;
11998         u32 nr;
11999         int found_parent = 0;
12000         int i;
12001
12002         eb = read_tree_block(fs_info, parent, 0);
12003         if (!extent_buffer_uptodate(eb))
12004                 goto out;
12005
12006         if (level == -1)
12007                 level = query_tree_block_level(fs_info, bytenr);
12008         if (level < 0)
12009                 goto out;
12010
12011         /* It's possible it's a tree reloc root */
12012         if (parent == bytenr) {
12013                 if (is_tree_reloc_root(fs_info, eb))
12014                         found_parent = 1;
12015                 goto out;
12016         }
12017
12018         if (level + 1 != btrfs_header_level(eb))
12019                 goto out;
12020
12021         nr = btrfs_header_nritems(eb);
12022         for (i = 0; i < nr; i++) {
12023                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12024                         found_parent = 1;
12025                         break;
12026                 }
12027         }
12028 out:
12029         free_extent_buffer(eb);
12030         if (!found_parent) {
12031                 error(
12032         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12033                         bytenr, fs_info->nodesize, parent, level);
12034                 return REFERENCER_MISSING;
12035         }
12036         return 0;
12037 }
12038
12039 /*
12040  * Check referencer for normal (inlined) data ref
12041  * If len == 0, it will be resolved by searching in extent tree
12042  */
12043 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12044                                      u64 root_id, u64 objectid, u64 offset,
12045                                      u64 bytenr, u64 len, u32 count)
12046 {
12047         struct btrfs_root *root;
12048         struct btrfs_root *extent_root = fs_info->extent_root;
12049         struct btrfs_key key;
12050         struct btrfs_path path;
12051         struct extent_buffer *leaf;
12052         struct btrfs_file_extent_item *fi;
12053         u32 found_count = 0;
12054         int slot;
12055         int ret = 0;
12056
12057         if (!len) {
12058                 key.objectid = bytenr;
12059                 key.type = BTRFS_EXTENT_ITEM_KEY;
12060                 key.offset = (u64)-1;
12061
12062                 btrfs_init_path(&path);
12063                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12064                 if (ret < 0)
12065                         goto out;
12066                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12067                 if (ret)
12068                         goto out;
12069                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12070                 if (key.objectid != bytenr ||
12071                     key.type != BTRFS_EXTENT_ITEM_KEY)
12072                         goto out;
12073                 len = key.offset;
12074                 btrfs_release_path(&path);
12075         }
12076         key.objectid = root_id;
12077         key.type = BTRFS_ROOT_ITEM_KEY;
12078         key.offset = (u64)-1;
12079         btrfs_init_path(&path);
12080
12081         root = btrfs_read_fs_root(fs_info, &key);
12082         if (IS_ERR(root))
12083                 goto out;
12084
12085         key.objectid = objectid;
12086         key.type = BTRFS_EXTENT_DATA_KEY;
12087         /*
12088          * It can be nasty as data backref offset is
12089          * file offset - file extent offset, which is smaller or
12090          * equal to original backref offset.  The only special case is
12091          * overflow.  So we need to special check and do further search.
12092          */
12093         key.offset = offset & (1ULL << 63) ? 0 : offset;
12094
12095         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12096         if (ret < 0)
12097                 goto out;
12098
12099         /*
12100          * Search afterwards to get correct one
12101          * NOTE: As we must do a comprehensive check on the data backref to
12102          * make sure the dref count also matches, we must iterate all file
12103          * extents for that inode.
12104          */
12105         while (1) {
12106                 leaf = path.nodes[0];
12107                 slot = path.slots[0];
12108
12109                 if (slot >= btrfs_header_nritems(leaf))
12110                         goto next;
12111                 btrfs_item_key_to_cpu(leaf, &key, slot);
12112                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12113                         break;
12114                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12115                 /*
12116                  * Except normal disk bytenr and disk num bytes, we still
12117                  * need to do extra check on dbackref offset as
12118                  * dbackref offset = file_offset - file_extent_offset
12119                  */
12120                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12121                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12122                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12123                     offset)
12124                         found_count++;
12125
12126 next:
12127                 ret = btrfs_next_item(root, &path);
12128                 if (ret)
12129                         break;
12130         }
12131 out:
12132         btrfs_release_path(&path);
12133         if (found_count != count) {
12134                 error(
12135 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12136                         bytenr, len, root_id, objectid, offset, count, found_count);
12137                 return REFERENCER_MISSING;
12138         }
12139         return 0;
12140 }
12141
12142 /*
12143  * Check if the referencer of a shared data backref exists
12144  */
12145 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12146                                      u64 parent, u64 bytenr)
12147 {
12148         struct extent_buffer *eb;
12149         struct btrfs_key key;
12150         struct btrfs_file_extent_item *fi;
12151         u32 nr;
12152         int found_parent = 0;
12153         int i;
12154
12155         eb = read_tree_block(fs_info, parent, 0);
12156         if (!extent_buffer_uptodate(eb))
12157                 goto out;
12158
12159         nr = btrfs_header_nritems(eb);
12160         for (i = 0; i < nr; i++) {
12161                 btrfs_item_key_to_cpu(eb, &key, i);
12162                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12163                         continue;
12164
12165                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12166                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12167                         continue;
12168
12169                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12170                         found_parent = 1;
12171                         break;
12172                 }
12173         }
12174
12175 out:
12176         free_extent_buffer(eb);
12177         if (!found_parent) {
12178                 error("shared extent %llu referencer lost (parent: %llu)",
12179                         bytenr, parent);
12180                 return REFERENCER_MISSING;
12181         }
12182         return 0;
12183 }
12184
12185 /*
12186  * Only delete backref if REFERENCER_MISSING now
12187  *
12188  * Returns <0   the extent was deleted
12189  * Returns >0   the backref was deleted but extent still exists, returned value
12190  *               means error after repair
12191  * Returns  0   nothing happened
12192  */
12193 static int repair_extent_item(struct btrfs_trans_handle *trans,
12194                       struct btrfs_root *root, struct btrfs_path *path,
12195                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12196                       u64 owner, u64 offset, int err)
12197 {
12198         struct btrfs_key old_key;
12199         int freed = 0;
12200         int ret;
12201
12202         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12203
12204         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12205                 /* delete the backref */
12206                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12207                           num_bytes, parent, root_objectid, owner, offset);
12208                 if (!ret) {
12209                         freed = 1;
12210                         err &= ~REFERENCER_MISSING;
12211                         printf("Delete backref in extent [%llu %llu]\n",
12212                                bytenr, num_bytes);
12213                 } else {
12214                         error("fail to delete backref in extent [%llu %llu]",
12215                                bytenr, num_bytes);
12216                 }
12217         }
12218
12219         /* btrfs_free_extent may delete the extent */
12220         btrfs_release_path(path);
12221         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12222
12223         if (ret)
12224                 ret = -ENOENT;
12225         else if (freed)
12226                 ret = err;
12227         return ret;
12228 }
12229
12230 /*
12231  * This function will check a given extent item, including its backref and
12232  * itself (like crossing stripe boundary and type)
12233  *
12234  * Since we don't use extent_record anymore, introduce new error bit
12235  */
12236 static int check_extent_item(struct btrfs_trans_handle *trans,
12237                              struct btrfs_fs_info *fs_info,
12238                              struct btrfs_path *path)
12239 {
12240         struct btrfs_extent_item *ei;
12241         struct btrfs_extent_inline_ref *iref;
12242         struct btrfs_extent_data_ref *dref;
12243         struct extent_buffer *eb = path->nodes[0];
12244         unsigned long end;
12245         unsigned long ptr;
12246         int slot = path->slots[0];
12247         int type;
12248         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12249         u32 item_size = btrfs_item_size_nr(eb, slot);
12250         u64 flags;
12251         u64 offset;
12252         u64 parent;
12253         u64 num_bytes;
12254         u64 root_objectid;
12255         u64 owner;
12256         u64 owner_offset;
12257         int metadata = 0;
12258         int level;
12259         struct btrfs_key key;
12260         int ret;
12261         int err = 0;
12262
12263         btrfs_item_key_to_cpu(eb, &key, slot);
12264         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12265                 bytes_used += key.offset;
12266                 num_bytes = key.offset;
12267         } else {
12268                 bytes_used += nodesize;
12269                 num_bytes = nodesize;
12270         }
12271
12272         if (item_size < sizeof(*ei)) {
12273                 /*
12274                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12275                  * old thing when on disk format is still un-determined.
12276                  * No need to care about it anymore
12277                  */
12278                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12279                 return -ENOTTY;
12280         }
12281
12282         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12283         flags = btrfs_extent_flags(eb, ei);
12284
12285         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12286                 metadata = 1;
12287         if (metadata && check_crossing_stripes(global_info, key.objectid,
12288                                                eb->len)) {
12289                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12290                       key.objectid, key.objectid + nodesize);
12291                 err |= CROSSING_STRIPE_BOUNDARY;
12292         }
12293
12294         ptr = (unsigned long)(ei + 1);
12295
12296         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12297                 /* Old EXTENT_ITEM metadata */
12298                 struct btrfs_tree_block_info *info;
12299
12300                 info = (struct btrfs_tree_block_info *)ptr;
12301                 level = btrfs_tree_block_level(eb, info);
12302                 ptr += sizeof(struct btrfs_tree_block_info);
12303         } else {
12304                 /* New METADATA_ITEM */
12305                 level = key.offset;
12306         }
12307         end = (unsigned long)ei + item_size;
12308
12309 next:
12310         /* Reached extent item end normally */
12311         if (ptr == end)
12312                 goto out;
12313
12314         /* Beyond extent item end, wrong item size */
12315         if (ptr > end) {
12316                 err |= ITEM_SIZE_MISMATCH;
12317                 error("extent item at bytenr %llu slot %d has wrong size",
12318                         eb->start, slot);
12319                 goto out;
12320         }
12321
12322         parent = 0;
12323         root_objectid = 0;
12324         owner = 0;
12325         owner_offset = 0;
12326         /* Now check every backref in this extent item */
12327         iref = (struct btrfs_extent_inline_ref *)ptr;
12328         type = btrfs_extent_inline_ref_type(eb, iref);
12329         offset = btrfs_extent_inline_ref_offset(eb, iref);
12330         switch (type) {
12331         case BTRFS_TREE_BLOCK_REF_KEY:
12332                 root_objectid = offset;
12333                 owner = level;
12334                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12335                                                level);
12336                 err |= ret;
12337                 break;
12338         case BTRFS_SHARED_BLOCK_REF_KEY:
12339                 parent = offset;
12340                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12341                                                  level);
12342                 err |= ret;
12343                 break;
12344         case BTRFS_EXTENT_DATA_REF_KEY:
12345                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12346                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12347                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12348                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12349                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12350                                         owner_offset, key.objectid, key.offset,
12351                                         btrfs_extent_data_ref_count(eb, dref));
12352                 err |= ret;
12353                 break;
12354         case BTRFS_SHARED_DATA_REF_KEY:
12355                 parent = offset;
12356                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12357                 err |= ret;
12358                 break;
12359         default:
12360                 error("extent[%llu %d %llu] has unknown ref type: %d",
12361                         key.objectid, key.type, key.offset, type);
12362                 ret = UNKNOWN_TYPE;
12363                 err |= ret;
12364                 goto out;
12365         }
12366
12367         if (err && repair) {
12368                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12369                          key.objectid, num_bytes, parent, root_objectid,
12370                          owner, owner_offset, ret);
12371                 if (ret < 0)
12372                         goto out;
12373                 if (ret) {
12374                         goto next;
12375                         err = ret;
12376                 }
12377         }
12378
12379         ptr += btrfs_extent_inline_ref_size(type);
12380         goto next;
12381
12382 out:
12383         return err;
12384 }
12385
12386 /*
12387  * Check if a dev extent item is referred correctly by its chunk
12388  */
12389 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12390                                  struct extent_buffer *eb, int slot)
12391 {
12392         struct btrfs_root *chunk_root = fs_info->chunk_root;
12393         struct btrfs_dev_extent *ptr;
12394         struct btrfs_path path;
12395         struct btrfs_key chunk_key;
12396         struct btrfs_key devext_key;
12397         struct btrfs_chunk *chunk;
12398         struct extent_buffer *l;
12399         int num_stripes;
12400         u64 length;
12401         int i;
12402         int found_chunk = 0;
12403         int ret;
12404
12405         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12406         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12407         length = btrfs_dev_extent_length(eb, ptr);
12408
12409         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12410         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12411         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12412
12413         btrfs_init_path(&path);
12414         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12415         if (ret)
12416                 goto out;
12417
12418         l = path.nodes[0];
12419         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12420         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12421                                       chunk_key.offset);
12422         if (ret < 0)
12423                 goto out;
12424
12425         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12426                 goto out;
12427
12428         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12429         for (i = 0; i < num_stripes; i++) {
12430                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12431                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12432
12433                 if (devid == devext_key.objectid &&
12434                     offset == devext_key.offset) {
12435                         found_chunk = 1;
12436                         break;
12437                 }
12438         }
12439 out:
12440         btrfs_release_path(&path);
12441         if (!found_chunk) {
12442                 error(
12443                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12444                         devext_key.objectid, devext_key.offset, length);
12445                 return REFERENCER_MISSING;
12446         }
12447         return 0;
12448 }
12449
12450 /*
12451  * Check if the used space is correct with the dev item
12452  */
12453 static int check_dev_item(struct btrfs_fs_info *fs_info,
12454                           struct extent_buffer *eb, int slot)
12455 {
12456         struct btrfs_root *dev_root = fs_info->dev_root;
12457         struct btrfs_dev_item *dev_item;
12458         struct btrfs_path path;
12459         struct btrfs_key key;
12460         struct btrfs_dev_extent *ptr;
12461         u64 dev_id;
12462         u64 used;
12463         u64 total = 0;
12464         int ret;
12465
12466         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12467         dev_id = btrfs_device_id(eb, dev_item);
12468         used = btrfs_device_bytes_used(eb, dev_item);
12469
12470         key.objectid = dev_id;
12471         key.type = BTRFS_DEV_EXTENT_KEY;
12472         key.offset = 0;
12473
12474         btrfs_init_path(&path);
12475         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12476         if (ret < 0) {
12477                 btrfs_item_key_to_cpu(eb, &key, slot);
12478                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12479                         key.objectid, key.type, key.offset);
12480                 btrfs_release_path(&path);
12481                 return REFERENCER_MISSING;
12482         }
12483
12484         /* Iterate dev_extents to calculate the used space of a device */
12485         while (1) {
12486                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12487                         goto next;
12488
12489                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12490                 if (key.objectid > dev_id)
12491                         break;
12492                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12493                         goto next;
12494
12495                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12496                                      struct btrfs_dev_extent);
12497                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12498 next:
12499                 ret = btrfs_next_item(dev_root, &path);
12500                 if (ret)
12501                         break;
12502         }
12503         btrfs_release_path(&path);
12504
12505         if (used != total) {
12506                 btrfs_item_key_to_cpu(eb, &key, slot);
12507                 error(
12508 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12509                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12510                         BTRFS_DEV_EXTENT_KEY, dev_id);
12511                 return ACCOUNTING_MISMATCH;
12512         }
12513         return 0;
12514 }
12515
12516 /*
12517  * Check a block group item with its referener (chunk) and its used space
12518  * with extent/metadata item
12519  */
12520 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12521                                   struct extent_buffer *eb, int slot)
12522 {
12523         struct btrfs_root *extent_root = fs_info->extent_root;
12524         struct btrfs_root *chunk_root = fs_info->chunk_root;
12525         struct btrfs_block_group_item *bi;
12526         struct btrfs_block_group_item bg_item;
12527         struct btrfs_path path;
12528         struct btrfs_key bg_key;
12529         struct btrfs_key chunk_key;
12530         struct btrfs_key extent_key;
12531         struct btrfs_chunk *chunk;
12532         struct extent_buffer *leaf;
12533         struct btrfs_extent_item *ei;
12534         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12535         u64 flags;
12536         u64 bg_flags;
12537         u64 used;
12538         u64 total = 0;
12539         int ret;
12540         int err = 0;
12541
12542         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12543         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12544         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12545         used = btrfs_block_group_used(&bg_item);
12546         bg_flags = btrfs_block_group_flags(&bg_item);
12547
12548         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12549         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12550         chunk_key.offset = bg_key.objectid;
12551
12552         btrfs_init_path(&path);
12553         /* Search for the referencer chunk */
12554         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12555         if (ret) {
12556                 error(
12557                 "block group[%llu %llu] did not find the related chunk item",
12558                         bg_key.objectid, bg_key.offset);
12559                 err |= REFERENCER_MISSING;
12560         } else {
12561                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12562                                         struct btrfs_chunk);
12563                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12564                                                 bg_key.offset) {
12565                         error(
12566         "block group[%llu %llu] related chunk item length does not match",
12567                                 bg_key.objectid, bg_key.offset);
12568                         err |= REFERENCER_MISMATCH;
12569                 }
12570         }
12571         btrfs_release_path(&path);
12572
12573         /* Search from the block group bytenr */
12574         extent_key.objectid = bg_key.objectid;
12575         extent_key.type = 0;
12576         extent_key.offset = 0;
12577
12578         btrfs_init_path(&path);
12579         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12580         if (ret < 0)
12581                 goto out;
12582
12583         /* Iterate extent tree to account used space */
12584         while (1) {
12585                 leaf = path.nodes[0];
12586
12587                 /* Search slot can point to the last item beyond leaf nritems */
12588                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12589                         goto next;
12590
12591                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12592                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12593                         break;
12594
12595                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12596                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12597                         goto next;
12598                 if (extent_key.objectid < bg_key.objectid)
12599                         goto next;
12600
12601                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12602                         total += nodesize;
12603                 else
12604                         total += extent_key.offset;
12605
12606                 ei = btrfs_item_ptr(leaf, path.slots[0],
12607                                     struct btrfs_extent_item);
12608                 flags = btrfs_extent_flags(leaf, ei);
12609                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12610                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12611                                 error(
12612                         "bad extent[%llu, %llu) type mismatch with chunk",
12613                                         extent_key.objectid,
12614                                         extent_key.objectid + extent_key.offset);
12615                                 err |= CHUNK_TYPE_MISMATCH;
12616                         }
12617                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12618                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12619                                     BTRFS_BLOCK_GROUP_METADATA))) {
12620                                 error(
12621                         "bad extent[%llu, %llu) type mismatch with chunk",
12622                                         extent_key.objectid,
12623                                         extent_key.objectid + nodesize);
12624                                 err |= CHUNK_TYPE_MISMATCH;
12625                         }
12626                 }
12627 next:
12628                 ret = btrfs_next_item(extent_root, &path);
12629                 if (ret)
12630                         break;
12631         }
12632
12633 out:
12634         btrfs_release_path(&path);
12635
12636         if (total != used) {
12637                 error(
12638                 "block group[%llu %llu] used %llu but extent items used %llu",
12639                         bg_key.objectid, bg_key.offset, used, total);
12640                 err |= ACCOUNTING_MISMATCH;
12641         }
12642         return err;
12643 }
12644
12645 /*
12646  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12647  * FIXME: We still need to repair error of dev_item.
12648  *
12649  * Returns error after repair.
12650  */
12651 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12652                              struct btrfs_root *chunk_root,
12653                              struct btrfs_path *path, int err)
12654 {
12655         struct btrfs_chunk *chunk;
12656         struct btrfs_key chunk_key;
12657         struct extent_buffer *eb = path->nodes[0];
12658         u64 length;
12659         int slot = path->slots[0];
12660         u64 type;
12661         int ret = 0;
12662
12663         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12664         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12665                 return err;
12666         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12667         type = btrfs_chunk_type(path->nodes[0], chunk);
12668         length = btrfs_chunk_length(eb, chunk);
12669
12670         if (err & REFERENCER_MISSING) {
12671                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12672                      type, chunk_key.objectid, chunk_key.offset, length);
12673                 if (ret) {
12674                         error("fail to add block group item[%llu %llu]",
12675                               chunk_key.offset, length);
12676                         goto out;
12677                 } else {
12678                         err &= ~REFERENCER_MISSING;
12679                         printf("Added block group item[%llu %llu]\n",
12680                                chunk_key.offset, length);
12681                 }
12682         }
12683
12684 out:
12685         return err;
12686 }
12687
12688 /*
12689  * Check a chunk item.
12690  * Including checking all referred dev_extents and block group
12691  */
12692 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12693                             struct extent_buffer *eb, int slot)
12694 {
12695         struct btrfs_root *extent_root = fs_info->extent_root;
12696         struct btrfs_root *dev_root = fs_info->dev_root;
12697         struct btrfs_path path;
12698         struct btrfs_key chunk_key;
12699         struct btrfs_key bg_key;
12700         struct btrfs_key devext_key;
12701         struct btrfs_chunk *chunk;
12702         struct extent_buffer *leaf;
12703         struct btrfs_block_group_item *bi;
12704         struct btrfs_block_group_item bg_item;
12705         struct btrfs_dev_extent *ptr;
12706         u64 length;
12707         u64 chunk_end;
12708         u64 stripe_len;
12709         u64 type;
12710         int num_stripes;
12711         u64 offset;
12712         u64 objectid;
12713         int i;
12714         int ret;
12715         int err = 0;
12716
12717         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12718         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12719         length = btrfs_chunk_length(eb, chunk);
12720         chunk_end = chunk_key.offset + length;
12721         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12722                                       chunk_key.offset);
12723         if (ret < 0) {
12724                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12725                         chunk_end);
12726                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12727                 goto out;
12728         }
12729         type = btrfs_chunk_type(eb, chunk);
12730
12731         bg_key.objectid = chunk_key.offset;
12732         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12733         bg_key.offset = length;
12734
12735         btrfs_init_path(&path);
12736         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12737         if (ret) {
12738                 error(
12739                 "chunk[%llu %llu) did not find the related block group item",
12740                         chunk_key.offset, chunk_end);
12741                 err |= REFERENCER_MISSING;
12742         } else{
12743                 leaf = path.nodes[0];
12744                 bi = btrfs_item_ptr(leaf, path.slots[0],
12745                                     struct btrfs_block_group_item);
12746                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12747                                    sizeof(bg_item));
12748                 if (btrfs_block_group_flags(&bg_item) != type) {
12749                         error(
12750 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12751                                 chunk_key.offset, chunk_end, type,
12752                                 btrfs_block_group_flags(&bg_item));
12753                         err |= REFERENCER_MISSING;
12754                 }
12755         }
12756
12757         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12758         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12759         for (i = 0; i < num_stripes; i++) {
12760                 btrfs_release_path(&path);
12761                 btrfs_init_path(&path);
12762                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12763                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12764                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12765
12766                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12767                                         0, 0);
12768                 if (ret)
12769                         goto not_match_dev;
12770
12771                 leaf = path.nodes[0];
12772                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12773                                      struct btrfs_dev_extent);
12774                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12775                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12776                 if (objectid != chunk_key.objectid ||
12777                     offset != chunk_key.offset ||
12778                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12779                         goto not_match_dev;
12780                 continue;
12781 not_match_dev:
12782                 err |= BACKREF_MISSING;
12783                 error(
12784                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12785                         chunk_key.objectid, chunk_end, i);
12786                 continue;
12787         }
12788         btrfs_release_path(&path);
12789 out:
12790         return err;
12791 }
12792
12793 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12794                                    struct btrfs_root *root,
12795                                    struct btrfs_path *path)
12796 {
12797         struct btrfs_key key;
12798         int ret = 0;
12799
12800         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12801         btrfs_release_path(path);
12802         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12803         if (ret) {
12804                 ret = -ENOENT;
12805                 goto out;
12806         }
12807
12808         ret = btrfs_del_item(trans, root, path);
12809         if (ret)
12810                 goto out;
12811
12812         if (path->slots[0] == 0)
12813                 btrfs_prev_leaf(root, path);
12814         else
12815                 path->slots[0]--;
12816 out:
12817         if (ret)
12818                 error("failed to delete root %llu item[%llu, %u, %llu]",
12819                       root->objectid, key.objectid, key.type, key.offset);
12820         else
12821                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12822                        root->objectid, key.objectid, key.type, key.offset);
12823         return ret;
12824 }
12825
12826 /*
12827  * Main entry function to check known items and update related accounting info
12828  */
12829 static int check_leaf_items(struct btrfs_trans_handle *trans,
12830                             struct btrfs_root *root, struct btrfs_path *path,
12831                             struct node_refs *nrefs, int account_bytes)
12832 {
12833         struct btrfs_fs_info *fs_info = root->fs_info;
12834         struct btrfs_key key;
12835         struct extent_buffer *eb;
12836         int slot;
12837         int type;
12838         struct btrfs_extent_data_ref *dref;
12839         int ret = 0;
12840         int err = 0;
12841
12842 again:
12843         eb = path->nodes[0];
12844         slot = path->slots[0];
12845         if (slot >= btrfs_header_nritems(eb)) {
12846                 if (slot == 0) {
12847                         error("empty leaf [%llu %u] root %llu", eb->start,
12848                                 root->fs_info->nodesize, root->objectid);
12849                         err |= EIO;
12850                 }
12851                 goto out;
12852         }
12853
12854         btrfs_item_key_to_cpu(eb, &key, slot);
12855         type = key.type;
12856
12857         switch (type) {
12858         case BTRFS_EXTENT_DATA_KEY:
12859                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12860                 err |= ret;
12861                 break;
12862         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12863                 ret = check_block_group_item(fs_info, eb, slot);
12864                 if (repair &&
12865                     ret & REFERENCER_MISSING)
12866                         ret = delete_extent_tree_item(trans, root, path);
12867                 err |= ret;
12868                 break;
12869         case BTRFS_DEV_ITEM_KEY:
12870                 ret = check_dev_item(fs_info, eb, slot);
12871                 err |= ret;
12872                 break;
12873         case BTRFS_CHUNK_ITEM_KEY:
12874                 ret = check_chunk_item(fs_info, eb, slot);
12875                 if (repair && ret)
12876                         ret = repair_chunk_item(trans, root, path, ret);
12877                 err |= ret;
12878                 break;
12879         case BTRFS_DEV_EXTENT_KEY:
12880                 ret = check_dev_extent_item(fs_info, eb, slot);
12881                 err |= ret;
12882                 break;
12883         case BTRFS_EXTENT_ITEM_KEY:
12884         case BTRFS_METADATA_ITEM_KEY:
12885                 ret = check_extent_item(trans, fs_info, path);
12886                 err |= ret;
12887                 break;
12888         case BTRFS_EXTENT_CSUM_KEY:
12889                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12890                 err |= ret;
12891                 break;
12892         case BTRFS_TREE_BLOCK_REF_KEY:
12893                 ret = check_tree_block_backref(fs_info, key.offset,
12894                                                key.objectid, -1);
12895                 if (repair &&
12896                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12897                         ret = delete_extent_tree_item(trans, root, path);
12898                 err |= ret;
12899                 break;
12900         case BTRFS_EXTENT_DATA_REF_KEY:
12901                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12902                 ret = check_extent_data_backref(fs_info,
12903                                 btrfs_extent_data_ref_root(eb, dref),
12904                                 btrfs_extent_data_ref_objectid(eb, dref),
12905                                 btrfs_extent_data_ref_offset(eb, dref),
12906                                 key.objectid, 0,
12907                                 btrfs_extent_data_ref_count(eb, dref));
12908                 if (repair &&
12909                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12910                         ret = delete_extent_tree_item(trans, root, path);
12911                 err |= ret;
12912                 break;
12913         case BTRFS_SHARED_BLOCK_REF_KEY:
12914                 ret = check_shared_block_backref(fs_info, key.offset,
12915                                                  key.objectid, -1);
12916                 if (repair &&
12917                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12918                         ret = delete_extent_tree_item(trans, root, path);
12919                 err |= ret;
12920                 break;
12921         case BTRFS_SHARED_DATA_REF_KEY:
12922                 ret = check_shared_data_backref(fs_info, key.offset,
12923                                                 key.objectid);
12924                 if (repair &&
12925                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12926                         ret = delete_extent_tree_item(trans, root, path);
12927                 err |= ret;
12928                 break;
12929         default:
12930                 break;
12931         }
12932
12933         ++path->slots[0];
12934         goto again;
12935 out:
12936         return err;
12937 }
12938
12939 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
12940
12941 /*
12942  * Low memory usage version check_chunks_and_extents.
12943  */
12944 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12945 {
12946         struct btrfs_trans_handle *trans = NULL;
12947         struct btrfs_path path;
12948         struct btrfs_key old_key;
12949         struct btrfs_key key;
12950         struct btrfs_root *root1;
12951         struct btrfs_root *root;
12952         struct btrfs_root *cur_root;
12953         int err = 0;
12954         int ret;
12955
12956         root = fs_info->fs_root;
12957
12958         if (repair) {
12959                 /* pin every tree block to avoid extent overwrite */
12960                 ret = pin_metadata_blocks(fs_info);
12961                 if (ret) {
12962                         error("failed to pin metadata blocks");
12963                         return ret;
12964                 }
12965                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12966                 if (IS_ERR(trans)) {
12967                         error("failed to start transaction before check");
12968                         return PTR_ERR(trans);
12969                 }
12970         }
12971
12972         root1 = root->fs_info->chunk_root;
12973         ret = check_btrfs_root(trans, root1, 0, 1);
12974         err |= ret;
12975
12976         root1 = root->fs_info->tree_root;
12977         ret = check_btrfs_root(trans, root1, 0, 1);
12978         err |= ret;
12979
12980         btrfs_init_path(&path);
12981         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12982         key.offset = 0;
12983         key.type = BTRFS_ROOT_ITEM_KEY;
12984
12985         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12986         if (ret) {
12987                 error("cannot find extent tree in tree_root");
12988                 goto out;
12989         }
12990
12991         while (1) {
12992                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12993                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12994                         goto next;
12995                 old_key = key;
12996                 key.offset = (u64)-1;
12997
12998                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12999                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13000                                         &key);
13001                 else
13002                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13003                 if (IS_ERR(cur_root) || !cur_root) {
13004                         error("failed to read tree: %lld", key.objectid);
13005                         goto next;
13006                 }
13007
13008                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13009                 err |= ret;
13010
13011                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13012                         btrfs_free_fs_root(cur_root);
13013
13014                 btrfs_release_path(&path);
13015                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13016                                         &old_key, &path, 0, 0);
13017                 if (ret)
13018                         goto out;
13019 next:
13020                 ret = btrfs_next_item(root1, &path);
13021                 if (ret)
13022                         goto out;
13023         }
13024 out:
13025
13026         /* if repair, update block accounting */
13027         if (repair) {
13028                 ret = btrfs_fix_block_accounting(trans, root);
13029                 if (ret)
13030                         err |= ret;
13031         }
13032
13033         if (trans)
13034                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13035
13036         btrfs_release_path(&path);
13037
13038         return err;
13039 }
13040
13041 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13042 {
13043         int ret;
13044
13045         if (!ctx.progress_enabled)
13046                 fprintf(stderr, "checking extents\n");
13047         if (check_mode == CHECK_MODE_LOWMEM)
13048                 ret = check_chunks_and_extents_v2(fs_info);
13049         else
13050                 ret = check_chunks_and_extents(fs_info);
13051
13052         return ret;
13053 }
13054
13055 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13056                            struct btrfs_root *root, int overwrite)
13057 {
13058         struct extent_buffer *c;
13059         struct extent_buffer *old = root->node;
13060         int level;
13061         int ret;
13062         struct btrfs_disk_key disk_key = {0,0,0};
13063
13064         level = 0;
13065
13066         if (overwrite) {
13067                 c = old;
13068                 extent_buffer_get(c);
13069                 goto init;
13070         }
13071         c = btrfs_alloc_free_block(trans, root,
13072                                    root->fs_info->nodesize,
13073                                    root->root_key.objectid,
13074                                    &disk_key, level, 0, 0);
13075         if (IS_ERR(c)) {
13076                 c = old;
13077                 extent_buffer_get(c);
13078                 overwrite = 1;
13079         }
13080 init:
13081         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13082         btrfs_set_header_level(c, level);
13083         btrfs_set_header_bytenr(c, c->start);
13084         btrfs_set_header_generation(c, trans->transid);
13085         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13086         btrfs_set_header_owner(c, root->root_key.objectid);
13087
13088         write_extent_buffer(c, root->fs_info->fsid,
13089                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13090
13091         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13092                             btrfs_header_chunk_tree_uuid(c),
13093                             BTRFS_UUID_SIZE);
13094
13095         btrfs_mark_buffer_dirty(c);
13096         /*
13097          * this case can happen in the following case:
13098          *
13099          * 1.overwrite previous root.
13100          *
13101          * 2.reinit reloc data root, this is because we skip pin
13102          * down reloc data tree before which means we can allocate
13103          * same block bytenr here.
13104          */
13105         if (old->start == c->start) {
13106                 btrfs_set_root_generation(&root->root_item,
13107                                           trans->transid);
13108                 root->root_item.level = btrfs_header_level(root->node);
13109                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13110                                         &root->root_key, &root->root_item);
13111                 if (ret) {
13112                         free_extent_buffer(c);
13113                         return ret;
13114                 }
13115         }
13116         free_extent_buffer(old);
13117         root->node = c;
13118         add_root_to_dirty_list(root);
13119         return 0;
13120 }
13121
13122 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13123                                 struct extent_buffer *eb, int tree_root)
13124 {
13125         struct extent_buffer *tmp;
13126         struct btrfs_root_item *ri;
13127         struct btrfs_key key;
13128         u64 bytenr;
13129         int level = btrfs_header_level(eb);
13130         int nritems;
13131         int ret;
13132         int i;
13133
13134         /*
13135          * If we have pinned this block before, don't pin it again.
13136          * This can not only avoid forever loop with broken filesystem
13137          * but also give us some speedups.
13138          */
13139         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13140                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13141                 return 0;
13142
13143         btrfs_pin_extent(fs_info, eb->start, eb->len);
13144
13145         nritems = btrfs_header_nritems(eb);
13146         for (i = 0; i < nritems; i++) {
13147                 if (level == 0) {
13148                         btrfs_item_key_to_cpu(eb, &key, i);
13149                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13150                                 continue;
13151                         /* Skip the extent root and reloc roots */
13152                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13153                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13154                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13155                                 continue;
13156                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13157                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13158
13159                         /*
13160                          * If at any point we start needing the real root we
13161                          * will have to build a stump root for the root we are
13162                          * in, but for now this doesn't actually use the root so
13163                          * just pass in extent_root.
13164                          */
13165                         tmp = read_tree_block(fs_info, bytenr, 0);
13166                         if (!extent_buffer_uptodate(tmp)) {
13167                                 fprintf(stderr, "Error reading root block\n");
13168                                 return -EIO;
13169                         }
13170                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13171                         free_extent_buffer(tmp);
13172                         if (ret)
13173                                 return ret;
13174                 } else {
13175                         bytenr = btrfs_node_blockptr(eb, i);
13176
13177                         /* If we aren't the tree root don't read the block */
13178                         if (level == 1 && !tree_root) {
13179                                 btrfs_pin_extent(fs_info, bytenr,
13180                                                 fs_info->nodesize);
13181                                 continue;
13182                         }
13183
13184                         tmp = read_tree_block(fs_info, bytenr, 0);
13185                         if (!extent_buffer_uptodate(tmp)) {
13186                                 fprintf(stderr, "Error reading tree block\n");
13187                                 return -EIO;
13188                         }
13189                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13190                         free_extent_buffer(tmp);
13191                         if (ret)
13192                                 return ret;
13193                 }
13194         }
13195
13196         return 0;
13197 }
13198
13199 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13200 {
13201         int ret;
13202
13203         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13204         if (ret)
13205                 return ret;
13206
13207         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13208 }
13209
13210 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13211 {
13212         struct btrfs_block_group_cache *cache;
13213         struct btrfs_path path;
13214         struct extent_buffer *leaf;
13215         struct btrfs_chunk *chunk;
13216         struct btrfs_key key;
13217         int ret;
13218         u64 start;
13219
13220         btrfs_init_path(&path);
13221         key.objectid = 0;
13222         key.type = BTRFS_CHUNK_ITEM_KEY;
13223         key.offset = 0;
13224         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13225         if (ret < 0) {
13226                 btrfs_release_path(&path);
13227                 return ret;
13228         }
13229
13230         /*
13231          * We do this in case the block groups were screwed up and had alloc
13232          * bits that aren't actually set on the chunks.  This happens with
13233          * restored images every time and could happen in real life I guess.
13234          */
13235         fs_info->avail_data_alloc_bits = 0;
13236         fs_info->avail_metadata_alloc_bits = 0;
13237         fs_info->avail_system_alloc_bits = 0;
13238
13239         /* First we need to create the in-memory block groups */
13240         while (1) {
13241                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13242                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13243                         if (ret < 0) {
13244                                 btrfs_release_path(&path);
13245                                 return ret;
13246                         }
13247                         if (ret) {
13248                                 ret = 0;
13249                                 break;
13250                         }
13251                 }
13252                 leaf = path.nodes[0];
13253                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13254                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13255                         path.slots[0]++;
13256                         continue;
13257                 }
13258
13259                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13260                 btrfs_add_block_group(fs_info, 0,
13261                                       btrfs_chunk_type(leaf, chunk),
13262                                       key.objectid, key.offset,
13263                                       btrfs_chunk_length(leaf, chunk));
13264                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13265                                  key.offset + btrfs_chunk_length(leaf, chunk));
13266                 path.slots[0]++;
13267         }
13268         start = 0;
13269         while (1) {
13270                 cache = btrfs_lookup_first_block_group(fs_info, start);
13271                 if (!cache)
13272                         break;
13273                 cache->cached = 1;
13274                 start = cache->key.objectid + cache->key.offset;
13275         }
13276
13277         btrfs_release_path(&path);
13278         return 0;
13279 }
13280
13281 static int reset_balance(struct btrfs_trans_handle *trans,
13282                          struct btrfs_fs_info *fs_info)
13283 {
13284         struct btrfs_root *root = fs_info->tree_root;
13285         struct btrfs_path path;
13286         struct extent_buffer *leaf;
13287         struct btrfs_key key;
13288         int del_slot, del_nr = 0;
13289         int ret;
13290         int found = 0;
13291
13292         btrfs_init_path(&path);
13293         key.objectid = BTRFS_BALANCE_OBJECTID;
13294         key.type = BTRFS_BALANCE_ITEM_KEY;
13295         key.offset = 0;
13296         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13297         if (ret) {
13298                 if (ret > 0)
13299                         ret = 0;
13300                 if (!ret)
13301                         goto reinit_data_reloc;
13302                 else
13303                         goto out;
13304         }
13305
13306         ret = btrfs_del_item(trans, root, &path);
13307         if (ret)
13308                 goto out;
13309         btrfs_release_path(&path);
13310
13311         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13312         key.type = BTRFS_ROOT_ITEM_KEY;
13313         key.offset = 0;
13314         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13315         if (ret < 0)
13316                 goto out;
13317         while (1) {
13318                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13319                         if (!found)
13320                                 break;
13321
13322                         if (del_nr) {
13323                                 ret = btrfs_del_items(trans, root, &path,
13324                                                       del_slot, del_nr);
13325                                 del_nr = 0;
13326                                 if (ret)
13327                                         goto out;
13328                         }
13329                         key.offset++;
13330                         btrfs_release_path(&path);
13331
13332                         found = 0;
13333                         ret = btrfs_search_slot(trans, root, &key, &path,
13334                                                 -1, 1);
13335                         if (ret < 0)
13336                                 goto out;
13337                         continue;
13338                 }
13339                 found = 1;
13340                 leaf = path.nodes[0];
13341                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13342                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13343                         break;
13344                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13345                         path.slots[0]++;
13346                         continue;
13347                 }
13348                 if (!del_nr) {
13349                         del_slot = path.slots[0];
13350                         del_nr = 1;
13351                 } else {
13352                         del_nr++;
13353                 }
13354                 path.slots[0]++;
13355         }
13356
13357         if (del_nr) {
13358                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13359                 if (ret)
13360                         goto out;
13361         }
13362         btrfs_release_path(&path);
13363
13364 reinit_data_reloc:
13365         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13366         key.type = BTRFS_ROOT_ITEM_KEY;
13367         key.offset = (u64)-1;
13368         root = btrfs_read_fs_root(fs_info, &key);
13369         if (IS_ERR(root)) {
13370                 fprintf(stderr, "Error reading data reloc tree\n");
13371                 ret = PTR_ERR(root);
13372                 goto out;
13373         }
13374         record_root_in_trans(trans, root);
13375         ret = btrfs_fsck_reinit_root(trans, root, 0);
13376         if (ret)
13377                 goto out;
13378         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13379 out:
13380         btrfs_release_path(&path);
13381         return ret;
13382 }
13383
13384 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13385                               struct btrfs_fs_info *fs_info)
13386 {
13387         u64 start = 0;
13388         int ret;
13389
13390         /*
13391          * The only reason we don't do this is because right now we're just
13392          * walking the trees we find and pinning down their bytes, we don't look
13393          * at any of the leaves.  In order to do mixed groups we'd have to check
13394          * the leaves of any fs roots and pin down the bytes for any file
13395          * extents we find.  Not hard but why do it if we don't have to?
13396          */
13397         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13398                 fprintf(stderr, "We don't support re-initing the extent tree "
13399                         "for mixed block groups yet, please notify a btrfs "
13400                         "developer you want to do this so they can add this "
13401                         "functionality.\n");
13402                 return -EINVAL;
13403         }
13404
13405         /*
13406          * first we need to walk all of the trees except the extent tree and pin
13407          * down the bytes that are in use so we don't overwrite any existing
13408          * metadata.
13409          */
13410         ret = pin_metadata_blocks(fs_info);
13411         if (ret) {
13412                 fprintf(stderr, "error pinning down used bytes\n");
13413                 return ret;
13414         }
13415
13416         /*
13417          * Need to drop all the block groups since we're going to recreate all
13418          * of them again.
13419          */
13420         btrfs_free_block_groups(fs_info);
13421         ret = reset_block_groups(fs_info);
13422         if (ret) {
13423                 fprintf(stderr, "error resetting the block groups\n");
13424                 return ret;
13425         }
13426
13427         /* Ok we can allocate now, reinit the extent root */
13428         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13429         if (ret) {
13430                 fprintf(stderr, "extent root initialization failed\n");
13431                 /*
13432                  * When the transaction code is updated we should end the
13433                  * transaction, but for now progs only knows about commit so
13434                  * just return an error.
13435                  */
13436                 return ret;
13437         }
13438
13439         /*
13440          * Now we have all the in-memory block groups setup so we can make
13441          * allocations properly, and the metadata we care about is safe since we
13442          * pinned all of it above.
13443          */
13444         while (1) {
13445                 struct btrfs_block_group_cache *cache;
13446
13447                 cache = btrfs_lookup_first_block_group(fs_info, start);
13448                 if (!cache)
13449                         break;
13450                 start = cache->key.objectid + cache->key.offset;
13451                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13452                                         &cache->key, &cache->item,
13453                                         sizeof(cache->item));
13454                 if (ret) {
13455                         fprintf(stderr, "Error adding block group\n");
13456                         return ret;
13457                 }
13458                 btrfs_extent_post_op(trans, fs_info->extent_root);
13459         }
13460
13461         ret = reset_balance(trans, fs_info);
13462         if (ret)
13463                 fprintf(stderr, "error resetting the pending balance\n");
13464
13465         return ret;
13466 }
13467
13468 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13469 {
13470         struct btrfs_path path;
13471         struct btrfs_trans_handle *trans;
13472         struct btrfs_key key;
13473         int ret;
13474
13475         printf("Recowing metadata block %llu\n", eb->start);
13476         key.objectid = btrfs_header_owner(eb);
13477         key.type = BTRFS_ROOT_ITEM_KEY;
13478         key.offset = (u64)-1;
13479
13480         root = btrfs_read_fs_root(root->fs_info, &key);
13481         if (IS_ERR(root)) {
13482                 fprintf(stderr, "Couldn't find owner root %llu\n",
13483                         key.objectid);
13484                 return PTR_ERR(root);
13485         }
13486
13487         trans = btrfs_start_transaction(root, 1);
13488         if (IS_ERR(trans))
13489                 return PTR_ERR(trans);
13490
13491         btrfs_init_path(&path);
13492         path.lowest_level = btrfs_header_level(eb);
13493         if (path.lowest_level)
13494                 btrfs_node_key_to_cpu(eb, &key, 0);
13495         else
13496                 btrfs_item_key_to_cpu(eb, &key, 0);
13497
13498         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13499         btrfs_commit_transaction(trans, root);
13500         btrfs_release_path(&path);
13501         return ret;
13502 }
13503
13504 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13505 {
13506         struct btrfs_path path;
13507         struct btrfs_trans_handle *trans;
13508         struct btrfs_key key;
13509         int ret;
13510
13511         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13512                bad->key.type, bad->key.offset);
13513         key.objectid = bad->root_id;
13514         key.type = BTRFS_ROOT_ITEM_KEY;
13515         key.offset = (u64)-1;
13516
13517         root = btrfs_read_fs_root(root->fs_info, &key);
13518         if (IS_ERR(root)) {
13519                 fprintf(stderr, "Couldn't find owner root %llu\n",
13520                         key.objectid);
13521                 return PTR_ERR(root);
13522         }
13523
13524         trans = btrfs_start_transaction(root, 1);
13525         if (IS_ERR(trans))
13526                 return PTR_ERR(trans);
13527
13528         btrfs_init_path(&path);
13529         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13530         if (ret) {
13531                 if (ret > 0)
13532                         ret = 0;
13533                 goto out;
13534         }
13535         ret = btrfs_del_item(trans, root, &path);
13536 out:
13537         btrfs_commit_transaction(trans, root);
13538         btrfs_release_path(&path);
13539         return ret;
13540 }
13541
13542 static int zero_log_tree(struct btrfs_root *root)
13543 {
13544         struct btrfs_trans_handle *trans;
13545         int ret;
13546
13547         trans = btrfs_start_transaction(root, 1);
13548         if (IS_ERR(trans)) {
13549                 ret = PTR_ERR(trans);
13550                 return ret;
13551         }
13552         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13553         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13554         ret = btrfs_commit_transaction(trans, root);
13555         return ret;
13556 }
13557
13558 static int populate_csum(struct btrfs_trans_handle *trans,
13559                          struct btrfs_root *csum_root, char *buf, u64 start,
13560                          u64 len)
13561 {
13562         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13563         u64 offset = 0;
13564         u64 sectorsize;
13565         int ret = 0;
13566
13567         while (offset < len) {
13568                 sectorsize = fs_info->sectorsize;
13569                 ret = read_extent_data(fs_info, buf, start + offset,
13570                                        &sectorsize, 0);
13571                 if (ret)
13572                         break;
13573                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13574                                             start + offset, buf, sectorsize);
13575                 if (ret)
13576                         break;
13577                 offset += sectorsize;
13578         }
13579         return ret;
13580 }
13581
13582 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13583                                       struct btrfs_root *csum_root,
13584                                       struct btrfs_root *cur_root)
13585 {
13586         struct btrfs_path path;
13587         struct btrfs_key key;
13588         struct extent_buffer *node;
13589         struct btrfs_file_extent_item *fi;
13590         char *buf = NULL;
13591         u64 start = 0;
13592         u64 len = 0;
13593         int slot = 0;
13594         int ret = 0;
13595
13596         buf = malloc(cur_root->fs_info->sectorsize);
13597         if (!buf)
13598                 return -ENOMEM;
13599
13600         btrfs_init_path(&path);
13601         key.objectid = 0;
13602         key.offset = 0;
13603         key.type = 0;
13604         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13605         if (ret < 0)
13606                 goto out;
13607         /* Iterate all regular file extents and fill its csum */
13608         while (1) {
13609                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13610
13611                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13612                         goto next;
13613                 node = path.nodes[0];
13614                 slot = path.slots[0];
13615                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13616                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13617                         goto next;
13618                 start = btrfs_file_extent_disk_bytenr(node, fi);
13619                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13620
13621                 ret = populate_csum(trans, csum_root, buf, start, len);
13622                 if (ret == -EEXIST)
13623                         ret = 0;
13624                 if (ret < 0)
13625                         goto out;
13626 next:
13627                 /*
13628                  * TODO: if next leaf is corrupted, jump to nearest next valid
13629                  * leaf.
13630                  */
13631                 ret = btrfs_next_item(cur_root, &path);
13632                 if (ret < 0)
13633                         goto out;
13634                 if (ret > 0) {
13635                         ret = 0;
13636                         goto out;
13637                 }
13638         }
13639
13640 out:
13641         btrfs_release_path(&path);
13642         free(buf);
13643         return ret;
13644 }
13645
13646 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13647                                   struct btrfs_root *csum_root)
13648 {
13649         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13650         struct btrfs_path path;
13651         struct btrfs_root *tree_root = fs_info->tree_root;
13652         struct btrfs_root *cur_root;
13653         struct extent_buffer *node;
13654         struct btrfs_key key;
13655         int slot = 0;
13656         int ret = 0;
13657
13658         btrfs_init_path(&path);
13659         key.objectid = BTRFS_FS_TREE_OBJECTID;
13660         key.offset = 0;
13661         key.type = BTRFS_ROOT_ITEM_KEY;
13662         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13663         if (ret < 0)
13664                 goto out;
13665         if (ret > 0) {
13666                 ret = -ENOENT;
13667                 goto out;
13668         }
13669
13670         while (1) {
13671                 node = path.nodes[0];
13672                 slot = path.slots[0];
13673                 btrfs_item_key_to_cpu(node, &key, slot);
13674                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13675                         goto out;
13676                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13677                         goto next;
13678                 if (!is_fstree(key.objectid))
13679                         goto next;
13680                 key.offset = (u64)-1;
13681
13682                 cur_root = btrfs_read_fs_root(fs_info, &key);
13683                 if (IS_ERR(cur_root) || !cur_root) {
13684                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13685                                 key.objectid);
13686                         goto out;
13687                 }
13688                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13689                                 cur_root);
13690                 if (ret < 0)
13691                         goto out;
13692 next:
13693                 ret = btrfs_next_item(tree_root, &path);
13694                 if (ret > 0) {
13695                         ret = 0;
13696                         goto out;
13697                 }
13698                 if (ret < 0)
13699                         goto out;
13700         }
13701
13702 out:
13703         btrfs_release_path(&path);
13704         return ret;
13705 }
13706
13707 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13708                                       struct btrfs_root *csum_root)
13709 {
13710         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13711         struct btrfs_path path;
13712         struct btrfs_extent_item *ei;
13713         struct extent_buffer *leaf;
13714         char *buf;
13715         struct btrfs_key key;
13716         int ret;
13717
13718         btrfs_init_path(&path);
13719         key.objectid = 0;
13720         key.type = BTRFS_EXTENT_ITEM_KEY;
13721         key.offset = 0;
13722         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13723         if (ret < 0) {
13724                 btrfs_release_path(&path);
13725                 return ret;
13726         }
13727
13728         buf = malloc(csum_root->fs_info->sectorsize);
13729         if (!buf) {
13730                 btrfs_release_path(&path);
13731                 return -ENOMEM;
13732         }
13733
13734         while (1) {
13735                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13736                         ret = btrfs_next_leaf(extent_root, &path);
13737                         if (ret < 0)
13738                                 break;
13739                         if (ret) {
13740                                 ret = 0;
13741                                 break;
13742                         }
13743                 }
13744                 leaf = path.nodes[0];
13745
13746                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13747                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13748                         path.slots[0]++;
13749                         continue;
13750                 }
13751
13752                 ei = btrfs_item_ptr(leaf, path.slots[0],
13753                                     struct btrfs_extent_item);
13754                 if (!(btrfs_extent_flags(leaf, ei) &
13755                       BTRFS_EXTENT_FLAG_DATA)) {
13756                         path.slots[0]++;
13757                         continue;
13758                 }
13759
13760                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13761                                     key.offset);
13762                 if (ret)
13763                         break;
13764                 path.slots[0]++;
13765         }
13766
13767         btrfs_release_path(&path);
13768         free(buf);
13769         return ret;
13770 }
13771
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * The extent tree is the default source of the data extents.  When the
 * extent tree has just been re-initialized all of its extent info is gone,
 * so it cannot be relied upon; in that case the caller sets @search_fs_tree
 * and the fs/subvolume trees are scanned instead.
 *
 * Returns whatever the selected worker returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
13788
13789 static void free_roots_info_cache(void)
13790 {
13791         if (!roots_info_cache)
13792                 return;
13793
13794         while (!cache_tree_empty(roots_info_cache)) {
13795                 struct cache_extent *entry;
13796                 struct root_item_info *rii;
13797
13798                 entry = first_cache_extent(roots_info_cache);
13799                 if (!entry)
13800                         break;
13801                 remove_cache_extent(roots_info_cache, entry);
13802                 rii = container_of(entry, struct root_item_info, cache_extent);
13803                 free(rii);
13804         }
13805
13806         free(roots_info_cache);
13807         roots_info_cache = NULL;
13808 }
13809
13810 static int build_roots_info_cache(struct btrfs_fs_info *info)
13811 {
13812         int ret = 0;
13813         struct btrfs_key key;
13814         struct extent_buffer *leaf;
13815         struct btrfs_path path;
13816
13817         if (!roots_info_cache) {
13818                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13819                 if (!roots_info_cache)
13820                         return -ENOMEM;
13821                 cache_tree_init(roots_info_cache);
13822         }
13823
13824         btrfs_init_path(&path);
13825         key.objectid = 0;
13826         key.type = BTRFS_EXTENT_ITEM_KEY;
13827         key.offset = 0;
13828         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13829         if (ret < 0)
13830                 goto out;
13831         leaf = path.nodes[0];
13832
13833         while (1) {
13834                 struct btrfs_key found_key;
13835                 struct btrfs_extent_item *ei;
13836                 struct btrfs_extent_inline_ref *iref;
13837                 int slot = path.slots[0];
13838                 int type;
13839                 u64 flags;
13840                 u64 root_id;
13841                 u8 level;
13842                 struct cache_extent *entry;
13843                 struct root_item_info *rii;
13844
13845                 if (slot >= btrfs_header_nritems(leaf)) {
13846                         ret = btrfs_next_leaf(info->extent_root, &path);
13847                         if (ret < 0) {
13848                                 break;
13849                         } else if (ret) {
13850                                 ret = 0;
13851                                 break;
13852                         }
13853                         leaf = path.nodes[0];
13854                         slot = path.slots[0];
13855                 }
13856
13857                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13858
13859                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13860                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13861                         goto next;
13862
13863                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13864                 flags = btrfs_extent_flags(leaf, ei);
13865
13866                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13867                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13868                         goto next;
13869
13870                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13871                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13872                         level = found_key.offset;
13873                 } else {
13874                         struct btrfs_tree_block_info *binfo;
13875
13876                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13877                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13878                         level = btrfs_tree_block_level(leaf, binfo);
13879                 }
13880
13881                 /*
13882                  * For a root extent, it must be of the following type and the
13883                  * first (and only one) iref in the item.
13884                  */
13885                 type = btrfs_extent_inline_ref_type(leaf, iref);
13886                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13887                         goto next;
13888
13889                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13890                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13891                 if (!entry) {
13892                         rii = malloc(sizeof(struct root_item_info));
13893                         if (!rii) {
13894                                 ret = -ENOMEM;
13895                                 goto out;
13896                         }
13897                         rii->cache_extent.start = root_id;
13898                         rii->cache_extent.size = 1;
13899                         rii->level = (u8)-1;
13900                         entry = &rii->cache_extent;
13901                         ret = insert_cache_extent(roots_info_cache, entry);
13902                         ASSERT(ret == 0);
13903                 } else {
13904                         rii = container_of(entry, struct root_item_info,
13905                                            cache_extent);
13906                 }
13907
13908                 ASSERT(rii->cache_extent.start == root_id);
13909                 ASSERT(rii->cache_extent.size == 1);
13910
13911                 if (level > rii->level || rii->level == (u8)-1) {
13912                         rii->level = level;
13913                         rii->bytenr = found_key.objectid;
13914                         rii->gen = btrfs_extent_generation(leaf, ei);
13915                         rii->node_count = 1;
13916                 } else if (level == rii->level) {
13917                         rii->node_count++;
13918                 }
13919 next:
13920                 path.slots[0]++;
13921         }
13922
13923 out:
13924         btrfs_release_path(&path);
13925
13926         return ret;
13927 }
13928
13929 static int maybe_repair_root_item(struct btrfs_path *path,
13930                                   const struct btrfs_key *root_key,
13931                                   const int read_only_mode)
13932 {
13933         const u64 root_id = root_key->objectid;
13934         struct cache_extent *entry;
13935         struct root_item_info *rii;
13936         struct btrfs_root_item ri;
13937         unsigned long offset;
13938
13939         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13940         if (!entry) {
13941                 fprintf(stderr,
13942                         "Error: could not find extent items for root %llu\n",
13943                         root_key->objectid);
13944                 return -ENOENT;
13945         }
13946
13947         rii = container_of(entry, struct root_item_info, cache_extent);
13948         ASSERT(rii->cache_extent.start == root_id);
13949         ASSERT(rii->cache_extent.size == 1);
13950
13951         if (rii->node_count != 1) {
13952                 fprintf(stderr,
13953                         "Error: could not find btree root extent for root %llu\n",
13954                         root_id);
13955                 return -ENOENT;
13956         }
13957
13958         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13959         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13960
13961         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13962             btrfs_root_level(&ri) != rii->level ||
13963             btrfs_root_generation(&ri) != rii->gen) {
13964
13965                 /*
13966                  * If we're in repair mode but our caller told us to not update
13967                  * the root item, i.e. just check if it needs to be updated, don't
13968                  * print this message, since the caller will call us again shortly
13969                  * for the same root item without read only mode (the caller will
13970                  * open a transaction first).
13971                  */
13972                 if (!(read_only_mode && repair))
13973                         fprintf(stderr,
13974                                 "%sroot item for root %llu,"
13975                                 " current bytenr %llu, current gen %llu, current level %u,"
13976                                 " new bytenr %llu, new gen %llu, new level %u\n",
13977                                 (read_only_mode ? "" : "fixing "),
13978                                 root_id,
13979                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13980                                 btrfs_root_level(&ri),
13981                                 rii->bytenr, rii->gen, rii->level);
13982
13983                 if (btrfs_root_generation(&ri) > rii->gen) {
13984                         fprintf(stderr,
13985                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13986                                 root_id, btrfs_root_generation(&ri), rii->gen);
13987                         return -EINVAL;
13988                 }
13989
13990                 if (!read_only_mode) {
13991                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13992                         btrfs_set_root_level(&ri, rii->level);
13993                         btrfs_set_root_generation(&ri, rii->gen);
13994                         write_extent_buffer(path->nodes[0], &ri,
13995                                             offset, sizeof(ri));
13996                 }
13997
13998                 return 1;
13999         }
14000
14001         return 0;
14002 }
14003
14004 /*
14005  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14006  * caused read-only snapshots to be corrupted if they were created at a moment
14007  * when the source subvolume/snapshot had orphan items. The issue was that the
14008  * on-disk root items became incorrect, referring to the pre orphan cleanup root
14009  * node instead of the post orphan cleanup root node.
14010  * So this function, and its callees, just detects and fixes those cases. Even
14011  * though the regression was for read-only snapshots, this function applies to
14012  * any snapshot/subvolume root.
14013  * This must be run before any other repair code - not doing it so, makes other
14014  * repair code delete or modify backrefs in the extent tree for example, which
14015  * will result in an inconsistent fs after repairing the root items.
14016  */
14017 static int repair_root_items(struct btrfs_fs_info *info)
14018 {
14019         struct btrfs_path path;
14020         struct btrfs_key key;
14021         struct extent_buffer *leaf;
14022         struct btrfs_trans_handle *trans = NULL;
14023         int ret = 0;
14024         int bad_roots = 0;
14025         int need_trans = 0;
14026
14027         btrfs_init_path(&path);
14028
14029         ret = build_roots_info_cache(info);
14030         if (ret)
14031                 goto out;
14032
14033         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14034         key.type = BTRFS_ROOT_ITEM_KEY;
14035         key.offset = 0;
14036
14037 again:
14038         /*
14039          * Avoid opening and committing transactions if a leaf doesn't have
14040          * any root items that need to be fixed, so that we avoid rotating
14041          * backup roots unnecessarily.
14042          */
14043         if (need_trans) {
14044                 trans = btrfs_start_transaction(info->tree_root, 1);
14045                 if (IS_ERR(trans)) {
14046                         ret = PTR_ERR(trans);
14047                         goto out;
14048                 }
14049         }
14050
14051         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14052                                 0, trans ? 1 : 0);
14053         if (ret < 0)
14054                 goto out;
14055         leaf = path.nodes[0];
14056
14057         while (1) {
14058                 struct btrfs_key found_key;
14059
14060                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14061                         int no_more_keys = find_next_key(&path, &key);
14062
14063                         btrfs_release_path(&path);
14064                         if (trans) {
14065                                 ret = btrfs_commit_transaction(trans,
14066                                                                info->tree_root);
14067                                 trans = NULL;
14068                                 if (ret < 0)
14069                                         goto out;
14070                         }
14071                         need_trans = 0;
14072                         if (no_more_keys)
14073                                 break;
14074                         goto again;
14075                 }
14076
14077                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14078
14079                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14080                         goto next;
14081                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14082                         goto next;
14083
14084                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14085                 if (ret < 0)
14086                         goto out;
14087                 if (ret) {
14088                         if (!trans && repair) {
14089                                 need_trans = 1;
14090                                 key = found_key;
14091                                 btrfs_release_path(&path);
14092                                 goto again;
14093                         }
14094                         bad_roots++;
14095                 }
14096 next:
14097                 path.slots[0]++;
14098         }
14099         ret = 0;
14100 out:
14101         free_roots_info_cache();
14102         btrfs_release_path(&path);
14103         if (trans)
14104                 btrfs_commit_transaction(trans, info->tree_root);
14105         if (ret < 0)
14106                 return ret;
14107
14108         return bad_roots;
14109 }
14110
14111 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14112 {
14113         struct btrfs_trans_handle *trans;
14114         struct btrfs_block_group_cache *bg_cache;
14115         u64 current = 0;
14116         int ret = 0;
14117
14118         /* Clear all free space cache inodes and its extent data */
14119         while (1) {
14120                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14121                 if (!bg_cache)
14122                         break;
14123                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14124                 if (ret < 0)
14125                         return ret;
14126                 current = bg_cache->key.objectid + bg_cache->key.offset;
14127         }
14128
14129         /* Don't forget to set cache_generation to -1 */
14130         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14131         if (IS_ERR(trans)) {
14132                 error("failed to update super block cache generation");
14133                 return PTR_ERR(trans);
14134         }
14135         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14136         btrfs_commit_transaction(trans, fs_info->tree_root);
14137
14138         return ret;
14139 }
14140
14141 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14142                 int clear_version)
14143 {
14144         int ret = 0;
14145
14146         if (clear_version == 1) {
14147                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14148                         error(
14149                 "free space cache v2 detected, use --clear-space-cache v2");
14150                         ret = 1;
14151                         goto close_out;
14152                 }
14153                 printf("Clearing free space cache\n");
14154                 ret = clear_free_space_cache(fs_info);
14155                 if (ret) {
14156                         error("failed to clear free space cache");
14157                         ret = 1;
14158                 } else {
14159                         printf("Free space cache cleared\n");
14160                 }
14161         } else if (clear_version == 2) {
14162                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14163                         printf("no free space cache v2 to clear\n");
14164                         ret = 0;
14165                         goto close_out;
14166                 }
14167                 printf("Clear free space cache v2\n");
14168                 ret = btrfs_clear_free_space_tree(fs_info);
14169                 if (ret) {
14170                         error("failed to clear free space cache v2: %d", ret);
14171                         ret = 1;
14172                 } else {
14173                         printf("free space cache v2 cleared\n");
14174                 }
14175         }
14176 close_out:
14177         return ret;
14178 }
14179
/*
 * Help text of "btrfs check": a NULL-terminated list of strings that
 * cmd_check() hands to usage() when the arguments are invalid.  The option
 * lines below describe the long options parsed in cmd_check().
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
14211
14212 int cmd_check(int argc, char **argv)
14213 {
14214         struct cache_tree root_cache;
14215         struct btrfs_root *root;
14216         struct btrfs_fs_info *info;
14217         u64 bytenr = 0;
14218         u64 subvolid = 0;
14219         u64 tree_root_bytenr = 0;
14220         u64 chunk_root_bytenr = 0;
14221         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14222         int ret = 0;
14223         int err = 0;
14224         u64 num;
14225         int init_csum_tree = 0;
14226         int readonly = 0;
14227         int clear_space_cache = 0;
14228         int qgroup_report = 0;
14229         int qgroups_repaired = 0;
14230         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14231         int force = 0;
14232
14233         while(1) {
14234                 int c;
14235                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14236                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14237                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14238                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14239                         GETOPT_VAL_FORCE };
14240                 static const struct option long_options[] = {
14241                         { "super", required_argument, NULL, 's' },
14242                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14243                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14244                         { "init-csum-tree", no_argument, NULL,
14245                                 GETOPT_VAL_INIT_CSUM },
14246                         { "init-extent-tree", no_argument, NULL,
14247                                 GETOPT_VAL_INIT_EXTENT },
14248                         { "check-data-csum", no_argument, NULL,
14249                                 GETOPT_VAL_CHECK_CSUM },
14250                         { "backup", no_argument, NULL, 'b' },
14251                         { "subvol-extents", required_argument, NULL, 'E' },
14252                         { "qgroup-report", no_argument, NULL, 'Q' },
14253                         { "tree-root", required_argument, NULL, 'r' },
14254                         { "chunk-root", required_argument, NULL,
14255                                 GETOPT_VAL_CHUNK_TREE },
14256                         { "progress", no_argument, NULL, 'p' },
14257                         { "mode", required_argument, NULL,
14258                                 GETOPT_VAL_MODE },
14259                         { "clear-space-cache", required_argument, NULL,
14260                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14261                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14262                         { NULL, 0, NULL, 0}
14263                 };
14264
14265                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14266                 if (c < 0)
14267                         break;
14268                 switch(c) {
14269                         case 'a': /* ignored */ break;
14270                         case 'b':
14271                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14272                                 break;
14273                         case 's':
14274                                 num = arg_strtou64(optarg);
14275                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14276                                         error(
14277                                         "super mirror should be less than %d",
14278                                                 BTRFS_SUPER_MIRROR_MAX);
14279                                         exit(1);
14280                                 }
14281                                 bytenr = btrfs_sb_offset(((int)num));
14282                                 printf("using SB copy %llu, bytenr %llu\n", num,
14283                                        (unsigned long long)bytenr);
14284                                 break;
14285                         case 'Q':
14286                                 qgroup_report = 1;
14287                                 break;
14288                         case 'E':
14289                                 subvolid = arg_strtou64(optarg);
14290                                 break;
14291                         case 'r':
14292                                 tree_root_bytenr = arg_strtou64(optarg);
14293                                 break;
14294                         case GETOPT_VAL_CHUNK_TREE:
14295                                 chunk_root_bytenr = arg_strtou64(optarg);
14296                                 break;
14297                         case 'p':
14298                                 ctx.progress_enabled = true;
14299                                 break;
14300                         case '?':
14301                         case 'h':
14302                                 usage(cmd_check_usage);
14303                         case GETOPT_VAL_REPAIR:
14304                                 printf("enabling repair mode\n");
14305                                 repair = 1;
14306                                 ctree_flags |= OPEN_CTREE_WRITES;
14307                                 break;
14308                         case GETOPT_VAL_READONLY:
14309                                 readonly = 1;
14310                                 break;
14311                         case GETOPT_VAL_INIT_CSUM:
14312                                 printf("Creating a new CRC tree\n");
14313                                 init_csum_tree = 1;
14314                                 repair = 1;
14315                                 ctree_flags |= OPEN_CTREE_WRITES;
14316                                 break;
14317                         case GETOPT_VAL_INIT_EXTENT:
14318                                 init_extent_tree = 1;
14319                                 ctree_flags |= (OPEN_CTREE_WRITES |
14320                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14321                                 repair = 1;
14322                                 break;
14323                         case GETOPT_VAL_CHECK_CSUM:
14324                                 check_data_csum = 1;
14325                                 break;
14326                         case GETOPT_VAL_MODE:
14327                                 check_mode = parse_check_mode(optarg);
14328                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14329                                         error("unknown mode: %s", optarg);
14330                                         exit(1);
14331                                 }
14332                                 break;
14333                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14334                                 if (strcmp(optarg, "v1") == 0) {
14335                                         clear_space_cache = 1;
14336                                 } else if (strcmp(optarg, "v2") == 0) {
14337                                         clear_space_cache = 2;
14338                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14339                                 } else {
14340                                         error(
14341                 "invalid argument to --clear-space-cache, must be v1 or v2");
14342                                         exit(1);
14343                                 }
14344                                 ctree_flags |= OPEN_CTREE_WRITES;
14345                                 break;
14346                         case GETOPT_VAL_FORCE:
14347                                 force = 1;
14348                                 break;
14349                 }
14350         }
14351
14352         if (check_argc_exact(argc - optind, 1))
14353                 usage(cmd_check_usage);
14354
14355         if (ctx.progress_enabled) {
14356                 ctx.tp = TASK_NOTHING;
14357                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14358         }
14359
14360         /* This check is the only reason for --readonly to exist */
14361         if (readonly && repair) {
14362                 error("repair options are not compatible with --readonly");
14363                 exit(1);
14364         }
14365
14366         /*
14367          * experimental and dangerous
14368          */
14369         if (repair && check_mode == CHECK_MODE_LOWMEM)
14370                 warning("low-memory mode repair support is only partial");
14371
14372         radix_tree_init();
14373         cache_tree_init(&root_cache);
14374
14375         ret = check_mounted(argv[optind]);
14376         if (!force) {
14377                 if (ret < 0) {
14378                         error("could not check mount status: %s",
14379                                         strerror(-ret));
14380                         err |= !!ret;
14381                         goto err_out;
14382                 } else if (ret) {
14383                         error(
14384 "%s is currently mounted, use --force if you really intend to check the filesystem",
14385                                 argv[optind]);
14386                         ret = -EBUSY;
14387                         err |= !!ret;
14388                         goto err_out;
14389                 }
14390         } else {
14391                 if (repair) {
14392                         error("repair and --force is not yet supported");
14393                         ret = 1;
14394                         err |= !!ret;
14395                         goto err_out;
14396                 }
14397                 if (ret < 0) {
14398                         warning(
14399 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14400                                 argv[optind]);
14401                 } else if (ret) {
14402                         warning(
14403                         "filesystem mounted, continuing because of --force");
14404                 }
14405                 /* A block device is mounted in exclusive mode by kernel */
14406                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14407         }
14408
14409         /* only allow partial opening under repair mode */
14410         if (repair)
14411                 ctree_flags |= OPEN_CTREE_PARTIAL;
14412
14413         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14414                                   chunk_root_bytenr, ctree_flags);
14415         if (!info) {
14416                 error("cannot open file system");
14417                 ret = -EIO;
14418                 err |= !!ret;
14419                 goto err_out;
14420         }
14421
14422         global_info = info;
14423         root = info->fs_root;
14424         uuid_unparse(info->super_copy->fsid, uuidbuf);
14425
14426         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14427
14428         /*
14429          * Check the bare minimum before starting anything else that could rely
14430          * on it, namely the tree roots, any local consistency checks
14431          */
14432         if (!extent_buffer_uptodate(info->tree_root->node) ||
14433             !extent_buffer_uptodate(info->dev_root->node) ||
14434             !extent_buffer_uptodate(info->chunk_root->node)) {
14435                 error("critical roots corrupted, unable to check the filesystem");
14436                 err |= !!ret;
14437                 ret = -EIO;
14438                 goto close_out;
14439         }
14440
14441         if (clear_space_cache) {
14442                 ret = do_clear_free_space_cache(info, clear_space_cache);
14443                 err |= !!ret;
14444                 goto close_out;
14445         }
14446
14447         /*
14448          * repair mode will force us to commit transaction which
14449          * will make us fail to load log tree when mounting.
14450          */
14451         if (repair && btrfs_super_log_root(info->super_copy)) {
14452                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14453                 if (!ret) {
14454                         ret = 1;
14455                         err |= !!ret;
14456                         goto close_out;
14457                 }
14458                 ret = zero_log_tree(root);
14459                 err |= !!ret;
14460                 if (ret) {
14461                         error("failed to zero log tree: %d", ret);
14462                         goto close_out;
14463                 }
14464         }
14465
14466         if (qgroup_report) {
14467                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14468                        uuidbuf);
14469                 ret = qgroup_verify_all(info);
14470                 err |= !!ret;
14471                 if (ret == 0)
14472                         report_qgroups(1);
14473                 goto close_out;
14474         }
14475         if (subvolid) {
14476                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14477                        subvolid, argv[optind], uuidbuf);
14478                 ret = print_extent_state(info, subvolid);
14479                 err |= !!ret;
14480                 goto close_out;
14481         }
14482
14483         if (init_extent_tree || init_csum_tree) {
14484                 struct btrfs_trans_handle *trans;
14485
14486                 trans = btrfs_start_transaction(info->extent_root, 0);
14487                 if (IS_ERR(trans)) {
14488                         error("error starting transaction");
14489                         ret = PTR_ERR(trans);
14490                         err |= !!ret;
14491                         goto close_out;
14492                 }
14493
14494                 if (init_extent_tree) {
14495                         printf("Creating a new extent tree\n");
14496                         ret = reinit_extent_tree(trans, info);
14497                         err |= !!ret;
14498                         if (ret)
14499                                 goto close_out;
14500                 }
14501
14502                 if (init_csum_tree) {
14503                         printf("Reinitialize checksum tree\n");
14504                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14505                         if (ret) {
14506                                 error("checksum tree initialization failed: %d",
14507                                                 ret);
14508                                 ret = -EIO;
14509                                 err |= !!ret;
14510                                 goto close_out;
14511                         }
14512
14513                         ret = fill_csum_tree(trans, info->csum_root,
14514                                              init_extent_tree);
14515                         err |= !!ret;
14516                         if (ret) {
14517                                 error("checksum tree refilling failed: %d", ret);
14518                                 return -EIO;
14519                         }
14520                 }
14521                 /*
14522                  * Ok now we commit and run the normal fsck, which will add
14523                  * extent entries for all of the items it finds.
14524                  */
14525                 ret = btrfs_commit_transaction(trans, info->extent_root);
14526                 err |= !!ret;
14527                 if (ret)
14528                         goto close_out;
14529         }
14530         if (!extent_buffer_uptodate(info->extent_root->node)) {
14531                 error("critical: extent_root, unable to check the filesystem");
14532                 ret = -EIO;
14533                 err |= !!ret;
14534                 goto close_out;
14535         }
14536         if (!extent_buffer_uptodate(info->csum_root->node)) {
14537                 error("critical: csum_root, unable to check the filesystem");
14538                 ret = -EIO;
14539                 err |= !!ret;
14540                 goto close_out;
14541         }
14542
14543         ret = do_check_chunks_and_extents(info);
14544         err |= !!ret;
14545         if (ret)
14546                 error(
14547                 "errors found in extent allocation tree or chunk allocation");
14548
14549         ret = repair_root_items(info);
14550         err |= !!ret;
14551         if (ret < 0) {
14552                 error("failed to repair root items: %s", strerror(-ret));
14553                 goto close_out;
14554         }
14555         if (repair) {
14556                 fprintf(stderr, "Fixed %d roots.\n", ret);
14557                 ret = 0;
14558         } else if (ret > 0) {
14559                 fprintf(stderr,
14560                        "Found %d roots with an outdated root item.\n",
14561                        ret);
14562                 fprintf(stderr,
14563                         "Please run a filesystem check with the option --repair to fix them.\n");
14564                 ret = 1;
14565                 err |= !!ret;
14566                 goto close_out;
14567         }
14568
14569         if (!ctx.progress_enabled) {
14570                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14571                         fprintf(stderr, "checking free space tree\n");
14572                 else
14573                         fprintf(stderr, "checking free space cache\n");
14574         }
14575         ret = check_space_cache(root);
14576         err |= !!ret;
14577         if (ret) {
14578                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14579                         error("errors found in free space tree");
14580                 else
14581                         error("errors found in free space cache");
14582                 goto out;
14583         }
14584
14585         /*
14586          * We used to have to have these hole extents in between our real
14587          * extents so if we don't have this flag set we need to make sure there
14588          * are no gaps in the file extents for inodes, otherwise we can just
14589          * ignore it when this happens.
14590          */
14591         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14592         ret = do_check_fs_roots(info, &root_cache);
14593         err |= !!ret;
14594         if (ret) {
14595                 error("errors found in fs roots");
14596                 goto out;
14597         }
14598
14599         fprintf(stderr, "checking csums\n");
14600         ret = check_csums(root);
14601         err |= !!ret;
14602         if (ret) {
14603                 error("errors found in csum tree");
14604                 goto out;
14605         }
14606
14607         fprintf(stderr, "checking root refs\n");
14608         /* For low memory mode, check_fs_roots_v2 handles root refs */
14609         if (check_mode != CHECK_MODE_LOWMEM) {
14610                 ret = check_root_refs(root, &root_cache);
14611                 err |= !!ret;
14612                 if (ret) {
14613                         error("errors found in root refs");
14614                         goto out;
14615                 }
14616         }
14617
14618         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14619                 struct extent_buffer *eb;
14620
14621                 eb = list_first_entry(&root->fs_info->recow_ebs,
14622                                       struct extent_buffer, recow);
14623                 list_del_init(&eb->recow);
14624                 ret = recow_extent_buffer(root, eb);
14625                 err |= !!ret;
14626                 if (ret) {
14627                         error("fails to fix transid errors");
14628                         break;
14629                 }
14630         }
14631
14632         while (!list_empty(&delete_items)) {
14633                 struct bad_item *bad;
14634
14635                 bad = list_first_entry(&delete_items, struct bad_item, list);
14636                 list_del_init(&bad->list);
14637                 if (repair) {
14638                         ret = delete_bad_item(root, bad);
14639                         err |= !!ret;
14640                 }
14641                 free(bad);
14642         }
14643
14644         if (info->quota_enabled) {
14645                 fprintf(stderr, "checking quota groups\n");
14646                 ret = qgroup_verify_all(info);
14647                 err |= !!ret;
14648                 if (ret) {
14649                         error("failed to check quota groups");
14650                         goto out;
14651                 }
14652                 report_qgroups(0);
14653                 ret = repair_qgroups(info, &qgroups_repaired);
14654                 err |= !!ret;
14655                 if (err) {
14656                         error("failed to repair quota groups");
14657                         goto out;
14658                 }
14659                 ret = 0;
14660         }
14661
14662         if (!list_empty(&root->fs_info->recow_ebs)) {
14663                 error("transid errors in file system");
14664                 ret = 1;
14665                 err |= !!ret;
14666         }
14667 out:
14668         printf("found %llu bytes used, ",
14669                (unsigned long long)bytes_used);
14670         if (err)
14671                 printf("error(s) found\n");
14672         else
14673                 printf("no error found\n");
14674         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14675         printf("total tree bytes: %llu\n",
14676                (unsigned long long)total_btree_bytes);
14677         printf("total fs tree bytes: %llu\n",
14678                (unsigned long long)total_fs_tree_bytes);
14679         printf("total extent tree bytes: %llu\n",
14680                (unsigned long long)total_extent_tree_bytes);
14681         printf("btree space waste bytes: %llu\n",
14682                (unsigned long long)btree_space_waste);
14683         printf("file data blocks allocated: %llu\n referenced %llu\n",
14684                 (unsigned long long)data_bytes_allocated,
14685                 (unsigned long long)data_bytes_referenced);
14686
14687         free_qgroup_counts();
14688         free_root_recs_tree(&root_cache);
14689 close_out:
14690         close_ctree(root);
14691 err_out:
14692         if (ctx.progress_enabled)
14693                 task_deinit(ctx.info);
14694
14695         return err;
14696 }