btrfs-progs: check: introduce repair_extent_data_item()
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Check phase currently running.  print_status_check() uses the value as an
 * index into its table of progress messages and returns early for
 * TASK_NOTHING.
 */
47 enum task_position {
48         TASK_EXTENTS,
49         TASK_FREE_SPACE,
50         TASK_FS_ROOTS,
51         TASK_NOTHING, /* have to be the last element */
52 };
53
/*
 * Context handed to the progress callbacks: print_status_check() reads @tp
 * to choose its message and passes @info to the task_period_*() helpers.
 * NOTE(review): @progress_enabled is not read in this part of the file.
 */
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
/*
 * File-scope checker state.
 * NOTE(review): the u64 counters look like running totals collected during
 * the scan, but none of them is updated in this part of the file -- confirm
 * against the code that uses them.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
/* Progress-indicator context, zero-initialised. */
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementations selectable by name.  parse_check_mode() maps
 * "lowmem" and "orig"/"original" onto these and yields CHECK_MODE_UNKNOWN
 * for any other string.  CHECK_MODE_DEFAULT is an alias of
 * CHECK_MODE_ORIGINAL.
 */
78 enum btrfs_check_mode {
79         CHECK_MODE_ORIGINAL,
80         CHECK_MODE_LOWMEM,
81         CHECK_MODE_UNKNOWN,
82         CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
83 };
84
/* Mode in effect; starts out as the default (original) mode. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header embedded as member "node" of struct data_backref and
 * struct tree_backref.  compare_extent_backref() orders on is_data, then
 * full_backref, before delegating to the type-specific comparator.
 */
87 struct extent_backref {
88         struct rb_node node;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Backref describing a reference to a data extent.
 *
 * compare_data_backref() keys on parent/root first; for a full backref that
 * is the whole key, otherwise owner and offset follow, and disk_bytenr and
 * bytes are compared only when both sides have a non-zero found_ref.
 */
101 struct data_backref {
102         struct extent_backref node;
        /* parent and root share the same storage */
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
        /* distinct from the one-bit node.found_ref flag */
113         u32 found_ref;
114 };
115
/*
 * Error bits, one flag per problem class (see the per-line comments).
 * Bit 0 is not assigned in this list.
 */
116 #define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
117 #define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
118 #define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
119 #define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
120 #define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
121 #define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
122 #define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
123 #define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
124 #define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
125 #define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
126 #define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
127 #define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
128 #define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
129 #define NO_INODE_ITEM           (1<<14) /* no inode_item */
130 #define LAST_ITEM               (1<<15) /* Complete this tree traversal */
131 #define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
132 #define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
133 #define DIR_INDEX_MISSING       (1<<18) /* INODE_INDEX not found */
134 #define DIR_INDEX_MISMATCH      (1<<19) /* INODE_INDEX found but not match */
135 #define DIR_COUNT_AGAIN         (1<<20) /* DIR isize should be recalculated */
136
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
138 {
139         return container_of(back, struct data_backref, node);
140 }
141
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
143 {
144         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146         struct data_backref *back1 = to_data_backref(ext1);
147         struct data_backref *back2 = to_data_backref(ext2);
148
149         WARN_ON(!ext1->is_data);
150         WARN_ON(!ext2->is_data);
151
152         /* parent and root are a union, so this covers both */
153         if (back1->parent > back2->parent)
154                 return 1;
155         if (back1->parent < back2->parent)
156                 return -1;
157
158         /* This is a full backref and the parents match. */
159         if (back1->node.full_backref)
160                 return 0;
161
162         if (back1->owner > back2->owner)
163                 return 1;
164         if (back1->owner < back2->owner)
165                 return -1;
166
167         if (back1->offset > back2->offset)
168                 return 1;
169         if (back1->offset < back2->offset)
170                 return -1;
171
172         if (back1->found_ref && back2->found_ref) {
173                 if (back1->disk_bytenr > back2->disk_bytenr)
174                         return 1;
175                 if (back1->disk_bytenr < back2->disk_bytenr)
176                         return -1;
177
178                 if (back1->bytes > back2->bytes)
179                         return 1;
180                 if (back1->bytes < back2->bytes)
181                         return -1;
182         }
183
184         return 0;
185 }
186
187 /*
188  * Much like data_backref, but without the undetermined members and
189  * linked through a list_head instead.
190  * During extent scan, it is stored in root->orphan_data_extent.
191  * During fs tree scan, it is then moved to inode_record::orphan_extents.
192  */
193 struct orphan_data_extent {
194         struct list_head list;
195         u64 root;
196         u64 objectid;
197         u64 offset;
198         u64 disk_bytenr;
199         u64 disk_len;
200 };
201
/*
 * Backref for a non-data extent (is_data == 0).  compare_tree_backref()
 * orders these by the parent/root value alone.
 */
202 struct tree_backref {
203         struct extent_backref node;
        /* parent and root share the same storage */
204         union {
205                 u64 parent;
206                 u64 root;
207         };
208 };
209
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
211 {
212         return container_of(back, struct tree_backref, node);
213 }
214
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
216 {
217         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219         struct tree_backref *back1 = to_tree_backref(ext1);
220         struct tree_backref *back2 = to_tree_backref(ext2);
221
222         WARN_ON(ext1->is_data);
223         WARN_ON(ext2->is_data);
224
225         /* parent and root are a union, so this covers both */
226         if (back1->parent > back2->parent)
227                 return 1;
228         if (back1->parent < back2->parent)
229                 return -1;
230
231         return 0;
232 }
233
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
235 {
236         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
238
239         if (ext1->is_data > ext2->is_data)
240                 return 1;
241
242         if (ext1->is_data < ext2->is_data)
243                 return -1;
244
245         if (ext1->full_backref > ext2->full_backref)
246                 return 1;
247         if (ext1->full_backref < ext2->full_backref)
248                 return -1;
249
250         if (ext1->is_data)
251                 return compare_data_backref(node1, node2);
252         else
253                 return compare_tree_backref(node1, node2);
254 }
255
256 /* Explicit initialization for extent_record::flag_block_full_backref */
257 enum { FLAG_UNSET = 2 };
258
/*
 * Per-extent bookkeeping record.
 * NOTE(review): no field is accessed in this part of the file; the roles of
 * the individual members are inferred from their names only -- confirm
 * against the code that fills them in.
 */
259 struct extent_record {
260         struct list_head backrefs;
261         struct list_head dups;
262         struct rb_root backref_tree;
263         struct list_head list;
264         struct cache_extent cache;
265         struct btrfs_disk_key parent_key;
266         u64 start;
267         u64 max_size;
268         u64 nr;
269         u64 refs;
270         u64 extent_item_refs;
271         u64 generation;
272         u64 parent_generation;
273         u64 info_objectid;
274         u32 num_duplicates;
275         u8 info_level;
        /* two bits wide so it can hold FLAG_UNSET (2) besides 0 and 1 */
276         unsigned int flag_block_full_backref:2;
277         unsigned int found_rec:1;
278         unsigned int content_checked:1;
279         unsigned int owner_ref_checked:1;
280         unsigned int is_root:1;
281         unsigned int metadata:1;
282         unsigned int bad_full_backref:1;
283         unsigned int crossing_stripes:1;
284         unsigned int wrong_chunk_type:1;
285 };
286
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
288 {
289         return container_of(entry, struct extent_record, list);
290 }
291
/*
 * One back reference of an inode, linked on inode_record::backrefs.
 * The name is stored inline after the structure: clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 * NOTE(review): @errors presumably carries REF_ERR_* bits -- confirm.
 */
292 struct inode_backref {
293         struct list_head list;
294         unsigned int found_dir_item:1;
295         unsigned int found_dir_index:1;
296         unsigned int found_inode_ref:1;
297         u8 filetype;
298         u8 ref_type;
299         int errors;
300         u64 dir;
301         u64 index;
302         u16 namelen;
        /* trailing storage for the name; must stay the last member */
303         char name[0];
304 };
305
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
307 {
308         return list_entry(entry, struct inode_backref, list);
309 }
310
/*
 * List-linked summary of a root item.
 * NOTE(review): not used in this part of the file; the fields appear to
 * mirror values of an on-disk root item -- confirm against the code that
 * populates it.
 */
311 struct root_item_record {
312         struct list_head list;
313         u64 objectid;
314         u64 bytenr;
315         u64 last_snapshot;
316         u8 level;
317         u8 drop_level;
318         struct btrfs_key drop_key;
319 };
320
/*
 * Error bits for a single back reference; print_ref_error() prints one
 * message per set bit.
 */
321 #define REF_ERR_NO_DIR_ITEM             (1 << 0)
322 #define REF_ERR_NO_DIR_INDEX            (1 << 1)
323 #define REF_ERR_NO_INODE_REF            (1 << 2)
324 #define REF_ERR_DUP_DIR_ITEM            (1 << 3)
325 #define REF_ERR_DUP_DIR_INDEX           (1 << 4)
326 #define REF_ERR_DUP_INODE_REF           (1 << 5)
327 #define REF_ERR_INDEX_UNMATCH           (1 << 6)
328 #define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
329 #define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
330 #define REF_ERR_NO_ROOT_REF             (1 << 9)
331 #define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
332 #define REF_ERR_DUP_ROOT_REF            (1 << 11)
333 #define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
334
/*
 * One hole covering the byte range [start, start + len), kept in an rb-tree
 * ordered by compare_hole().
 */
335 struct file_extent_hole {
336         struct rb_node node;
337         u64 start;
338         u64 len;
339 };
340
/*
 * Per-inode record.
 *
 * @backrefs:        list of struct inode_backref
 * @errors:          I_ERR_* bits, decoded by print_inode_error()
 * @holes:           rb-tree of struct file_extent_hole
 * @orphan_extents:  list of struct orphan_data_extent
 * @refs:            set to 1 on the copy made by clone_inode_rec()
 *
 * NOTE(review): the remaining fields are not touched in this part of the
 * file; presumably values read from the inode item versus values found
 * during the scan -- confirm.
 */
341 struct inode_record {
342         struct list_head backrefs;
343         unsigned int checked:1;
344         unsigned int merging:1;
345         unsigned int found_inode_item:1;
346         unsigned int found_dir_item:1;
347         unsigned int found_file_extent:1;
348         unsigned int found_csum_item:1;
349         unsigned int some_csum_missing:1;
350         unsigned int nodatasum:1;
351         int errors;
352
353         u64 ino;
354         u32 nlink;
355         u32 imode;
356         u64 isize;
357         u64 nbytes;
358
359         u32 found_link;
360         u64 found_size;
361         u64 extent_start;
362         u64 extent_end;
363         struct rb_root holes;
364         struct list_head orphan_extents;
365
366         u32 refs;
367 };
368
/*
 * Error bits stored in inode_record::errors; print_inode_error() prints one
 * message per set bit.
 */
369 #define I_ERR_NO_INODE_ITEM             (1 << 0)
370 #define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
371 #define I_ERR_DUP_INODE_ITEM            (1 << 2)
372 #define I_ERR_DUP_DIR_INDEX             (1 << 3)
373 #define I_ERR_ODD_DIR_ITEM              (1 << 4)
374 #define I_ERR_ODD_FILE_EXTENT           (1 << 5)
375 #define I_ERR_BAD_FILE_EXTENT           (1 << 6)
376 #define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
377 #define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
378 #define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
379 #define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
380 #define I_ERR_ODD_CSUM_ITEM             (1 << 11)
381 #define I_ERR_SOME_CSUM_MISSING         (1 << 12)
382 #define I_ERR_LINK_COUNT_WRONG          (1 << 13)
383 #define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
384
/*
 * One back reference of a root, linked on root_record::backrefs
 * (presumably -- confirm).  As with struct inode_backref, @name is trailing
 * storage sized by the allocator of the entry.
 * NOTE(review): not used in this part of the file.
 */
385 struct root_backref {
386         struct list_head list;
387         unsigned int found_dir_item:1;
388         unsigned int found_dir_index:1;
389         unsigned int found_back_ref:1;
390         unsigned int found_forward_ref:1;
391         unsigned int reachable:1;
392         int errors;
393         u64 ref_root;
394         u64 dir;
395         u64 index;
396         u16 namelen;
        /* trailing storage for the name; must stay the last member */
397         char name[0];
398 };
399
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
401 {
402         return list_entry(entry, struct root_backref, list);
403 }
404
/*
 * Per-root record carrying a list of back references.
 * NOTE(review): not used in this part of the file; @backrefs presumably
 * holds struct root_backref entries -- confirm.
 */
405 struct root_record {
406         struct list_head backrefs;
407         struct cache_extent cache;
408         unsigned int found_root_item:1;
409         u64 objectid;
410         u32 found_ref;
411 };
412
/*
 * A cache_extent entry paired with an opaque payload pointer.
 * NOTE(review): what @data points to is decided by code outside this view.
 */
413 struct ptr_node {
414         struct cache_extent cache;
415         void *data;
416 };
417
/*
 * Cache entry holding its own root and inode cache trees plus the inode
 * record currently being worked on.
 * NOTE(review): not used in this part of the file; presumably one entry per
 * shared tree block tracked by struct walk_control -- confirm.
 */
418 struct shared_node {
419         struct cache_extent cache;
420         struct cache_tree root_cache;
421         struct cache_tree inode_cache;
422         struct inode_record *current;
423         u32 refs;
424 };
425
/*
 * A (start, size) pair describing a block.
 * NOTE(review): not used in this part of the file; units presumably bytes --
 * confirm.
 */
426 struct block_info {
427         u64 start;
428         u32 size;
429 };
430
/*
 * Tree-walk state: a cache tree of shared nodes and one shared_node slot
 * per tree level (BTRFS_MAX_LEVEL entries).
 * NOTE(review): not used in this part of the file; @active_node and
 * @root_level are presumably level indices into @nodes -- confirm.
 */
431 struct walk_control {
432         struct cache_tree shared;
433         struct shared_node *nodes[BTRFS_MAX_LEVEL];
434         int active_node;
435         int root_level;
436 };
437
/*
 * List-linked (key, root id) pair identifying an item.
 * NOTE(review): not used in this part of the file; presumably queued on the
 * delete_items list -- confirm.
 */
438 struct bad_item {
439         struct btrfs_key key;
440         u64 root_id;
441         struct list_head list;
442 };
443
/*
 * List-linked (bytenr, bytes) candidate with a counter and a broken marker.
 * NOTE(review): not used in this part of the file; the meaning of @count
 * and @broken is decided by code outside this view.
 */
444 struct extent_entry {
445         u64 bytenr;
446         u64 bytes;
447         int count;
448         int broken;
449         struct list_head list;
450 };
451
/*
 * Cached facts about a root's top tree block.
 * NOTE(review): not used in this part of the file; presumably stored in
 * roots_info_cache -- confirm.
 */
452 struct root_item_info {
453         /* level of the root */
454         u8 level;
455         /* number of nodes at this level, must be 1 for a root */
456         int node_count;
457         u64 bytenr;
458         u64 gen;
459         struct cache_extent cache_extent;
460 };
461
462 /*
463  * Error bit for low memory mode check.
464  *
465  * Currently no caller cares about it yet.  Just internal use for error
466  * classification.
 *
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both
 * defined as (1 << 4), so the two conditions cannot be told apart in a
 * bitmask -- confirm whether the overlap is intended.
467  */
468 #define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
469 #define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
470 #define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
471 #define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
472 #define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
473 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
474 #define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
475 #define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
476 #define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
477 #define CHUNK_TYPE_MISMATCH     (1 << 8)
478
479 static void *print_status_check(void *p)
480 {
481         struct task_ctx *priv = p;
482         const char work_indicator[] = { '.', 'o', 'O', 'o' };
483         uint32_t count = 0;
484         static char *task_position_string[] = {
485                 "checking extents",
486                 "checking free space cache",
487                 "checking fs roots",
488         };
489
490         task_period_start(priv->info, 1000 /* 1s */);
491
492         if (priv->tp == TASK_NOTHING)
493                 return NULL;
494
495         while (1) {
496                 printf("%s [%c]\r", task_position_string[priv->tp],
497                                 work_indicator[count % 4]);
498                 count++;
499                 fflush(stdout);
500                 task_period_wait(priv->info);
501         }
502         return NULL;
503 }
504
/*
 * Terminate the progress line with a newline and flush stdout.
 *
 * @p:  unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        (void)p;

        putchar('\n');
        fflush(stdout);

        return 0;
}
512
513 static enum btrfs_check_mode parse_check_mode(const char *str)
514 {
515         if (strcmp(str, "lowmem") == 0)
516                 return CHECK_MODE_LOWMEM;
517         if (strcmp(str, "orig") == 0)
518                 return CHECK_MODE_ORIGINAL;
519         if (strcmp(str, "original") == 0)
520                 return CHECK_MODE_ORIGINAL;
521
522         return CHECK_MODE_UNKNOWN;
523 }
524
525 /* Compatible function to allow reuse of old codes */
526 static u64 first_extent_gap(struct rb_root *holes)
527 {
528         struct file_extent_hole *hole;
529
530         if (RB_EMPTY_ROOT(holes))
531                 return (u64)-1;
532
533         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
534         return hole->start;
535 }
536
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
538 {
539         struct file_extent_hole *hole1;
540         struct file_extent_hole *hole2;
541
542         hole1 = rb_entry(node1, struct file_extent_hole, node);
543         hole2 = rb_entry(node2, struct file_extent_hole, node);
544
545         if (hole1->start > hole2->start)
546                 return -1;
547         if (hole1->start < hole2->start)
548                 return 1;
549         /* Now hole1->start == hole2->start */
550         if (hole1->len >= hole2->len)
551                 /*
552                  * Hole 1 will be merge center
553                  * Same hole will be merged later
554                  */
555                 return -1;
556         /* Hole 2 will be merge center */
557         return 1;
558 }
559
560 /*
561  * Add a hole to the record
562  *
563  * This will do hole merge for copy_file_extent_holes(),
564  * which will ensure there won't be continuous holes.
565  */
566 static int add_file_extent_hole(struct rb_root *holes,
567                                 u64 start, u64 len)
568 {
569         struct file_extent_hole *hole;
570         struct file_extent_hole *prev = NULL;
571         struct file_extent_hole *next = NULL;
572
573         hole = malloc(sizeof(*hole));
574         if (!hole)
575                 return -ENOMEM;
576         hole->start = start;
577         hole->len = len;
578         /* Since compare will not return 0, no -EEXIST will happen */
579         rb_insert(holes, &hole->node, compare_hole);
580
581         /* simple merge with previous hole */
582         if (rb_prev(&hole->node))
583                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
584                                 node);
585         if (prev && prev->start + prev->len >= hole->start) {
586                 hole->len = hole->start + hole->len - prev->start;
587                 hole->start = prev->start;
588                 rb_erase(&prev->node, holes);
589                 free(prev);
590                 prev = NULL;
591         }
592
593         /* iterate merge with next holes */
594         while (1) {
595                 if (!rb_next(&hole->node))
596                         break;
597                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
598                                         node);
599                 if (hole->start + hole->len >= next->start) {
600                         if (hole->start + hole->len <= next->start + next->len)
601                                 hole->len = next->start + next->len -
602                                             hole->start;
603                         rb_erase(&next->node, holes);
604                         free(next);
605                         next = NULL;
606                 } else
607                         break;
608         }
609         return 0;
610 }
611
612 static int compare_hole_range(struct rb_node *node, void *data)
613 {
614         struct file_extent_hole *hole;
615         u64 start;
616
617         hole = (struct file_extent_hole *)data;
618         start = hole->start;
619
620         hole = rb_entry(node, struct file_extent_hole, node);
621         if (start < hole->start)
622                 return -1;
623         if (start >= hole->start && start < hole->start + hole->len)
624                 return 0;
625         return 1;
626 }
627
628 /*
629  * Delete a hole in the record
630  *
631  * This will do the hole split and is much restrict than add.
632  */
633 static int del_file_extent_hole(struct rb_root *holes,
634                                 u64 start, u64 len)
635 {
636         struct file_extent_hole *hole;
637         struct file_extent_hole tmp;
638         u64 prev_start = 0;
639         u64 prev_len = 0;
640         u64 next_start = 0;
641         u64 next_len = 0;
642         struct rb_node *node;
643         int have_prev = 0;
644         int have_next = 0;
645         int ret = 0;
646
647         tmp.start = start;
648         tmp.len = len;
649         node = rb_search(holes, &tmp, compare_hole_range, NULL);
650         if (!node)
651                 return -EEXIST;
652         hole = rb_entry(node, struct file_extent_hole, node);
653         if (start + len > hole->start + hole->len)
654                 return -EEXIST;
655
656         /*
657          * Now there will be no overlap, delete the hole and re-add the
658          * split(s) if they exists.
659          */
660         if (start > hole->start) {
661                 prev_start = hole->start;
662                 prev_len = start - hole->start;
663                 have_prev = 1;
664         }
665         if (hole->start + hole->len > start + len) {
666                 next_start = start + len;
667                 next_len = hole->start + hole->len - start - len;
668                 have_next = 1;
669         }
670         rb_erase(node, holes);
671         free(hole);
672         if (have_prev) {
673                 ret = add_file_extent_hole(holes, prev_start, prev_len);
674                 if (ret < 0)
675                         return ret;
676         }
677         if (have_next) {
678                 ret = add_file_extent_hole(holes, next_start, next_len);
679                 if (ret < 0)
680                         return ret;
681         }
682         return 0;
683 }
684
685 static int copy_file_extent_holes(struct rb_root *dst,
686                                   struct rb_root *src)
687 {
688         struct file_extent_hole *hole;
689         struct rb_node *node;
690         int ret = 0;
691
692         node = rb_first(src);
693         while (node) {
694                 hole = rb_entry(node, struct file_extent_hole, node);
695                 ret = add_file_extent_hole(dst, hole->start, hole->len);
696                 if (ret)
697                         break;
698                 node = rb_next(node);
699         }
700         return ret;
701 }
702
703 static void free_file_extent_holes(struct rb_root *holes)
704 {
705         struct rb_node *node;
706         struct file_extent_hole *hole;
707
708         node = rb_first(holes);
709         while (node) {
710                 hole = rb_entry(node, struct file_extent_hole, node);
711                 rb_erase(node, holes);
712                 free(hole);
713                 node = rb_first(holes);
714         }
715 }
716
717 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
718
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720                                  struct btrfs_root *root)
721 {
722         if (root->last_trans != trans->transid) {
723                 root->track_dirty = 1;
724                 root->last_trans = trans->transid;
725                 root->commit_root = root->node;
726                 extent_buffer_get(root->node);
727         }
728 }
729
730 static u8 imode_to_type(u32 imode)
731 {
732 #define S_SHIFT 12
733         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
735                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
736                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
737                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
738                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
739                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
740                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
741         };
742
743         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
744 #undef S_SHIFT
745 }
746
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
748 {
749         struct device_record *rec1;
750         struct device_record *rec2;
751
752         rec1 = rb_entry(node1, struct device_record, node);
753         rec2 = rb_entry(node2, struct device_record, node);
754         if (rec1->devid > rec2->devid)
755                 return -1;
756         else if (rec1->devid < rec2->devid)
757                 return 1;
758         else
759                 return 0;
760 }
761
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
763 {
764         struct inode_record *rec;
765         struct inode_backref *backref;
766         struct inode_backref *orig;
767         struct inode_backref *tmp;
768         struct orphan_data_extent *src_orphan;
769         struct orphan_data_extent *dst_orphan;
770         struct rb_node *rb;
771         size_t size;
772         int ret;
773
774         rec = malloc(sizeof(*rec));
775         if (!rec)
776                 return ERR_PTR(-ENOMEM);
777         memcpy(rec, orig_rec, sizeof(*rec));
778         rec->refs = 1;
779         INIT_LIST_HEAD(&rec->backrefs);
780         INIT_LIST_HEAD(&rec->orphan_extents);
781         rec->holes = RB_ROOT;
782
783         list_for_each_entry(orig, &orig_rec->backrefs, list) {
784                 size = sizeof(*orig) + orig->namelen + 1;
785                 backref = malloc(size);
786                 if (!backref) {
787                         ret = -ENOMEM;
788                         goto cleanup;
789                 }
790                 memcpy(backref, orig, size);
791                 list_add_tail(&backref->list, &rec->backrefs);
792         }
793         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794                 dst_orphan = malloc(sizeof(*dst_orphan));
795                 if (!dst_orphan) {
796                         ret = -ENOMEM;
797                         goto cleanup;
798                 }
799                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
801         }
802         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
803         if (ret < 0)
804                 goto cleanup_rb;
805
806         return rec;
807
808 cleanup_rb:
809         rb = rb_first(&rec->holes);
810         while (rb) {
811                 struct file_extent_hole *hole;
812
813                 hole = rb_entry(rb, struct file_extent_hole, node);
814                 rb = rb_next(rb);
815                 free(hole);
816         }
817
818 cleanup:
819         if (!list_empty(&rec->backrefs))
820                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821                         list_del(&orig->list);
822                         free(orig);
823                 }
824
825         if (!list_empty(&rec->orphan_extents))
826                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827                         list_del(&orig->list);
828                         free(orig);
829                 }
830
831         free(rec);
832
833         return ERR_PTR(ret);
834 }
835
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
837                                       u64 objectid)
838 {
839         struct orphan_data_extent *orphan;
840
841         if (list_empty(orphan_extents))
842                 return;
843         printf("The following data extent is lost in tree %llu:\n",
844                objectid);
845         list_for_each_entry(orphan, orphan_extents, list) {
846                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
848                        orphan->disk_len);
849         }
850 }
851
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
853 {
854         u64 root_objectid = root->root_key.objectid;
855         int errors = rec->errors;
856
857         if (!errors)
858                 return;
859         /* reloc root errors, we print its corresponding fs root objectid*/
860         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861                 root_objectid = root->root_key.offset;
862                 fprintf(stderr, "reloc");
863         }
864         fprintf(stderr, "root %llu inode %llu errors %x",
865                 (unsigned long long) root_objectid,
866                 (unsigned long long) rec->ino, rec->errors);
867
868         if (errors & I_ERR_NO_INODE_ITEM)
869                 fprintf(stderr, ", no inode item");
870         if (errors & I_ERR_NO_ORPHAN_ITEM)
871                 fprintf(stderr, ", no orphan item");
872         if (errors & I_ERR_DUP_INODE_ITEM)
873                 fprintf(stderr, ", dup inode item");
874         if (errors & I_ERR_DUP_DIR_INDEX)
875                 fprintf(stderr, ", dup dir index");
876         if (errors & I_ERR_ODD_DIR_ITEM)
877                 fprintf(stderr, ", odd dir item");
878         if (errors & I_ERR_ODD_FILE_EXTENT)
879                 fprintf(stderr, ", odd file extent");
880         if (errors & I_ERR_BAD_FILE_EXTENT)
881                 fprintf(stderr, ", bad file extent");
882         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883                 fprintf(stderr, ", file extent overlap");
884         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885                 fprintf(stderr, ", file extent discount");
886         if (errors & I_ERR_DIR_ISIZE_WRONG)
887                 fprintf(stderr, ", dir isize wrong");
888         if (errors & I_ERR_FILE_NBYTES_WRONG)
889                 fprintf(stderr, ", nbytes wrong");
890         if (errors & I_ERR_ODD_CSUM_ITEM)
891                 fprintf(stderr, ", odd csum item");
892         if (errors & I_ERR_SOME_CSUM_MISSING)
893                 fprintf(stderr, ", some csum missing");
894         if (errors & I_ERR_LINK_COUNT_WRONG)
895                 fprintf(stderr, ", link count wrong");
896         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897                 fprintf(stderr, ", orphan file extent");
898         fprintf(stderr, "\n");
899         /* Print the orphan extents if needed */
900         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
901                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
902
903         /* Print the holes if needed */
904         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905                 struct file_extent_hole *hole;
906                 struct rb_node *node;
907                 int found = 0;
908
909                 node = rb_first(&rec->holes);
910                 fprintf(stderr, "Found file extent holes:\n");
911                 while (node) {
912                         found = 1;
913                         hole = rb_entry(node, struct file_extent_hole, node);
914                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
915                                 hole->start, hole->len);
916                         node = rb_next(node);
917                 }
918                 if (!found)
919                         fprintf(stderr, "\tstart: 0, len: %llu\n",
920                                 round_up(rec->isize,
921                                          root->fs_info->sectorsize));
922         }
923 }
924
925 static void print_ref_error(int errors)
926 {
927         if (errors & REF_ERR_NO_DIR_ITEM)
928                 fprintf(stderr, ", no dir item");
929         if (errors & REF_ERR_NO_DIR_INDEX)
930                 fprintf(stderr, ", no dir index");
931         if (errors & REF_ERR_NO_INODE_REF)
932                 fprintf(stderr, ", no inode ref");
933         if (errors & REF_ERR_DUP_DIR_ITEM)
934                 fprintf(stderr, ", dup dir item");
935         if (errors & REF_ERR_DUP_DIR_INDEX)
936                 fprintf(stderr, ", dup dir index");
937         if (errors & REF_ERR_DUP_INODE_REF)
938                 fprintf(stderr, ", dup inode ref");
939         if (errors & REF_ERR_INDEX_UNMATCH)
940                 fprintf(stderr, ", index mismatch");
941         if (errors & REF_ERR_FILETYPE_UNMATCH)
942                 fprintf(stderr, ", filetype mismatch");
943         if (errors & REF_ERR_NAME_TOO_LONG)
944                 fprintf(stderr, ", name too long");
945         if (errors & REF_ERR_NO_ROOT_REF)
946                 fprintf(stderr, ", no root ref");
947         if (errors & REF_ERR_NO_ROOT_BACKREF)
948                 fprintf(stderr, ", no root backref");
949         if (errors & REF_ERR_DUP_ROOT_REF)
950                 fprintf(stderr, ", dup root ref");
951         if (errors & REF_ERR_DUP_ROOT_BACKREF)
952                 fprintf(stderr, ", dup root backref");
953         fprintf(stderr, "\n");
954 }
955
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
957                                           u64 ino, int mod)
958 {
959         struct ptr_node *node;
960         struct cache_extent *cache;
961         struct inode_record *rec = NULL;
962         int ret;
963
964         cache = lookup_cache_extent(inode_cache, ino, 1);
965         if (cache) {
966                 node = container_of(cache, struct ptr_node, cache);
967                 rec = node->data;
968                 if (mod && rec->refs > 1) {
969                         node->data = clone_inode_rec(rec);
970                         if (IS_ERR(node->data))
971                                 return node->data;
972                         rec->refs--;
973                         rec = node->data;
974                 }
975         } else if (mod) {
976                 rec = calloc(1, sizeof(*rec));
977                 if (!rec)
978                         return ERR_PTR(-ENOMEM);
979                 rec->ino = ino;
980                 rec->extent_start = (u64)-1;
981                 rec->refs = 1;
982                 INIT_LIST_HEAD(&rec->backrefs);
983                 INIT_LIST_HEAD(&rec->orphan_extents);
984                 rec->holes = RB_ROOT;
985
986                 node = malloc(sizeof(*node));
987                 if (!node) {
988                         free(rec);
989                         return ERR_PTR(-ENOMEM);
990                 }
991                 node->cache.start = ino;
992                 node->cache.size = 1;
993                 node->data = rec;
994
995                 if (ino == BTRFS_FREE_INO_OBJECTID)
996                         rec->found_link = 1;
997
998                 ret = insert_cache_extent(inode_cache, &node->cache);
999                 if (ret)
1000                         return ERR_PTR(-EEXIST);
1001         }
1002         return rec;
1003 }
1004
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1006 {
1007         struct orphan_data_extent *orphan;
1008
1009         while (!list_empty(orphan_extents)) {
1010                 orphan = list_entry(orphan_extents->next,
1011                                     struct orphan_data_extent, list);
1012                 list_del(&orphan->list);
1013                 free(orphan);
1014         }
1015 }
1016
1017 static void free_inode_rec(struct inode_record *rec)
1018 {
1019         struct inode_backref *backref;
1020
1021         if (--rec->refs > 0)
1022                 return;
1023
1024         while (!list_empty(&rec->backrefs)) {
1025                 backref = to_inode_backref(rec->backrefs.next);
1026                 list_del(&backref->list);
1027                 free(backref);
1028         }
1029         free_orphan_data_extents(&rec->orphan_extents);
1030         free_file_extent_holes(&rec->holes);
1031         free(rec);
1032 }
1033
1034 static int can_free_inode_rec(struct inode_record *rec)
1035 {
1036         if (!rec->errors && rec->checked && rec->found_inode_item &&
1037             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1038                 return 1;
1039         return 0;
1040 }
1041
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043                                  struct inode_record *rec)
1044 {
1045         struct cache_extent *cache;
1046         struct inode_backref *tmp, *backref;
1047         struct ptr_node *node;
1048         u8 filetype;
1049
1050         if (!rec->found_inode_item)
1051                 return;
1052
1053         filetype = imode_to_type(rec->imode);
1054         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055                 if (backref->found_dir_item && backref->found_dir_index) {
1056                         if (backref->filetype != filetype)
1057                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058                         if (!backref->errors && backref->found_inode_ref &&
1059                             rec->nlink == rec->found_link) {
1060                                 list_del(&backref->list);
1061                                 free(backref);
1062                         }
1063                 }
1064         }
1065
1066         if (!rec->checked || rec->merging)
1067                 return;
1068
1069         if (S_ISDIR(rec->imode)) {
1070                 if (rec->found_size != rec->isize)
1071                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072                 if (rec->found_file_extent)
1073                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075                 if (rec->found_dir_item)
1076                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1077                 if (rec->found_size != rec->nbytes)
1078                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1079                 if (rec->nlink > 0 && !no_holes &&
1080                     (rec->extent_end < rec->isize ||
1081                      first_extent_gap(&rec->holes) < rec->isize))
1082                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1083         }
1084
1085         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086                 if (rec->found_csum_item && rec->nodatasum)
1087                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088                 if (rec->some_csum_missing && !rec->nodatasum)
1089                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1090         }
1091
1092         BUG_ON(rec->refs != 1);
1093         if (can_free_inode_rec(rec)) {
1094                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095                 node = container_of(cache, struct ptr_node, cache);
1096                 BUG_ON(node->data != rec);
1097                 remove_cache_extent(inode_cache, &node->cache);
1098                 free(node);
1099                 free_inode_rec(rec);
1100         }
1101 }
1102
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1104 {
1105         struct btrfs_path path;
1106         struct btrfs_key key;
1107         int ret;
1108
1109         key.objectid = BTRFS_ORPHAN_OBJECTID;
1110         key.type = BTRFS_ORPHAN_ITEM_KEY;
1111         key.offset = ino;
1112
1113         btrfs_init_path(&path);
1114         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115         btrfs_release_path(&path);
1116         if (ret > 0)
1117                 ret = -ENOENT;
1118         return ret;
1119 }
1120
1121 static int process_inode_item(struct extent_buffer *eb,
1122                               int slot, struct btrfs_key *key,
1123                               struct shared_node *active_node)
1124 {
1125         struct inode_record *rec;
1126         struct btrfs_inode_item *item;
1127
1128         rec = active_node->current;
1129         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130         if (rec->found_inode_item) {
1131                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1132                 return 1;
1133         }
1134         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135         rec->nlink = btrfs_inode_nlink(eb, item);
1136         rec->isize = btrfs_inode_size(eb, item);
1137         rec->nbytes = btrfs_inode_nbytes(eb, item);
1138         rec->imode = btrfs_inode_mode(eb, item);
1139         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1140                 rec->nodatasum = 1;
1141         rec->found_inode_item = 1;
1142         if (rec->nlink == 0)
1143                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144         maybe_free_inode_rec(&active_node->inode_cache, rec);
1145         return 0;
1146 }
1147
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1149                                                 const char *name,
1150                                                 int namelen, u64 dir)
1151 {
1152         struct inode_backref *backref;
1153
1154         list_for_each_entry(backref, &rec->backrefs, list) {
1155                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1156                         break;
1157                 if (backref->dir != dir || backref->namelen != namelen)
1158                         continue;
1159                 if (memcmp(name, backref->name, namelen))
1160                         continue;
1161                 return backref;
1162         }
1163
1164         backref = malloc(sizeof(*backref) + namelen + 1);
1165         if (!backref)
1166                 return NULL;
1167         memset(backref, 0, sizeof(*backref));
1168         backref->dir = dir;
1169         backref->namelen = namelen;
1170         memcpy(backref->name, name, namelen);
1171         backref->name[namelen] = '\0';
1172         list_add_tail(&backref->list, &rec->backrefs);
1173         return backref;
1174 }
1175
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177                              u64 ino, u64 dir, u64 index,
1178                              const char *name, int namelen,
1179                              u8 filetype, u8 itemtype, int errors)
1180 {
1181         struct inode_record *rec;
1182         struct inode_backref *backref;
1183
1184         rec = get_inode_rec(inode_cache, ino, 1);
1185         BUG_ON(IS_ERR(rec));
1186         backref = get_inode_backref(rec, name, namelen, dir);
1187         BUG_ON(!backref);
1188         if (errors)
1189                 backref->errors |= errors;
1190         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191                 if (backref->found_dir_index)
1192                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193                 if (backref->found_inode_ref && backref->index != index)
1194                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1195                 if (backref->found_dir_item && backref->filetype != filetype)
1196                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1197
1198                 backref->index = index;
1199                 backref->filetype = filetype;
1200                 backref->found_dir_index = 1;
1201         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1202                 rec->found_link++;
1203                 if (backref->found_dir_item)
1204                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205                 if (backref->found_dir_index && backref->filetype != filetype)
1206                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1207
1208                 backref->filetype = filetype;
1209                 backref->found_dir_item = 1;
1210         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212                 if (backref->found_inode_ref)
1213                         backref->errors |= REF_ERR_DUP_INODE_REF;
1214                 if (backref->found_dir_index && backref->index != index)
1215                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1216                 else
1217                         backref->index = index;
1218
1219                 backref->ref_type = itemtype;
1220                 backref->found_inode_ref = 1;
1221         } else {
1222                 BUG_ON(1);
1223         }
1224
1225         maybe_free_inode_rec(inode_cache, rec);
1226         return 0;
1227 }
1228
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230                             struct cache_tree *dst_cache)
1231 {
1232         struct inode_backref *backref;
1233         u32 dir_count = 0;
1234         int ret = 0;
1235
1236         dst->merging = 1;
1237         list_for_each_entry(backref, &src->backrefs, list) {
1238                 if (backref->found_dir_index) {
1239                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1240                                         backref->index, backref->name,
1241                                         backref->namelen, backref->filetype,
1242                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1243                 }
1244                 if (backref->found_dir_item) {
1245                         dir_count++;
1246                         add_inode_backref(dst_cache, dst->ino,
1247                                         backref->dir, 0, backref->name,
1248                                         backref->namelen, backref->filetype,
1249                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1250                 }
1251                 if (backref->found_inode_ref) {
1252                         add_inode_backref(dst_cache, dst->ino,
1253                                         backref->dir, backref->index,
1254                                         backref->name, backref->namelen, 0,
1255                                         backref->ref_type, backref->errors);
1256                 }
1257         }
1258
1259         if (src->found_dir_item)
1260                 dst->found_dir_item = 1;
1261         if (src->found_file_extent)
1262                 dst->found_file_extent = 1;
1263         if (src->found_csum_item)
1264                 dst->found_csum_item = 1;
1265         if (src->some_csum_missing)
1266                 dst->some_csum_missing = 1;
1267         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1269                 if (ret < 0)
1270                         return ret;
1271         }
1272
1273         BUG_ON(src->found_link < dir_count);
1274         dst->found_link += src->found_link - dir_count;
1275         dst->found_size += src->found_size;
1276         if (src->extent_start != (u64)-1) {
1277                 if (dst->extent_start == (u64)-1) {
1278                         dst->extent_start = src->extent_start;
1279                         dst->extent_end = src->extent_end;
1280                 } else {
1281                         if (dst->extent_end > src->extent_start)
1282                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1283                         else if (dst->extent_end < src->extent_start) {
1284                                 ret = add_file_extent_hole(&dst->holes,
1285                                         dst->extent_end,
1286                                         src->extent_start - dst->extent_end);
1287                         }
1288                         if (dst->extent_end < src->extent_end)
1289                                 dst->extent_end = src->extent_end;
1290                 }
1291         }
1292
1293         dst->errors |= src->errors;
1294         if (src->found_inode_item) {
1295                 if (!dst->found_inode_item) {
1296                         dst->nlink = src->nlink;
1297                         dst->isize = src->isize;
1298                         dst->nbytes = src->nbytes;
1299                         dst->imode = src->imode;
1300                         dst->nodatasum = src->nodatasum;
1301                         dst->found_inode_item = 1;
1302                 } else {
1303                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1304                 }
1305         }
1306         dst->merging = 0;
1307
1308         return 0;
1309 }
1310
1311 static int splice_shared_node(struct shared_node *src_node,
1312                               struct shared_node *dst_node)
1313 {
1314         struct cache_extent *cache;
1315         struct ptr_node *node, *ins;
1316         struct cache_tree *src, *dst;
1317         struct inode_record *rec, *conflict;
1318         u64 current_ino = 0;
1319         int splice = 0;
1320         int ret;
1321
1322         if (--src_node->refs == 0)
1323                 splice = 1;
1324         if (src_node->current)
1325                 current_ino = src_node->current->ino;
1326
1327         src = &src_node->root_cache;
1328         dst = &dst_node->root_cache;
1329 again:
1330         cache = search_cache_extent(src, 0);
1331         while (cache) {
1332                 node = container_of(cache, struct ptr_node, cache);
1333                 rec = node->data;
1334                 cache = next_cache_extent(cache);
1335
1336                 if (splice) {
1337                         remove_cache_extent(src, &node->cache);
1338                         ins = node;
1339                 } else {
1340                         ins = malloc(sizeof(*ins));
1341                         BUG_ON(!ins);
1342                         ins->cache.start = node->cache.start;
1343                         ins->cache.size = node->cache.size;
1344                         ins->data = rec;
1345                         rec->refs++;
1346                 }
1347                 ret = insert_cache_extent(dst, &ins->cache);
1348                 if (ret == -EEXIST) {
1349                         conflict = get_inode_rec(dst, rec->ino, 1);
1350                         BUG_ON(IS_ERR(conflict));
1351                         merge_inode_recs(rec, conflict, dst);
1352                         if (rec->checked) {
1353                                 conflict->checked = 1;
1354                                 if (dst_node->current == conflict)
1355                                         dst_node->current = NULL;
1356                         }
1357                         maybe_free_inode_rec(dst, conflict);
1358                         free_inode_rec(rec);
1359                         free(ins);
1360                 } else {
1361                         BUG_ON(ret);
1362                 }
1363         }
1364
1365         if (src == &src_node->root_cache) {
1366                 src = &src_node->inode_cache;
1367                 dst = &dst_node->inode_cache;
1368                 goto again;
1369         }
1370
1371         if (current_ino > 0 && (!dst_node->current ||
1372             current_ino > dst_node->current->ino)) {
1373                 if (dst_node->current) {
1374                         dst_node->current->checked = 1;
1375                         maybe_free_inode_rec(dst, dst_node->current);
1376                 }
1377                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378                 BUG_ON(IS_ERR(dst_node->current));
1379         }
1380         return 0;
1381 }
1382
1383 static void free_inode_ptr(struct cache_extent *cache)
1384 {
1385         struct ptr_node *node;
1386         struct inode_record *rec;
1387
1388         node = container_of(cache, struct ptr_node, cache);
1389         rec = node->data;
1390         free_inode_rec(rec);
1391         free(node);
1392 }
1393
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1395
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1397                                             u64 bytenr)
1398 {
1399         struct cache_extent *cache;
1400         struct shared_node *node;
1401
1402         cache = lookup_cache_extent(shared, bytenr, 1);
1403         if (cache) {
1404                 node = container_of(cache, struct shared_node, cache);
1405                 return node;
1406         }
1407         return NULL;
1408 }
1409
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1411 {
1412         int ret;
1413         struct shared_node *node;
1414
1415         node = calloc(1, sizeof(*node));
1416         if (!node)
1417                 return -ENOMEM;
1418         node->cache.start = bytenr;
1419         node->cache.size = 1;
1420         cache_tree_init(&node->root_cache);
1421         cache_tree_init(&node->inode_cache);
1422         node->refs = refs;
1423
1424         ret = insert_cache_extent(shared, &node->cache);
1425
1426         return ret;
1427 }
1428
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430                              struct walk_control *wc, int level)
1431 {
1432         struct shared_node *node;
1433         struct shared_node *dest;
1434         int ret;
1435
1436         if (level == wc->active_node)
1437                 return 0;
1438
1439         BUG_ON(wc->active_node <= level);
1440         node = find_shared_node(&wc->shared, bytenr);
1441         if (!node) {
1442                 ret = add_shared_node(&wc->shared, bytenr, refs);
1443                 BUG_ON(ret);
1444                 node = find_shared_node(&wc->shared, bytenr);
1445                 wc->nodes[level] = node;
1446                 wc->active_node = level;
1447                 return 0;
1448         }
1449
1450         if (wc->root_level == wc->active_node &&
1451             btrfs_root_refs(&root->root_item) == 0) {
1452                 if (--node->refs == 0) {
1453                         free_inode_recs_tree(&node->root_cache);
1454                         free_inode_recs_tree(&node->inode_cache);
1455                         remove_cache_extent(&wc->shared, &node->cache);
1456                         free(node);
1457                 }
1458                 return 1;
1459         }
1460
1461         dest = wc->nodes[wc->active_node];
1462         splice_shared_node(node, dest);
1463         if (node->refs == 0) {
1464                 remove_cache_extent(&wc->shared, &node->cache);
1465                 free(node);
1466         }
1467         return 1;
1468 }
1469
1470 static int leave_shared_node(struct btrfs_root *root,
1471                              struct walk_control *wc, int level)
1472 {
1473         struct shared_node *node;
1474         struct shared_node *dest;
1475         int i;
1476
1477         if (level == wc->root_level)
1478                 return 0;
1479
1480         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1481                 if (wc->nodes[i])
1482                         break;
1483         }
1484         BUG_ON(i >= BTRFS_MAX_LEVEL);
1485
1486         node = wc->nodes[wc->active_node];
1487         wc->nodes[wc->active_node] = NULL;
1488         wc->active_node = i;
1489
1490         dest = wc->nodes[wc->active_node];
1491         if (wc->active_node < wc->root_level ||
1492             btrfs_root_refs(&root->root_item) > 0) {
1493                 BUG_ON(node->refs <= 1);
1494                 splice_shared_node(node, dest);
1495         } else {
1496                 BUG_ON(node->refs < 2);
1497                 node->refs--;
1498         }
1499         return 0;
1500 }
1501
1502 /*
1503  * Returns:
1504  * < 0 - on error
1505  * 1   - if the root with id child_root_id is a child of root parent_root_id
1506  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1507  *       has other root(s) as parent(s)
1508  * 2   - if the root child_root_id doesn't have any parent roots
1509  */
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1511                          u64 child_root_id)
1512 {
1513         struct btrfs_path path;
1514         struct btrfs_key key;
1515         struct extent_buffer *leaf;
1516         int has_parent = 0;
1517         int ret;
1518
1519         btrfs_init_path(&path);
1520
1521         key.objectid = parent_root_id;
1522         key.type = BTRFS_ROOT_REF_KEY;
1523         key.offset = child_root_id;
1524         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525                                 0, 0);
1526         if (ret < 0)
1527                 return ret;
1528         btrfs_release_path(&path);
1529         if (!ret)
1530                 return 1;
1531
1532         key.objectid = child_root_id;
1533         key.type = BTRFS_ROOT_BACKREF_KEY;
1534         key.offset = 0;
1535         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1536                                 0, 0);
1537         if (ret < 0)
1538                 goto out;
1539
1540         while (1) {
1541                 leaf = path.nodes[0];
1542                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1544                         if (ret)
1545                                 break;
1546                         leaf = path.nodes[0];
1547                 }
1548
1549                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1550                 if (key.objectid != child_root_id ||
1551                     key.type != BTRFS_ROOT_BACKREF_KEY)
1552                         break;
1553
1554                 has_parent = 1;
1555
1556                 if (key.offset == parent_root_id) {
1557                         btrfs_release_path(&path);
1558                         return 1;
1559                 }
1560
1561                 path.slots[0]++;
1562         }
1563 out:
1564         btrfs_release_path(&path);
1565         if (ret < 0)
1566                 return ret;
1567         return has_parent ? 0 : 2;
1568 }
1569
1570 static int process_dir_item(struct extent_buffer *eb,
1571                             int slot, struct btrfs_key *key,
1572                             struct shared_node *active_node)
1573 {
1574         u32 total;
1575         u32 cur = 0;
1576         u32 len;
1577         u32 name_len;
1578         u32 data_len;
1579         int error;
1580         int nritems = 0;
1581         u8 filetype;
1582         struct btrfs_dir_item *di;
1583         struct inode_record *rec;
1584         struct cache_tree *root_cache;
1585         struct cache_tree *inode_cache;
1586         struct btrfs_key location;
1587         char namebuf[BTRFS_NAME_LEN];
1588
1589         root_cache = &active_node->root_cache;
1590         inode_cache = &active_node->inode_cache;
1591         rec = active_node->current;
1592         rec->found_dir_item = 1;
1593
1594         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595         total = btrfs_item_size_nr(eb, slot);
1596         while (cur < total) {
1597                 nritems++;
1598                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599                 name_len = btrfs_dir_name_len(eb, di);
1600                 data_len = btrfs_dir_data_len(eb, di);
1601                 filetype = btrfs_dir_type(eb, di);
1602
1603                 rec->found_size += name_len;
1604                 if (cur + sizeof(*di) + name_len > total ||
1605                     name_len > BTRFS_NAME_LEN) {
1606                         error = REF_ERR_NAME_TOO_LONG;
1607
1608                         if (cur + sizeof(*di) > total)
1609                                 break;
1610                         len = min_t(u32, total - cur - sizeof(*di),
1611                                     BTRFS_NAME_LEN);
1612                 } else {
1613                         len = name_len;
1614                         error = 0;
1615                 }
1616
1617                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1618
1619                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620                     key->offset != btrfs_name_hash(namebuf, len)) {
1621                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1622                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623                         key->objectid, key->offset, namebuf, len, filetype,
1624                         key->offset, btrfs_name_hash(namebuf, len));
1625                 }
1626
1627                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628                         add_inode_backref(inode_cache, location.objectid,
1629                                           key->objectid, key->offset, namebuf,
1630                                           len, filetype, key->type, error);
1631                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632                         add_inode_backref(root_cache, location.objectid,
1633                                           key->objectid, key->offset,
1634                                           namebuf, len, filetype,
1635                                           key->type, error);
1636                 } else {
1637                         fprintf(stderr, "invalid location in dir item %u\n",
1638                                 location.type);
1639                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640                                           key->objectid, key->offset, namebuf,
1641                                           len, filetype, key->type, error);
1642                 }
1643
1644                 len = sizeof(*di) + name_len + data_len;
1645                 di = (struct btrfs_dir_item *)((char *)di + len);
1646                 cur += len;
1647         }
1648         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1650
1651         return 0;
1652 }
1653
1654 static int process_inode_ref(struct extent_buffer *eb,
1655                              int slot, struct btrfs_key *key,
1656                              struct shared_node *active_node)
1657 {
1658         u32 total;
1659         u32 cur = 0;
1660         u32 len;
1661         u32 name_len;
1662         u64 index;
1663         int error;
1664         struct cache_tree *inode_cache;
1665         struct btrfs_inode_ref *ref;
1666         char namebuf[BTRFS_NAME_LEN];
1667
1668         inode_cache = &active_node->inode_cache;
1669
1670         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671         total = btrfs_item_size_nr(eb, slot);
1672         while (cur < total) {
1673                 name_len = btrfs_inode_ref_name_len(eb, ref);
1674                 index = btrfs_inode_ref_index(eb, ref);
1675
1676                 /* inode_ref + namelen should not cross item boundary */
1677                 if (cur + sizeof(*ref) + name_len > total ||
1678                     name_len > BTRFS_NAME_LEN) {
1679                         if (total < cur + sizeof(*ref))
1680                                 break;
1681
1682                         /* Still try to read out the remaining part */
1683                         len = min_t(u32, total - cur - sizeof(*ref),
1684                                     BTRFS_NAME_LEN);
1685                         error = REF_ERR_NAME_TOO_LONG;
1686                 } else {
1687                         len = name_len;
1688                         error = 0;
1689                 }
1690
1691                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692                 add_inode_backref(inode_cache, key->objectid, key->offset,
1693                                   index, namebuf, len, 0, key->type, error);
1694
1695                 len = sizeof(*ref) + name_len;
1696                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1697                 cur += len;
1698         }
1699         return 0;
1700 }
1701
1702 static int process_inode_extref(struct extent_buffer *eb,
1703                                 int slot, struct btrfs_key *key,
1704                                 struct shared_node *active_node)
1705 {
1706         u32 total;
1707         u32 cur = 0;
1708         u32 len;
1709         u32 name_len;
1710         u64 index;
1711         u64 parent;
1712         int error;
1713         struct cache_tree *inode_cache;
1714         struct btrfs_inode_extref *extref;
1715         char namebuf[BTRFS_NAME_LEN];
1716
1717         inode_cache = &active_node->inode_cache;
1718
1719         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720         total = btrfs_item_size_nr(eb, slot);
1721         while (cur < total) {
1722                 name_len = btrfs_inode_extref_name_len(eb, extref);
1723                 index = btrfs_inode_extref_index(eb, extref);
1724                 parent = btrfs_inode_extref_parent(eb, extref);
1725                 if (name_len <= BTRFS_NAME_LEN) {
1726                         len = name_len;
1727                         error = 0;
1728                 } else {
1729                         len = BTRFS_NAME_LEN;
1730                         error = REF_ERR_NAME_TOO_LONG;
1731                 }
1732                 read_extent_buffer(eb, namebuf,
1733                                    (unsigned long)(extref + 1), len);
1734                 add_inode_backref(inode_cache, key->objectid, parent,
1735                                   index, namebuf, len, 0, key->type, error);
1736
1737                 len = sizeof(*extref) + name_len;
1738                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1739                 cur += len;
1740         }
1741         return 0;
1742
1743 }
1744
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746                             u64 len, u64 *found)
1747 {
1748         struct btrfs_key key;
1749         struct btrfs_path path;
1750         struct extent_buffer *leaf;
1751         int ret;
1752         size_t size;
1753         *found = 0;
1754         u64 csum_end;
1755         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1756
1757         btrfs_init_path(&path);
1758
1759         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1760         key.offset = start;
1761         key.type = BTRFS_EXTENT_CSUM_KEY;
1762
1763         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1764                                 &key, &path, 0, 0);
1765         if (ret < 0)
1766                 goto out;
1767         if (ret > 0 && path.slots[0] > 0) {
1768                 leaf = path.nodes[0];
1769                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771                     key.type == BTRFS_EXTENT_CSUM_KEY)
1772                         path.slots[0]--;
1773         }
1774
1775         while (len > 0) {
1776                 leaf = path.nodes[0];
1777                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1779                         if (ret > 0)
1780                                 break;
1781                         else if (ret < 0)
1782                                 goto out;
1783                         leaf = path.nodes[0];
1784                 }
1785
1786                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1787                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788                     key.type != BTRFS_EXTENT_CSUM_KEY)
1789                         break;
1790
1791                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1792                 if (key.offset >= start + len)
1793                         break;
1794
1795                 if (key.offset > start)
1796                         start = key.offset;
1797
1798                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1799                 csum_end = key.offset + (size / csum_size) *
1800                            root->fs_info->sectorsize;
1801                 if (csum_end > start) {
1802                         size = min(csum_end - start, len);
1803                         len -= size;
1804                         start += size;
1805                         *found += size;
1806                 }
1807
1808                 path.slots[0]++;
1809         }
1810 out:
1811         btrfs_release_path(&path);
1812         if (ret < 0)
1813                 return ret;
1814         return 0;
1815 }
1816
1817 static int process_file_extent(struct btrfs_root *root,
1818                                 struct extent_buffer *eb,
1819                                 int slot, struct btrfs_key *key,
1820                                 struct shared_node *active_node)
1821 {
1822         struct inode_record *rec;
1823         struct btrfs_file_extent_item *fi;
1824         u64 num_bytes = 0;
1825         u64 disk_bytenr = 0;
1826         u64 extent_offset = 0;
1827         u64 mask = root->fs_info->sectorsize - 1;
1828         int extent_type;
1829         int ret;
1830
1831         rec = active_node->current;
1832         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833         rec->found_file_extent = 1;
1834
1835         if (rec->extent_start == (u64)-1) {
1836                 rec->extent_start = key->offset;
1837                 rec->extent_end = key->offset;
1838         }
1839
1840         if (rec->extent_end > key->offset)
1841                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842         else if (rec->extent_end < key->offset) {
1843                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844                                            key->offset - rec->extent_end);
1845                 if (ret < 0)
1846                         return ret;
1847         }
1848
1849         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850         extent_type = btrfs_file_extent_type(eb, fi);
1851
1852         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1854                 if (num_bytes == 0)
1855                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856                 rec->found_size += num_bytes;
1857                 num_bytes = (num_bytes + mask) & ~mask;
1858         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862                 extent_offset = btrfs_file_extent_offset(eb, fi);
1863                 if (num_bytes == 0 || (num_bytes & mask))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (num_bytes + extent_offset >
1866                     btrfs_file_extent_ram_bytes(eb, fi))
1867                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1868                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869                     (btrfs_file_extent_compression(eb, fi) ||
1870                      btrfs_file_extent_encryption(eb, fi) ||
1871                      btrfs_file_extent_other_encoding(eb, fi)))
1872                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1873                 if (disk_bytenr > 0)
1874                         rec->found_size += num_bytes;
1875         } else {
1876                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1877         }
1878         rec->extent_end = key->offset + num_bytes;
1879
1880         /*
1881          * The data reloc tree will copy full extents into its inode and then
1882          * copy the corresponding csums.  Because the extent it copied could be
1883          * a preallocated extent that hasn't been written to yet there may be no
1884          * csums to copy, ergo we won't have csums for our file extent.  This is
1885          * ok so just don't bother checking csums if the inode belongs to the
1886          * data reloc tree.
1887          */
1888         if (disk_bytenr > 0 &&
1889             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1890                 u64 found;
1891                 if (btrfs_file_extent_compression(eb, fi))
1892                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1893                 else
1894                         disk_bytenr += extent_offset;
1895
1896                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1897                 if (ret < 0)
1898                         return ret;
1899                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1900                         if (found > 0)
1901                                 rec->found_csum_item = 1;
1902                         if (found < num_bytes)
1903                                 rec->some_csum_missing = 1;
1904                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1905                         if (found > 0)
1906                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1907                 }
1908         }
1909         return 0;
1910 }
1911
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913                             struct walk_control *wc)
1914 {
1915         struct btrfs_key key;
1916         u32 nritems;
1917         int i;
1918         int ret = 0;
1919         struct cache_tree *inode_cache;
1920         struct shared_node *active_node;
1921
1922         if (wc->root_level == wc->active_node &&
1923             btrfs_root_refs(&root->root_item) == 0)
1924                 return 0;
1925
1926         active_node = wc->nodes[wc->active_node];
1927         inode_cache = &active_node->inode_cache;
1928         nritems = btrfs_header_nritems(eb);
1929         for (i = 0; i < nritems; i++) {
1930                 btrfs_item_key_to_cpu(eb, &key, i);
1931
1932                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1933                         continue;
1934                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1935                         continue;
1936
1937                 if (active_node->current == NULL ||
1938                     active_node->current->ino < key.objectid) {
1939                         if (active_node->current) {
1940                                 active_node->current->checked = 1;
1941                                 maybe_free_inode_rec(inode_cache,
1942                                                      active_node->current);
1943                         }
1944                         active_node->current = get_inode_rec(inode_cache,
1945                                                              key.objectid, 1);
1946                         BUG_ON(IS_ERR(active_node->current));
1947                 }
1948                 switch (key.type) {
1949                 case BTRFS_DIR_ITEM_KEY:
1950                 case BTRFS_DIR_INDEX_KEY:
1951                         ret = process_dir_item(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_REF_KEY:
1954                         ret = process_inode_ref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_EXTREF_KEY:
1957                         ret = process_inode_extref(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_INODE_ITEM_KEY:
1960                         ret = process_inode_item(eb, i, &key, active_node);
1961                         break;
1962                 case BTRFS_EXTENT_DATA_KEY:
1963                         ret = process_file_extent(root, eb, i, &key,
1964                                                   active_node);
1965                         break;
1966                 default:
1967                         break;
1968                 };
1969         }
1970         return ret;
1971 }
1972
1973 struct node_refs {
1974         u64 bytenr[BTRFS_MAX_LEVEL];
1975         u64 refs[BTRFS_MAX_LEVEL];
1976         int need_check[BTRFS_MAX_LEVEL];
1977         /* field for checking all trees */
1978         int checked[BTRFS_MAX_LEVEL];
1979         /* the corresponding extent should be marked as full backref or not */
1980         int full_backref[BTRFS_MAX_LEVEL];
1981 };
1982
1983 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1984                              struct extent_buffer *eb, struct node_refs *nrefs,
1985                              u64 level, int check_all);
1986 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1987                             unsigned int ext_ref);
1988
1989 /*
1990  * Returns >0  Found error, not fatal, should continue
1991  * Returns <0  Fatal error, must exit the whole check
1992  * Returns 0   No errors found
1993  */
1994 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1995                                struct node_refs *nrefs, int *level, int ext_ref)
1996 {
1997         struct extent_buffer *cur = path->nodes[0];
1998         struct btrfs_key key;
1999         u64 cur_bytenr;
2000         u32 nritems;
2001         u64 first_ino = 0;
2002         int root_level = btrfs_header_level(root->node);
2003         int i;
2004         int ret = 0; /* Final return value */
2005         int err = 0; /* Positive error bitmap */
2006
2007         cur_bytenr = cur->start;
2008
2009         /* skip to first inode item or the first inode number change */
2010         nritems = btrfs_header_nritems(cur);
2011         for (i = 0; i < nritems; i++) {
2012                 btrfs_item_key_to_cpu(cur, &key, i);
2013                 if (i == 0)
2014                         first_ino = key.objectid;
2015                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2016                     (first_ino && first_ino != key.objectid))
2017                         break;
2018         }
2019         if (i == nritems) {
2020                 path->slots[0] = nritems;
2021                 return 0;
2022         }
2023         path->slots[0] = i;
2024
2025 again:
2026         err |= check_inode_item(root, path, ext_ref);
2027
2028         /* modify cur since check_inode_item may change path */
2029         cur = path->nodes[0];
2030
2031         if (err & LAST_ITEM)
2032                 goto out;
2033
2034         /* still have inode items in thie leaf */
2035         if (cur->start == cur_bytenr)
2036                 goto again;
2037
2038         /*
2039          * we have switched to another leaf, above nodes may
2040          * have changed, here walk down the path, if a node
2041          * or leaf is shared, check whether we can skip this
2042          * node or leaf.
2043          */
2044         for (i = root_level; i >= 0; i--) {
2045                 if (path->nodes[i]->start == nrefs->bytenr[i])
2046                         continue;
2047
2048                 ret = update_nodes_refs(root, path->nodes[i]->start,
2049                                 path->nodes[i], nrefs, i, 0);
2050                 if (ret)
2051                         goto out;
2052
2053                 if (!nrefs->need_check[i]) {
2054                         *level += 1;
2055                         break;
2056                 }
2057         }
2058
2059         for (i = 0; i < *level; i++) {
2060                 free_extent_buffer(path->nodes[i]);
2061                 path->nodes[i] = NULL;
2062         }
2063 out:
2064         err &= ~LAST_ITEM;
2065         if (err && !ret)
2066                 ret = err;
2067         return ret;
2068 }
2069
2070 static void reada_walk_down(struct btrfs_root *root,
2071                             struct extent_buffer *node, int slot)
2072 {
2073         struct btrfs_fs_info *fs_info = root->fs_info;
2074         u64 bytenr;
2075         u64 ptr_gen;
2076         u32 nritems;
2077         int i;
2078         int level;
2079
2080         level = btrfs_header_level(node);
2081         if (level != 1)
2082                 return;
2083
2084         nritems = btrfs_header_nritems(node);
2085         for (i = slot; i < nritems; i++) {
2086                 bytenr = btrfs_node_blockptr(node, i);
2087                 ptr_gen = btrfs_node_ptr_generation(node, i);
2088                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2089         }
2090 }
2091
2092 /*
2093  * Check the child node/leaf by the following condition:
2094  * 1. the first item key of the node/leaf should be the same with the one
2095  *    in parent.
2096  * 2. block in parent node should match the child node/leaf.
2097  * 3. generation of parent node and child's header should be consistent.
2098  *
2099  * Or the child node/leaf pointed by the key in parent is not valid.
2100  *
2101  * We hope to check leaf owner too, but since subvol may share leaves,
2102  * which makes leaf owner check not so strong, key check should be
2103  * sufficient enough for that case.
2104  */
2105 static int check_child_node(struct extent_buffer *parent, int slot,
2106                             struct extent_buffer *child)
2107 {
2108         struct btrfs_key parent_key;
2109         struct btrfs_key child_key;
2110         int ret = 0;
2111
2112         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2113         if (btrfs_header_level(child) == 0)
2114                 btrfs_item_key_to_cpu(child, &child_key, 0);
2115         else
2116                 btrfs_node_key_to_cpu(child, &child_key, 0);
2117
2118         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2119                 ret = -EINVAL;
2120                 fprintf(stderr,
2121                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2122                         parent_key.objectid, parent_key.type, parent_key.offset,
2123                         child_key.objectid, child_key.type, child_key.offset);
2124         }
2125         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2126                 ret = -EINVAL;
2127                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2128                         btrfs_node_blockptr(parent, slot),
2129                         btrfs_header_bytenr(child));
2130         }
2131         if (btrfs_node_ptr_generation(parent, slot) !=
2132             btrfs_header_generation(child)) {
2133                 ret = -EINVAL;
2134                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2135                         btrfs_header_generation(child),
2136                         btrfs_node_ptr_generation(parent, slot));
2137         }
2138         return ret;
2139 }
2140
2141 /*
2142  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2143  * in every fs or file tree check. Here we find its all root ids, and only check
2144  * it in the fs or file tree which has the smallest root id.
2145  */
2146 static int need_check(struct btrfs_root *root, struct ulist *roots)
2147 {
2148         struct rb_node *node;
2149         struct ulist_node *u;
2150
2151         if (roots->nnodes == 1)
2152                 return 1;
2153
2154         node = rb_first(&roots->root);
2155         u = rb_entry(node, struct ulist_node, rb_node);
2156         /*
2157          * current root id is not smallest, we skip it and let it be checked
2158          * in the fs or file tree who hash the smallest root id.
2159          */
2160         if (root->objectid != u->val)
2161                 return 0;
2162
2163         return 1;
2164 }
2165
2166 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2167                                u64 *flags_ret)
2168 {
2169         struct btrfs_root *extent_root = root->fs_info->extent_root;
2170         struct btrfs_root_item *ri = &root->root_item;
2171         struct btrfs_extent_inline_ref *iref;
2172         struct btrfs_extent_item *ei;
2173         struct btrfs_key key;
2174         struct btrfs_path *path = NULL;
2175         unsigned long ptr;
2176         unsigned long end;
2177         u64 flags;
2178         u64 owner = 0;
2179         u64 offset;
2180         int slot;
2181         int type;
2182         int ret = 0;
2183
2184         /*
2185          * Except file/reloc tree, we can not have FULL BACKREF MODE
2186          */
2187         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
2188                 goto normal;
2189
2190         /* root node */
2191         if (eb->start == btrfs_root_bytenr(ri))
2192                 goto normal;
2193
2194         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2195                 goto full_backref;
2196
2197         owner = btrfs_header_owner(eb);
2198         if (owner == root->objectid)
2199                 goto normal;
2200
2201         path = btrfs_alloc_path();
2202         if (!path)
2203                 return -ENOMEM;
2204
2205         key.objectid = btrfs_header_bytenr(eb);
2206         key.type = (u8)-1;
2207         key.offset = (u64)-1;
2208
2209         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2210         if (ret <= 0) {
2211                 ret = -EIO;
2212                 goto out;
2213         }
2214
2215         if (ret > 0) {
2216                 ret = btrfs_previous_extent_item(extent_root, path,
2217                                                  key.objectid);
2218                 if (ret)
2219                         goto full_backref;
2220
2221         }
2222         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2223
2224         eb = path->nodes[0];
2225         slot = path->slots[0];
2226         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2227
2228         flags = btrfs_extent_flags(eb, ei);
2229         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2230                 goto full_backref;
2231
2232         ptr = (unsigned long)(ei + 1);
2233         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
2234
2235         if (key.type == BTRFS_EXTENT_ITEM_KEY)
2236                 ptr += sizeof(struct btrfs_tree_block_info);
2237
2238 next:
2239         /* Reached extent item ends normally */
2240         if (ptr == end)
2241                 goto full_backref;
2242
2243         /* Beyond extent item end, wrong item size */
2244         if (ptr > end) {
2245                 error("extent item at bytenr %llu slot %d has wrong size",
2246                         eb->start, slot);
2247                 goto full_backref;
2248         }
2249
2250         iref = (struct btrfs_extent_inline_ref *)ptr;
2251         offset = btrfs_extent_inline_ref_offset(eb, iref);
2252         type = btrfs_extent_inline_ref_type(eb, iref);
2253
2254         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2255                 goto normal;
2256         ptr += btrfs_extent_inline_ref_size(type);
2257         goto next;
2258
2259 normal:
2260         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2261         goto out;
2262
2263 full_backref:
2264         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2265 out:
2266         btrfs_free_path(path);
2267         return ret;
2268 }
2269
2270 /*
2271  * for a tree node or leaf, we record its reference count, so later if we still
2272  * process this node or leaf, don't need to compute its reference count again.
2273  *
2274  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
2275  */
2276 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2277                              struct extent_buffer *eb, struct node_refs *nrefs,
2278                              u64 level, int check_all)
2279 {
2280         struct ulist *roots;
2281         u64 refs = 0;
2282         u64 flags = 0;
2283         int root_level = btrfs_header_level(root->node);
2284         int check;
2285         int ret;
2286
2287         if (nrefs->bytenr[level] == bytenr)
2288                 return 0;
2289
2290         if (bytenr != (u64)-1) {
2291                 /* the return value of this function seems a mistake */
2292                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2293                                        level, 1, &refs, &flags);
2294                 /* temporary fix */
2295                 if (ret < 0 && !check_all)
2296                         return ret;
2297
2298                 nrefs->bytenr[level] = bytenr;
2299                 nrefs->refs[level] = refs;
2300                 nrefs->full_backref[level] = 0;
2301                 nrefs->checked[level] = 0;
2302
2303                 if (refs > 1) {
2304                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2305                                                    0, &roots);
2306                         if (ret)
2307                                 return -EIO;
2308
2309                         check = need_check(root, roots);
2310                         ulist_free(roots);
2311                         nrefs->need_check[level] = check;
2312                 } else {
2313                         if (!check_all) {
2314                                 nrefs->need_check[level] = 1;
2315                         } else {
2316                                 if (level == root_level) {
2317                                         nrefs->need_check[level] = 1;
2318                                 } else {
2319                                         /*
2320                                          * The node refs may have not been
2321                                          * updated if upper needs checking (the
2322                                          * lowest root_objectid) the node can
2323                                          * be checked.
2324                                          */
2325                                         nrefs->need_check[level] =
2326                                                 nrefs->need_check[level + 1];
2327                                 }
2328                         }
2329                 }
2330         }
2331
2332         if (check_all && eb) {
2333                 calc_extent_flag_v2(root, eb, &flags);
2334                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2335                         nrefs->full_backref[level] = 1;
2336         }
2337
2338         return 0;
2339 }
2340
2341 /*
2342  * @level           if @level == -1 means extent data item
2343  *                  else normal treeblocl.
2344  */
2345 static int should_check_extent_strictly(struct btrfs_root *root,
2346                                         struct node_refs *nrefs, int level)
2347 {
2348         int root_level = btrfs_header_level(root->node);
2349
2350         if (level > root_level || level < -1)
2351                 return 1;
2352         if (level == root_level)
2353                 return 1;
2354         /*
2355          * if the upper node is marked full backref, it should contain shared
2356          * backref of the parent (except owner == root->objectid).
2357          */
2358         while (++level <= root_level)
2359                 if (nrefs->refs[level] > 1)
2360                         return 0;
2361
2362         return 1;
2363 }
2364
2365 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2366                           struct walk_control *wc, int *level,
2367                           struct node_refs *nrefs)
2368 {
2369         enum btrfs_tree_block_status status;
2370         u64 bytenr;
2371         u64 ptr_gen;
2372         struct btrfs_fs_info *fs_info = root->fs_info;
2373         struct extent_buffer *next;
2374         struct extent_buffer *cur;
2375         int ret, err = 0;
2376         u64 refs;
2377
2378         WARN_ON(*level < 0);
2379         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2380
2381         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2382                 refs = nrefs->refs[*level];
2383                 ret = 0;
2384         } else {
2385                 ret = btrfs_lookup_extent_info(NULL, root,
2386                                        path->nodes[*level]->start,
2387                                        *level, 1, &refs, NULL);
2388                 if (ret < 0) {
2389                         err = ret;
2390                         goto out;
2391                 }
2392                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2393                 nrefs->refs[*level] = refs;
2394         }
2395
2396         if (refs > 1) {
2397                 ret = enter_shared_node(root, path->nodes[*level]->start,
2398                                         refs, wc, *level);
2399                 if (ret > 0) {
2400                         err = ret;
2401                         goto out;
2402                 }
2403         }
2404
2405         while (*level >= 0) {
2406                 WARN_ON(*level < 0);
2407                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2408                 cur = path->nodes[*level];
2409
2410                 if (btrfs_header_level(cur) != *level)
2411                         WARN_ON(1);
2412
2413                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2414                         break;
2415                 if (*level == 0) {
2416                         ret = process_one_leaf(root, cur, wc);
2417                         if (ret < 0)
2418                                 err = ret;
2419                         break;
2420                 }
2421                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2422                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2423
2424                 if (bytenr == nrefs->bytenr[*level - 1]) {
2425                         refs = nrefs->refs[*level - 1];
2426                 } else {
2427                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2428                                         *level - 1, 1, &refs, NULL);
2429                         if (ret < 0) {
2430                                 refs = 0;
2431                         } else {
2432                                 nrefs->bytenr[*level - 1] = bytenr;
2433                                 nrefs->refs[*level - 1] = refs;
2434                         }
2435                 }
2436
2437                 if (refs > 1) {
2438                         ret = enter_shared_node(root, bytenr, refs,
2439                                                 wc, *level - 1);
2440                         if (ret > 0) {
2441                                 path->slots[*level]++;
2442                                 continue;
2443                         }
2444                 }
2445
2446                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2447                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2448                         free_extent_buffer(next);
2449                         reada_walk_down(root, cur, path->slots[*level]);
2450                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2451                         if (!extent_buffer_uptodate(next)) {
2452                                 struct btrfs_key node_key;
2453
2454                                 btrfs_node_key_to_cpu(path->nodes[*level],
2455                                                       &node_key,
2456                                                       path->slots[*level]);
2457                                 btrfs_add_corrupt_extent_record(root->fs_info,
2458                                                 &node_key,
2459                                                 path->nodes[*level]->start,
2460                                                 root->fs_info->nodesize,
2461                                                 *level);
2462                                 err = -EIO;
2463                                 goto out;
2464                         }
2465                 }
2466
2467                 ret = check_child_node(cur, path->slots[*level], next);
2468                 if (ret) {
2469                         free_extent_buffer(next);
2470                         err = ret;
2471                         goto out;
2472                 }
2473
2474                 if (btrfs_is_leaf(next))
2475                         status = btrfs_check_leaf(root, NULL, next);
2476                 else
2477                         status = btrfs_check_node(root, NULL, next);
2478                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2479                         free_extent_buffer(next);
2480                         err = -EIO;
2481                         goto out;
2482                 }
2483
2484                 *level = *level - 1;
2485                 free_extent_buffer(path->nodes[*level]);
2486                 path->nodes[*level] = next;
2487                 path->slots[*level] = 0;
2488         }
2489 out:
2490         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2491         return err;
2492 }
2493
2494 static int fs_root_objectid(u64 objectid);
2495
2496 /*
2497  * Update global fs information.
2498  */
2499 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2500                          int level)
2501 {
2502         u32 free_nrs;
2503         struct extent_buffer *eb = path->nodes[level];
2504
2505         total_btree_bytes += eb->len;
2506         if (fs_root_objectid(root->objectid))
2507                 total_fs_tree_bytes += eb->len;
2508         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2509                 total_extent_tree_bytes += eb->len;
2510
2511         if (level == 0) {
2512                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2513         } else {
2514                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2515                             btrfs_header_nritems(eb));
2516                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2517         }
2518 }
2519
2520 /*
2521  * This function only handles BACKREF_MISSING,
2522  * If corresponding extent item exists, increase the ref, else insert an extent
2523  * item and backref.
2524  *
2525  * Returns error bits after repair.
2526  */
2527 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2528                                  struct btrfs_root *root,
2529                                  struct extent_buffer *node,
2530                                  struct node_refs *nrefs, int level, int err)
2531 {
2532         struct btrfs_fs_info *fs_info = root->fs_info;
2533         struct btrfs_root *extent_root = fs_info->extent_root;
2534         struct btrfs_path path;
2535         struct btrfs_extent_item *ei;
2536         struct btrfs_tree_block_info *bi;
2537         struct btrfs_key key;
2538         struct extent_buffer *eb;
2539         u32 size = sizeof(*ei);
2540         u32 node_size = root->fs_info->nodesize;
2541         int insert_extent = 0;
2542         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2543         int root_level = btrfs_header_level(root->node);
2544         int generation;
2545         int ret;
2546         u64 owner;
2547         u64 bytenr;
2548         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2549         u64 parent = 0;
2550
2551         if ((err & BACKREF_MISSING) == 0)
2552                 return err;
2553
2554         WARN_ON(level > BTRFS_MAX_LEVEL);
2555         WARN_ON(level < 0);
2556
2557         btrfs_init_path(&path);
2558         bytenr = btrfs_header_bytenr(node);
2559         owner = btrfs_header_owner(node);
2560         generation = btrfs_header_generation(node);
2561
2562         key.objectid = bytenr;
2563         key.type = (u8)-1;
2564         key.offset = (u64)-1;
2565
2566         /* Search for the extent item */
2567         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2568         if (ret <= 0) {
2569                 ret = -EIO;
2570                 goto out;
2571         }
2572
2573         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2574         if (ret)
2575                 insert_extent = 1;
2576
2577         /* calculate if the extent item flag is full backref or not */
2578         if (nrefs->full_backref[level] != 0)
2579                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2580
2581         /* insert an extent item */
2582         if (insert_extent) {
2583                 struct btrfs_disk_key copy_key;
2584
2585                 generation = btrfs_header_generation(node);
2586
2587                 if (level < root_level && nrefs->full_backref[level + 1] &&
2588                     owner != root->objectid) {
2589                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2590                 }
2591
2592                 key.objectid = bytenr;
2593                 if (!skinny_metadata) {
2594                         key.type = BTRFS_EXTENT_ITEM_KEY;
2595                         key.offset = node_size;
2596                         size += sizeof(*bi);
2597                 } else {
2598                         key.type = BTRFS_METADATA_ITEM_KEY;
2599                         key.offset = level;
2600                 }
2601
2602                 btrfs_release_path(&path);
2603                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2604                                               size);
2605                 if (ret)
2606                         goto out;
2607
2608                 eb = path.nodes[0];
2609                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2610
2611                 btrfs_set_extent_refs(eb, ei, 0);
2612                 btrfs_set_extent_generation(eb, ei, generation);
2613                 btrfs_set_extent_flags(eb, ei, flags);
2614
2615                 if (!skinny_metadata) {
2616                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2617                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2618                                              sizeof(*bi));
2619                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2620                         btrfs_set_disk_key_type(&copy_key, 0);
2621                         btrfs_set_disk_key_offset(&copy_key, 0);
2622
2623                         btrfs_set_tree_block_level(eb, bi, level);
2624                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2625                 }
2626                 btrfs_mark_buffer_dirty(eb);
2627                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2628                 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
2629                                          1, 0);
2630
2631                 nrefs->refs[level] = 0;
2632                 nrefs->full_backref[level] =
2633                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2634                 btrfs_release_path(&path);
2635         }
2636
2637         if (level < root_level && nrefs->full_backref[level + 1] &&
2638             owner != root->objectid)
2639                 parent = nrefs->bytenr[level + 1];
2640
2641         /* increase the ref */
2642         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2643                         parent, root->objectid, level, 0);
2644
2645         nrefs->refs[level]++;
2646 out:
2647         btrfs_release_path(&path);
2648         if (ret) {
2649                 error(
2650         "failed to repair tree block ref start %llu root %llu due to %s",
2651                       bytenr, root->objectid, strerror(-ret));
2652         } else {
2653                 printf("Added one tree block ref start %llu %s %llu\n",
2654                        bytenr, parent ? "parent" : "root",
2655                        parent ? parent : root->objectid);
2656                 err &= ~BACKREF_MISSING;
2657         }
2658
2659         return err;
2660 }
2661
2662 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2663                             unsigned int ext_ref);
2664 static int check_tree_block_ref(struct btrfs_root *root,
2665                                 struct extent_buffer *eb, u64 bytenr,
2666                                 int level, u64 owner, struct node_refs *nrefs);
2667 static int check_leaf_items(struct btrfs_trans_handle *trans,
2668                             struct btrfs_root *root, struct btrfs_path *path,
2669                             struct node_refs *nrefs, int account_bytes);
2670
2671 /*
2672  * @trans      just for lowmem repair mode
2673  * @check all  if not 0 then check all tree block backrefs and items
2674  *             0 then just check relationship of items in fs tree(s)
2675  *
2676  * Returns >0  Found error, should continue
2677  * Returns <0  Fatal error, must exit the whole check
2678  * Returns 0   No errors found
2679  */
2680 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2681                              struct btrfs_root *root, struct btrfs_path *path,
2682                              int *level, struct node_refs *nrefs, int ext_ref,
2683                              int check_all)
2684
2685 {
2686         enum btrfs_tree_block_status status;
2687         u64 bytenr;
2688         u64 ptr_gen;
2689         struct btrfs_fs_info *fs_info = root->fs_info;
2690         struct extent_buffer *next;
2691         struct extent_buffer *cur;
2692         int ret;
2693         int err = 0;
2694         int check;
2695         int account_file_data = 0;
2696
2697         WARN_ON(*level < 0);
2698         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2699
2700         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2701                                 path->nodes[*level], nrefs, *level, check_all);
2702         if (ret < 0)
2703                 return ret;
2704
2705         while (*level >= 0) {
2706                 WARN_ON(*level < 0);
2707                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2708                 cur = path->nodes[*level];
2709                 bytenr = btrfs_header_bytenr(cur);
2710                 check = nrefs->need_check[*level];
2711
2712                 if (btrfs_header_level(cur) != *level)
2713                         WARN_ON(1);
2714                /*
2715                 * Update bytes accounting and check tree block ref
2716                 * NOTE: Doing accounting and check before checking nritems
2717                 * is necessary because of empty node/leaf.
2718                 */
2719                 if ((check_all && !nrefs->checked[*level]) ||
2720                     (!check_all && nrefs->need_check[*level])) {
2721                         ret = check_tree_block_ref(root, cur,
2722                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2723                            btrfs_header_owner(cur), nrefs);
2724
2725                         if (repair && ret)
2726                                 ret = repair_tree_block_ref(trans, root,
2727                                     path->nodes[*level], nrefs, *level, ret);
2728                         err |= ret;
2729
2730                         if (check_all && nrefs->need_check[*level] &&
2731                                 nrefs->refs[*level]) {
2732                                 account_bytes(root, path, *level);
2733                                 account_file_data = 1;
2734                         }
2735                         nrefs->checked[*level] = 1;
2736                 }
2737
2738                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2739                         break;
2740
2741                 /* Don't forgot to check leaf/node validation */
2742                 if (*level == 0) {
2743                         /* skip duplicate check */
2744                         if (check || !check_all) {
2745                                 ret = btrfs_check_leaf(root, NULL, cur);
2746                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2747                                         err |= -EIO;
2748                                         break;
2749                                 }
2750                         }
2751
2752                         ret = 0;
2753                         if (!check_all)
2754                                 ret = process_one_leaf_v2(root, path, nrefs,
2755                                                           level, ext_ref);
2756                         else
2757                                 ret = check_leaf_items(trans, root, path,
2758                                                nrefs, account_file_data);
2759                         err |= ret;
2760                         break;
2761                 } else {
2762                         if (check || !check_all) {
2763                                 ret = btrfs_check_node(root, NULL, cur);
2764                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2765                                         err |= -EIO;
2766                                         break;
2767                                 }
2768                         }
2769                 }
2770
2771                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2772                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2773
2774                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2775                                         check_all);
2776                 if (ret < 0)
2777                         break;
2778                 /*
2779                  * check all trees in check_chunks_and_extent_v2
2780                  * check shared node once in check_fs_roots
2781                  */
2782                 if (!check_all && !nrefs->need_check[*level - 1]) {
2783                         path->slots[*level]++;
2784                         continue;
2785                 }
2786
2787                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2788                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2789                         free_extent_buffer(next);
2790                         reada_walk_down(root, cur, path->slots[*level]);
2791                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2792                         if (!extent_buffer_uptodate(next)) {
2793                                 struct btrfs_key node_key;
2794
2795                                 btrfs_node_key_to_cpu(path->nodes[*level],
2796                                                       &node_key,
2797                                                       path->slots[*level]);
2798                                 btrfs_add_corrupt_extent_record(fs_info,
2799                                         &node_key, path->nodes[*level]->start,
2800                                         fs_info->nodesize, *level);
2801                                 err |= -EIO;
2802                                 break;
2803                         }
2804                 }
2805
2806                 ret = check_child_node(cur, path->slots[*level], next);
2807                 err |= ret;
2808                 if (ret < 0) 
2809                         break;
2810
2811                 if (btrfs_is_leaf(next))
2812                         status = btrfs_check_leaf(root, NULL, next);
2813                 else
2814                         status = btrfs_check_node(root, NULL, next);
2815                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2816                         free_extent_buffer(next);
2817                         err |= -EIO;
2818                         break;
2819                 }
2820
2821                 *level = *level - 1;
2822                 free_extent_buffer(path->nodes[*level]);
2823                 path->nodes[*level] = next;
2824                 path->slots[*level] = 0;
2825                 account_file_data = 0;
2826
2827                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2828         }
2829         return err;
2830 }
2831
2832 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2833                         struct walk_control *wc, int *level)
2834 {
2835         int i;
2836         struct extent_buffer *leaf;
2837
2838         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2839                 leaf = path->nodes[i];
2840                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2841                         path->slots[i]++;
2842                         *level = i;
2843                         return 0;
2844                 } else {
2845                         free_extent_buffer(path->nodes[*level]);
2846                         path->nodes[*level] = NULL;
2847                         BUG_ON(*level > wc->active_node);
2848                         if (*level == wc->active_node)
2849                                 leave_shared_node(root, wc, *level);
2850                         *level = i + 1;
2851                 }
2852         }
2853         return 1;
2854 }
2855
2856 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2857                            int *level)
2858 {
2859         int i;
2860         struct extent_buffer *leaf;
2861
2862         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2863                 leaf = path->nodes[i];
2864                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2865                         path->slots[i]++;
2866                         *level = i;
2867                         return 0;
2868                 } else {
2869                         free_extent_buffer(path->nodes[*level]);
2870                         path->nodes[*level] = NULL;
2871                         *level = i + 1;
2872                 }
2873         }
2874         return 1;
2875 }
2876
2877 static int check_root_dir(struct inode_record *rec)
2878 {
2879         struct inode_backref *backref;
2880         int ret = -1;
2881
2882         if (!rec->found_inode_item || rec->errors)
2883                 goto out;
2884         if (rec->nlink != 1 || rec->found_link != 0)
2885                 goto out;
2886         if (list_empty(&rec->backrefs))
2887                 goto out;
2888         backref = to_inode_backref(rec->backrefs.next);
2889         if (!backref->found_inode_ref)
2890                 goto out;
2891         if (backref->index != 0 || backref->namelen != 2 ||
2892             memcmp(backref->name, "..", 2))
2893                 goto out;
2894         if (backref->found_dir_index || backref->found_dir_item)
2895                 goto out;
2896         ret = 0;
2897 out:
2898         return ret;
2899 }
2900
2901 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2902                               struct btrfs_root *root, struct btrfs_path *path,
2903                               struct inode_record *rec)
2904 {
2905         struct btrfs_inode_item *ei;
2906         struct btrfs_key key;
2907         int ret;
2908
2909         key.objectid = rec->ino;
2910         key.type = BTRFS_INODE_ITEM_KEY;
2911         key.offset = (u64)-1;
2912
2913         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2914         if (ret < 0)
2915                 goto out;
2916         if (ret) {
2917                 if (!path->slots[0]) {
2918                         ret = -ENOENT;
2919                         goto out;
2920                 }
2921                 path->slots[0]--;
2922                 ret = 0;
2923         }
2924         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2925         if (key.objectid != rec->ino) {
2926                 ret = -ENOENT;
2927                 goto out;
2928         }
2929
2930         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2931                             struct btrfs_inode_item);
2932         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2933         btrfs_mark_buffer_dirty(path->nodes[0]);
2934         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2935         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2936                root->root_key.objectid);
2937 out:
2938         btrfs_release_path(path);
2939         return ret;
2940 }
2941
2942 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2943                                     struct btrfs_root *root,
2944                                     struct btrfs_path *path,
2945                                     struct inode_record *rec)
2946 {
2947         int ret;
2948
2949         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2950         btrfs_release_path(path);
2951         if (!ret)
2952                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2953         return ret;
2954 }
2955
2956 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2957                                struct btrfs_root *root,
2958                                struct btrfs_path *path,
2959                                struct inode_record *rec)
2960 {
2961         struct btrfs_inode_item *ei;
2962         struct btrfs_key key;
2963         int ret = 0;
2964
2965         key.objectid = rec->ino;
2966         key.type = BTRFS_INODE_ITEM_KEY;
2967         key.offset = 0;
2968
2969         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2970         if (ret) {
2971                 if (ret > 0)
2972                         ret = -ENOENT;
2973                 goto out;
2974         }
2975
2976         /* Since ret == 0, no need to check anything */
2977         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2978                             struct btrfs_inode_item);
2979         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2980         btrfs_mark_buffer_dirty(path->nodes[0]);
2981         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2982         printf("reset nbytes for ino %llu root %llu\n",
2983                rec->ino, root->root_key.objectid);
2984 out:
2985         btrfs_release_path(path);
2986         return ret;
2987 }
2988
2989 static int add_missing_dir_index(struct btrfs_root *root,
2990                                  struct cache_tree *inode_cache,
2991                                  struct inode_record *rec,
2992                                  struct inode_backref *backref)
2993 {
2994         struct btrfs_path path;
2995         struct btrfs_trans_handle *trans;
2996         struct btrfs_dir_item *dir_item;
2997         struct extent_buffer *leaf;
2998         struct btrfs_key key;
2999         struct btrfs_disk_key disk_key;
3000         struct inode_record *dir_rec;
3001         unsigned long name_ptr;
3002         u32 data_size = sizeof(*dir_item) + backref->namelen;
3003         int ret;
3004
3005         trans = btrfs_start_transaction(root, 1);
3006         if (IS_ERR(trans))
3007                 return PTR_ERR(trans);
3008
3009         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3010                 (unsigned long long)rec->ino);
3011
3012         btrfs_init_path(&path);
3013         key.objectid = backref->dir;
3014         key.type = BTRFS_DIR_INDEX_KEY;
3015         key.offset = backref->index;
3016         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3017         BUG_ON(ret);
3018
3019         leaf = path.nodes[0];
3020         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
3021
3022         disk_key.objectid = cpu_to_le64(rec->ino);
3023         disk_key.type = BTRFS_INODE_ITEM_KEY;
3024         disk_key.offset = 0;
3025
3026         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3027         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3028         btrfs_set_dir_data_len(leaf, dir_item, 0);
3029         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
3030         name_ptr = (unsigned long)(dir_item + 1);
3031         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3032         btrfs_mark_buffer_dirty(leaf);
3033         btrfs_release_path(&path);
3034         btrfs_commit_transaction(trans, root);
3035
3036         backref->found_dir_index = 1;
3037         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3038         BUG_ON(IS_ERR(dir_rec));
3039         if (!dir_rec)
3040                 return 0;
3041         dir_rec->found_size += backref->namelen;
3042         if (dir_rec->found_size == dir_rec->isize &&
3043             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3044                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3045         if (dir_rec->found_size != dir_rec->isize)
3046                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
3047
3048         return 0;
3049 }
3050
3051 static int delete_dir_index(struct btrfs_root *root,
3052                             struct inode_backref *backref)
3053 {
3054         struct btrfs_trans_handle *trans;
3055         struct btrfs_dir_item *di;
3056         struct btrfs_path path;
3057         int ret = 0;
3058
3059         trans = btrfs_start_transaction(root, 1);
3060         if (IS_ERR(trans))
3061                 return PTR_ERR(trans);
3062
3063         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3064                 (unsigned long long)backref->dir,
3065                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3066                 (unsigned long long)root->objectid);
3067
3068         btrfs_init_path(&path);
3069         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3070                                     backref->name, backref->namelen,
3071                                     backref->index, -1);
3072         if (IS_ERR(di)) {
3073                 ret = PTR_ERR(di);
3074                 btrfs_release_path(&path);
3075                 btrfs_commit_transaction(trans, root);
3076                 if (ret == -ENOENT)
3077                         return 0;
3078                 return ret;
3079         }
3080
3081         if (!di)
3082                 ret = btrfs_del_item(trans, root, &path);
3083         else
3084                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3085         BUG_ON(ret);
3086         btrfs_release_path(&path);
3087         btrfs_commit_transaction(trans, root);
3088         return ret;
3089 }
3090
3091 static int __create_inode_item(struct btrfs_trans_handle *trans,
3092                                struct btrfs_root *root, u64 ino, u64 size,
3093                                u64 nbytes, u64 nlink, u32 mode)
3094 {
3095         struct btrfs_inode_item ii;
3096         time_t now = time(NULL);
3097         int ret;
3098
3099         btrfs_set_stack_inode_size(&ii, size);
3100         btrfs_set_stack_inode_nbytes(&ii, nbytes);
3101         btrfs_set_stack_inode_nlink(&ii, nlink);
3102         btrfs_set_stack_inode_mode(&ii, mode);
3103         btrfs_set_stack_inode_generation(&ii, trans->transid);
3104         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3105         btrfs_set_stack_timespec_sec(&ii.ctime, now);
3106         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3107         btrfs_set_stack_timespec_sec(&ii.mtime, now);
3108         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3109         btrfs_set_stack_timespec_sec(&ii.otime, 0);
3110         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3111
3112         ret = btrfs_insert_inode(trans, root, ino, &ii);
3113         ASSERT(!ret);
3114
3115         warning("root %llu inode %llu recreating inode item, this may "
3116                 "be incomplete, please check permissions and content after "
3117                 "the fsck completes.\n", (unsigned long long)root->objectid,
3118                 (unsigned long long)ino);
3119
3120         return 0;
3121 }
3122
3123 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3124                                     struct btrfs_root *root, u64 ino,
3125                                     u8 filetype)
3126 {
3127         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3128
3129         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
3130 }
3131
3132 static int create_inode_item(struct btrfs_root *root,
3133                              struct inode_record *rec, int root_dir)
3134 {
3135         struct btrfs_trans_handle *trans;
3136         u64 nlink = 0;
3137         u32 mode = 0;
3138         u64 size = 0;
3139         int ret;
3140
3141         trans = btrfs_start_transaction(root, 1);
3142         if (IS_ERR(trans)) {
3143                 ret = PTR_ERR(trans);
3144                 return ret;
3145         }
3146
3147         nlink = root_dir ? 1 : rec->found_link;
3148         if (rec->found_dir_item) {
3149                 if (rec->found_file_extent)
3150                         fprintf(stderr, "root %llu inode %llu has both a dir "
3151                                 "item and extents, unsure if it is a dir or a "
3152                                 "regular file so setting it as a directory\n",
3153                                 (unsigned long long)root->objectid,
3154                                 (unsigned long long)rec->ino);
3155                 mode = S_IFDIR | 0755;
3156                 size = rec->found_size;
3157         } else if (!rec->found_dir_item) {
3158                 size = rec->extent_end;
3159                 mode =  S_IFREG | 0755;
3160         }
3161
3162         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3163                                   nlink, mode);
3164         btrfs_commit_transaction(trans, root);
3165         return 0;
3166 }
3167
3168 static int repair_inode_backrefs(struct btrfs_root *root,
3169                                  struct inode_record *rec,
3170                                  struct cache_tree *inode_cache,
3171                                  int delete)
3172 {
3173         struct inode_backref *tmp, *backref;
3174         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3175         int ret = 0;
3176         int repaired = 0;
3177
3178         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3179                 if (!delete && rec->ino == root_dirid) {
3180                         if (!rec->found_inode_item) {
3181                                 ret = create_inode_item(root, rec, 1);
3182                                 if (ret)
3183                                         break;
3184                                 repaired++;
3185                         }
3186                 }
3187
3188                 /* Index 0 for root dir's are special, don't mess with it */
3189                 if (rec->ino == root_dirid && backref->index == 0)
3190                         continue;
3191
3192                 if (delete &&
3193                     ((backref->found_dir_index && !backref->found_inode_ref) ||
3194                      (backref->found_dir_index && backref->found_inode_ref &&
3195                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3196                         ret = delete_dir_index(root, backref);
3197                         if (ret)
3198                                 break;
3199                         repaired++;
3200                         list_del(&backref->list);
3201                         free(backref);
3202                         continue;
3203                 }
3204
3205                 if (!delete && !backref->found_dir_index &&
3206                     backref->found_dir_item && backref->found_inode_ref) {
3207                         ret = add_missing_dir_index(root, inode_cache, rec,
3208                                                     backref);
3209                         if (ret)
3210                                 break;
3211                         repaired++;
3212                         if (backref->found_dir_item &&
3213                             backref->found_dir_index) {
3214                                 if (!backref->errors &&
3215                                     backref->found_inode_ref) {
3216                                         list_del(&backref->list);
3217                                         free(backref);
3218                                         continue;
3219                                 }
3220                         }
3221                 }
3222
3223                 if (!delete && (!backref->found_dir_index &&
3224                                 !backref->found_dir_item &&
3225                                 backref->found_inode_ref)) {
3226                         struct btrfs_trans_handle *trans;
3227                         struct btrfs_key location;
3228
3229                         ret = check_dir_conflict(root, backref->name,
3230                                                  backref->namelen,
3231                                                  backref->dir,
3232                                                  backref->index);
3233                         if (ret) {
3234                                 /*
3235                                  * let nlink fixing routine to handle it,
3236                                  * which can do it better.
3237                                  */
3238                                 ret = 0;
3239                                 break;
3240                         }
3241                         location.objectid = rec->ino;
3242                         location.type = BTRFS_INODE_ITEM_KEY;
3243                         location.offset = 0;
3244
3245                         trans = btrfs_start_transaction(root, 1);
3246                         if (IS_ERR(trans)) {
3247                                 ret = PTR_ERR(trans);
3248                                 break;
3249                         }
3250                         fprintf(stderr, "adding missing dir index/item pair "
3251                                 "for inode %llu\n",
3252                                 (unsigned long long)rec->ino);
3253                         ret = btrfs_insert_dir_item(trans, root, backref->name,
3254                                                     backref->namelen,
3255                                                     backref->dir, &location,
3256                                                     imode_to_type(rec->imode),
3257                                                     backref->index);
3258                         BUG_ON(ret);
3259                         btrfs_commit_transaction(trans, root);
3260                         repaired++;
3261                 }
3262
3263                 if (!delete && (backref->found_inode_ref &&
3264                                 backref->found_dir_index &&
3265                                 backref->found_dir_item &&
3266                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3267                                 !rec->found_inode_item)) {
3268                         ret = create_inode_item(root, rec, 0);
3269                         if (ret)
3270                                 break;
3271                         repaired++;
3272                 }
3273
3274         }
3275         return ret ? ret : repaired;
3276 }
3277
3278 /*
3279  * To determine the file type for nlink/inode_item repair
3280  *
3281  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3282  * Return -ENOENT if file type is not found.
3283  */
3284 static int find_file_type(struct inode_record *rec, u8 *type)
3285 {
3286         struct inode_backref *backref;
3287
3288         /* For inode item recovered case */
3289         if (rec->found_inode_item) {
3290                 *type = imode_to_type(rec->imode);
3291                 return 0;
3292         }
3293
3294         list_for_each_entry(backref, &rec->backrefs, list) {
3295                 if (backref->found_dir_index || backref->found_dir_item) {
3296                         *type = backref->filetype;
3297                         return 0;
3298                 }
3299         }
3300         return -ENOENT;
3301 }
3302
3303 /*
3304  * To determine the file name for nlink repair
3305  *
3306  * Return 0 if file name is found, set name and namelen.
3307  * Return -ENOENT if file name is not found.
3308  */
3309 static int find_file_name(struct inode_record *rec,
3310                           char *name, int *namelen)
3311 {
3312         struct inode_backref *backref;
3313
3314         list_for_each_entry(backref, &rec->backrefs, list) {
3315                 if (backref->found_dir_index || backref->found_dir_item ||
3316                     backref->found_inode_ref) {
3317                         memcpy(name, backref->name, backref->namelen);
3318                         *namelen = backref->namelen;
3319                         return 0;
3320                 }
3321         }
3322         return -ENOENT;
3323 }
3324
3325 /* Reset the nlink of the inode to the correct one */
3326 static int reset_nlink(struct btrfs_trans_handle *trans,
3327                        struct btrfs_root *root,
3328                        struct btrfs_path *path,
3329                        struct inode_record *rec)
3330 {
3331         struct inode_backref *backref;
3332         struct inode_backref *tmp;
3333         struct btrfs_key key;
3334         struct btrfs_inode_item *inode_item;
3335         int ret = 0;
3336
3337         /* We don't believe this either, reset it and iterate backref */
3338         rec->found_link = 0;
3339
3340         /* Remove all backref including the valid ones */
3341         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3342                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3343                                    backref->index, backref->name,
3344                                    backref->namelen, 0);
3345                 if (ret < 0)
3346                         goto out;
3347
3348                 /* remove invalid backref, so it won't be added back */
3349                 if (!(backref->found_dir_index &&
3350                       backref->found_dir_item &&
3351                       backref->found_inode_ref)) {
3352                         list_del(&backref->list);
3353                         free(backref);
3354                 } else {
3355                         rec->found_link++;
3356                 }
3357         }
3358
3359         /* Set nlink to 0 */
3360         key.objectid = rec->ino;
3361         key.type = BTRFS_INODE_ITEM_KEY;
3362         key.offset = 0;
3363         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3364         if (ret < 0)
3365                 goto out;
3366         if (ret > 0) {
3367                 ret = -ENOENT;
3368                 goto out;
3369         }
3370         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3371                                     struct btrfs_inode_item);
3372         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3373         btrfs_mark_buffer_dirty(path->nodes[0]);
3374         btrfs_release_path(path);
3375
3376         /*
3377          * Add back valid inode_ref/dir_item/dir_index,
3378          * add_link() will handle the nlink inc, so new nlink must be correct
3379          */
3380         list_for_each_entry(backref, &rec->backrefs, list) {
3381                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3382                                      backref->name, backref->namelen,
3383                                      backref->filetype, &backref->index, 1, 0);
3384                 if (ret < 0)
3385                         goto out;
3386         }
3387 out:
3388         btrfs_release_path(path);
3389         return ret;
3390 }
3391
3392 static int get_highest_inode(struct btrfs_trans_handle *trans,
3393                                 struct btrfs_root *root,
3394                                 struct btrfs_path *path,
3395                                 u64 *highest_ino)
3396 {
3397         struct btrfs_key key, found_key;
3398         int ret;
3399
3400         btrfs_init_path(path);
3401         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3402         key.offset = -1;
3403         key.type = BTRFS_INODE_ITEM_KEY;
3404         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3405         if (ret == 1) {
3406                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3407                                 path->slots[0] - 1);
3408                 *highest_ino = found_key.objectid;
3409                 ret = 0;
3410         }
3411         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3412                 ret = -EOVERFLOW;
3413         btrfs_release_path(path);
3414         return ret;
3415 }
3416
3417 /*
3418  * Link inode to dir 'lost+found'. Increase @ref_count.
3419  *
3420  * Returns 0 means success.
3421  * Returns <0 means failure.
3422  */
3423 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3424                                    struct btrfs_root *root,
3425                                    struct btrfs_path *path,
3426                                    u64 ino, char *namebuf, u32 name_len,
3427                                    u8 filetype, u64 *ref_count)
3428 {
3429         char *dir_name = "lost+found";
3430         u64 lost_found_ino;
3431         int ret;
3432         u32 mode = 0700;
3433
3434         btrfs_release_path(path);
3435         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3436         if (ret < 0)
3437                 goto out;
3438         lost_found_ino++;
3439
3440         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3441                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3442                           mode);
3443         if (ret < 0) {
3444                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3445                 goto out;
3446         }
3447         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3448                              namebuf, name_len, filetype, NULL, 1, 0);
3449         /*
3450          * Add ".INO" suffix several times to handle case where
3451          * "FILENAME.INO" is already taken by another file.
3452          */
3453         while (ret == -EEXIST) {
3454                 /*
3455                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3456                  */
3457                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3458                         ret = -EFBIG;
3459                         goto out;
3460                 }
3461                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3462                          ".%llu", ino);
3463                 name_len += count_digits(ino) + 1;
3464                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3465                                      name_len, filetype, NULL, 1, 0);
3466         }
3467         if (ret < 0) {
3468                 error("failed to link the inode %llu to %s dir: %s",
3469                       ino, dir_name, strerror(-ret));
3470                 goto out;
3471         }
3472
3473         ++*ref_count;
3474         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3475                name_len, namebuf, dir_name);
3476 out:
3477         btrfs_release_path(path);
3478         if (ret)
3479                 error("failed to move file '%.*s' to '%s' dir", name_len,
3480                                 namebuf, dir_name);
3481         return ret;
3482 }
3483
3484 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3485                                struct btrfs_root *root,
3486                                struct btrfs_path *path,
3487                                struct inode_record *rec)
3488 {
3489         char namebuf[BTRFS_NAME_LEN] = {0};
3490         u8 type = 0;
3491         int namelen = 0;
3492         int name_recovered = 0;
3493         int type_recovered = 0;
3494         int ret = 0;
3495
3496         /*
3497          * Get file name and type first before these invalid inode ref
3498          * are deleted by remove_all_invalid_backref()
3499          */
3500         name_recovered = !find_file_name(rec, namebuf, &namelen);
3501         type_recovered = !find_file_type(rec, &type);
3502
3503         if (!name_recovered) {
3504                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3505                        rec->ino, rec->ino);
3506                 namelen = count_digits(rec->ino);
3507                 sprintf(namebuf, "%llu", rec->ino);
3508                 name_recovered = 1;
3509         }
3510         if (!type_recovered) {
3511                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3512                        rec->ino);
3513                 type = BTRFS_FT_REG_FILE;
3514                 type_recovered = 1;
3515         }
3516
3517         ret = reset_nlink(trans, root, path, rec);
3518         if (ret < 0) {
3519                 fprintf(stderr,
3520                         "Failed to reset nlink for inode %llu: %s\n",
3521                         rec->ino, strerror(-ret));
3522                 goto out;
3523         }
3524
3525         if (rec->found_link == 0) {
3526                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3527                                               namebuf, namelen, type,
3528                                               (u64 *)&rec->found_link);
3529                 if (ret)
3530                         goto out;
3531         }
3532         printf("Fixed the nlink of inode %llu\n", rec->ino);
3533 out:
3534         /*
3535          * Clear the flag anyway, or we will loop forever for the same inode
3536          * as it will not be removed from the bad inode list and the dead loop
3537          * happens.
3538          */
3539         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3540         btrfs_release_path(path);
3541         return ret;
3542 }
3543
3544 /*
3545  * Check if there is any normal(reg or prealloc) file extent for given
3546  * ino.
3547  * This is used to determine the file type when neither its dir_index/item or
3548  * inode_item exists.
3549  *
3550  * This will *NOT* report error, if any error happens, just consider it does
3551  * not have any normal file extent.
3552  */
3553 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3554 {
3555         struct btrfs_path path;
3556         struct btrfs_key key;
3557         struct btrfs_key found_key;
3558         struct btrfs_file_extent_item *fi;
3559         u8 type;
3560         int ret = 0;
3561
3562         btrfs_init_path(&path);
3563         key.objectid = ino;
3564         key.type = BTRFS_EXTENT_DATA_KEY;
3565         key.offset = 0;
3566
3567         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3568         if (ret < 0) {
3569                 ret = 0;
3570                 goto out;
3571         }
3572         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3573                 ret = btrfs_next_leaf(root, &path);
3574                 if (ret) {
3575                         ret = 0;
3576                         goto out;
3577                 }
3578         }
3579         while (1) {
3580                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3581                                       path.slots[0]);
3582                 if (found_key.objectid != ino ||
3583                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3584                         break;
3585                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3586                                     struct btrfs_file_extent_item);
3587                 type = btrfs_file_extent_type(path.nodes[0], fi);
3588                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3589                         ret = 1;
3590                         goto out;
3591                 }
3592         }
3593 out:
3594         btrfs_release_path(&path);
3595         return ret;
3596 }
3597
3598 static u32 btrfs_type_to_imode(u8 type)
3599 {
3600         static u32 imode_by_btrfs_type[] = {
3601                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3602                 [BTRFS_FT_DIR]          = S_IFDIR,
3603                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3604                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3605                 [BTRFS_FT_FIFO]         = S_IFIFO,
3606                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3607                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3608         };
3609
3610         return imode_by_btrfs_type[(type)];
3611 }
3612
3613 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3614                                 struct btrfs_root *root,
3615                                 struct btrfs_path *path,
3616                                 struct inode_record *rec)
3617 {
3618         u8 filetype;
3619         u32 mode = 0700;
3620         int type_recovered = 0;
3621         int ret = 0;
3622
3623         printf("Trying to rebuild inode:%llu\n", rec->ino);
3624
3625         type_recovered = !find_file_type(rec, &filetype);
3626
3627         /*
3628          * Try to determine inode type if type not found.
3629          *
3630          * For found regular file extent, it must be FILE.
3631          * For found dir_item/index, it must be DIR.
3632          *
3633          * For undetermined one, use FILE as fallback.
3634          *
3635          * TODO:
3636          * 1. If found backref(inode_index/item is already handled) to it,
3637          *    it must be DIR.
3638          *    Need new inode-inode ref structure to allow search for that.
3639          */
3640         if (!type_recovered) {
3641                 if (rec->found_file_extent &&
3642                     find_normal_file_extent(root, rec->ino)) {
3643                         type_recovered = 1;
3644                         filetype = BTRFS_FT_REG_FILE;
3645                 } else if (rec->found_dir_item) {
3646                         type_recovered = 1;
3647                         filetype = BTRFS_FT_DIR;
3648                 } else if (!list_empty(&rec->orphan_extents)) {
3649                         type_recovered = 1;
3650                         filetype = BTRFS_FT_REG_FILE;
3651                 } else{
3652                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3653                                rec->ino);
3654                         type_recovered = 1;
3655                         filetype = BTRFS_FT_REG_FILE;
3656                 }
3657         }
3658
3659         ret = btrfs_new_inode(trans, root, rec->ino,
3660                               mode | btrfs_type_to_imode(filetype));
3661         if (ret < 0)
3662                 goto out;
3663
3664         /*
3665          * Here inode rebuild is done, we only rebuild the inode item,
3666          * don't repair the nlink(like move to lost+found).
3667          * That is the job of nlink repair.
3668          *
3669          * We just fill the record and return
3670          */
3671         rec->found_dir_item = 1;
3672         rec->imode = mode | btrfs_type_to_imode(filetype);
3673         rec->nlink = 0;
3674         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3675         /* Ensure the inode_nlinks repair function will be called */
3676         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3677 out:
3678         return ret;
3679 }
3680
3681 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3682                                       struct btrfs_root *root,
3683                                       struct btrfs_path *path,
3684                                       struct inode_record *rec)
3685 {
3686         struct orphan_data_extent *orphan;
3687         struct orphan_data_extent *tmp;
3688         int ret = 0;
3689
3690         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3691                 /*
3692                  * Check for conflicting file extents
3693                  *
3694                  * Here we don't know whether the extents is compressed or not,
3695                  * so we can only assume it not compressed nor data offset,
3696                  * and use its disk_len as extent length.
3697                  */
3698                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3699                                        orphan->offset, orphan->disk_len, 0);
3700                 btrfs_release_path(path);
3701                 if (ret < 0)
3702                         goto out;
3703                 if (!ret) {
3704                         fprintf(stderr,
3705                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3706                                 orphan->disk_bytenr, orphan->disk_len);
3707                         ret = btrfs_free_extent(trans,
3708                                         root->fs_info->extent_root,
3709                                         orphan->disk_bytenr, orphan->disk_len,
3710                                         0, root->objectid, orphan->objectid,
3711                                         orphan->offset);
3712                         if (ret < 0)
3713                                 goto out;
3714                 }
3715                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3716                                 orphan->offset, orphan->disk_bytenr,
3717                                 orphan->disk_len, orphan->disk_len);
3718                 if (ret < 0)
3719                         goto out;
3720
3721                 /* Update file size info */
3722                 rec->found_size += orphan->disk_len;
3723                 if (rec->found_size == rec->nbytes)
3724                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3725
3726                 /* Update the file extent hole info too */
3727                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3728                                            orphan->disk_len);
3729                 if (ret < 0)
3730                         goto out;
3731                 if (RB_EMPTY_ROOT(&rec->holes))
3732                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3733
3734                 list_del(&orphan->list);
3735                 free(orphan);
3736         }
3737         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3738 out:
3739         return ret;
3740 }
3741
3742 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3743                                         struct btrfs_root *root,
3744                                         struct btrfs_path *path,
3745                                         struct inode_record *rec)
3746 {
3747         struct rb_node *node;
3748         struct file_extent_hole *hole;
3749         int found = 0;
3750         int ret = 0;
3751
3752         node = rb_first(&rec->holes);
3753
3754         while (node) {
3755                 found = 1;
3756                 hole = rb_entry(node, struct file_extent_hole, node);
3757                 ret = btrfs_punch_hole(trans, root, rec->ino,
3758                                        hole->start, hole->len);
3759                 if (ret < 0)
3760                         goto out;
3761                 ret = del_file_extent_hole(&rec->holes, hole->start,
3762                                            hole->len);
3763                 if (ret < 0)
3764                         goto out;
3765                 if (RB_EMPTY_ROOT(&rec->holes))
3766                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3767                 node = rb_first(&rec->holes);
3768         }
3769         /* special case for a file losing all its file extent */
3770         if (!found) {
3771                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3772                                        round_up(rec->isize,
3773                                                 root->fs_info->sectorsize));
3774                 if (ret < 0)
3775                         goto out;
3776         }
3777         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3778                rec->ino, root->objectid);
3779 out:
3780         return ret;
3781 }
3782
3783 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3784 {
3785         struct btrfs_trans_handle *trans;
3786         struct btrfs_path path;
3787         int ret = 0;
3788
3789         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3790                              I_ERR_NO_ORPHAN_ITEM |
3791                              I_ERR_LINK_COUNT_WRONG |
3792                              I_ERR_NO_INODE_ITEM |
3793                              I_ERR_FILE_EXTENT_ORPHAN |
3794                              I_ERR_FILE_EXTENT_DISCOUNT|
3795                              I_ERR_FILE_NBYTES_WRONG)))
3796                 return rec->errors;
3797
3798         /*
3799          * For nlink repair, it may create a dir and add link, so
3800          * 2 for parent(256)'s dir_index and dir_item
3801          * 2 for lost+found dir's inode_item and inode_ref
3802          * 1 for the new inode_ref of the file
3803          * 2 for lost+found dir's dir_index and dir_item for the file
3804          */
3805         trans = btrfs_start_transaction(root, 7);
3806         if (IS_ERR(trans))
3807                 return PTR_ERR(trans);
3808
3809         btrfs_init_path(&path);
3810         if (rec->errors & I_ERR_NO_INODE_ITEM)
3811                 ret = repair_inode_no_item(trans, root, &path, rec);
3812         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3813                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3814         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3815                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3816         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3817                 ret = repair_inode_isize(trans, root, &path, rec);
3818         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3819                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3820         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3821                 ret = repair_inode_nlinks(trans, root, &path, rec);
3822         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3823                 ret = repair_inode_nbytes(trans, root, &path, rec);
3824         btrfs_commit_transaction(trans, root);
3825         btrfs_release_path(&path);
3826         return ret;
3827 }
3828
3829 static int check_inode_recs(struct btrfs_root *root,
3830                             struct cache_tree *inode_cache)
3831 {
3832         struct cache_extent *cache;
3833         struct ptr_node *node;
3834         struct inode_record *rec;
3835         struct inode_backref *backref;
3836         int stage = 0;
3837         int ret = 0;
3838         int err = 0;
3839         u64 error = 0;
3840         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3841
3842         if (btrfs_root_refs(&root->root_item) == 0) {
3843                 if (!cache_tree_empty(inode_cache))
3844                         fprintf(stderr, "warning line %d\n", __LINE__);
3845                 return 0;
3846         }
3847
3848         /*
3849          * We need to repair backrefs first because we could change some of the
3850          * errors in the inode recs.
3851          *
3852          * We also need to go through and delete invalid backrefs first and then
3853          * add the correct ones second.  We do this because we may get EEXIST
3854          * when adding back the correct index because we hadn't yet deleted the
3855          * invalid index.
3856          *
3857          * For example, if we were missing a dir index then the directories
3858          * isize would be wrong, so if we fixed the isize to what we thought it
3859          * would be and then fixed the backref we'd still have a invalid fs, so
3860          * we need to add back the dir index and then check to see if the isize
3861          * is still wrong.
3862          */
3863         while (stage < 3) {
3864                 stage++;
3865                 if (stage == 3 && !err)
3866                         break;
3867
3868                 cache = search_cache_extent(inode_cache, 0);
3869                 while (repair && cache) {
3870                         node = container_of(cache, struct ptr_node, cache);
3871                         rec = node->data;
3872                         cache = next_cache_extent(cache);
3873
3874                         /* Need to free everything up and rescan */
3875                         if (stage == 3) {
3876                                 remove_cache_extent(inode_cache, &node->cache);
3877                                 free(node);
3878                                 free_inode_rec(rec);
3879                                 continue;
3880                         }
3881
3882                         if (list_empty(&rec->backrefs))
3883                                 continue;
3884
3885                         ret = repair_inode_backrefs(root, rec, inode_cache,
3886                                                     stage == 1);
3887                         if (ret < 0) {
3888                                 err = ret;
3889                                 stage = 2;
3890                                 break;
3891                         } if (ret > 0) {
3892                                 err = -EAGAIN;
3893                         }
3894                 }
3895         }
3896         if (err)
3897                 return err;
3898
3899         rec = get_inode_rec(inode_cache, root_dirid, 0);
3900         BUG_ON(IS_ERR(rec));
3901         if (rec) {
3902                 ret = check_root_dir(rec);
3903                 if (ret) {
3904                         fprintf(stderr, "root %llu root dir %llu error\n",
3905                                 (unsigned long long)root->root_key.objectid,
3906                                 (unsigned long long)root_dirid);
3907                         print_inode_error(root, rec);
3908                         error++;
3909                 }
3910         } else {
3911                 if (repair) {
3912                         struct btrfs_trans_handle *trans;
3913
3914                         trans = btrfs_start_transaction(root, 1);
3915                         if (IS_ERR(trans)) {
3916                                 err = PTR_ERR(trans);
3917                                 return err;
3918                         }
3919
3920                         fprintf(stderr,
3921                                 "root %llu missing its root dir, recreating\n",
3922                                 (unsigned long long)root->objectid);
3923
3924                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3925                         BUG_ON(ret);
3926
3927                         btrfs_commit_transaction(trans, root);
3928                         return -EAGAIN;
3929                 }
3930
3931                 fprintf(stderr, "root %llu root dir %llu not found\n",
3932                         (unsigned long long)root->root_key.objectid,
3933                         (unsigned long long)root_dirid);
3934         }
3935
3936         while (1) {
3937                 cache = search_cache_extent(inode_cache, 0);
3938                 if (!cache)
3939                         break;
3940                 node = container_of(cache, struct ptr_node, cache);
3941                 rec = node->data;
3942                 remove_cache_extent(inode_cache, &node->cache);
3943                 free(node);
3944                 if (rec->ino == root_dirid ||
3945                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3946                         free_inode_rec(rec);
3947                         continue;
3948                 }
3949
3950                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3951                         ret = check_orphan_item(root, rec->ino);
3952                         if (ret == 0)
3953                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3954                         if (can_free_inode_rec(rec)) {
3955                                 free_inode_rec(rec);
3956                                 continue;
3957                         }
3958                 }
3959
3960                 if (!rec->found_inode_item)
3961                         rec->errors |= I_ERR_NO_INODE_ITEM;
3962                 if (rec->found_link != rec->nlink)
3963                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3964                 if (repair) {
3965                         ret = try_repair_inode(root, rec);
3966                         if (ret == 0 && can_free_inode_rec(rec)) {
3967                                 free_inode_rec(rec);
3968                                 continue;
3969                         }
3970                         ret = 0;
3971                 }
3972
3973                 if (!(repair && ret == 0))
3974                         error++;
3975                 print_inode_error(root, rec);
3976                 list_for_each_entry(backref, &rec->backrefs, list) {
3977                         if (!backref->found_dir_item)
3978                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3979                         if (!backref->found_dir_index)
3980                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3981                         if (!backref->found_inode_ref)
3982                                 backref->errors |= REF_ERR_NO_INODE_REF;
3983                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3984                                 " namelen %u name %s filetype %d errors %x",
3985                                 (unsigned long long)backref->dir,
3986                                 (unsigned long long)backref->index,
3987                                 backref->namelen, backref->name,
3988                                 backref->filetype, backref->errors);
3989                         print_ref_error(backref->errors);
3990                 }
3991                 free_inode_rec(rec);
3992         }
3993         return (error > 0) ? -1 : 0;
3994 }
3995
3996 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3997                                         u64 objectid)
3998 {
3999         struct cache_extent *cache;
4000         struct root_record *rec = NULL;
4001         int ret;
4002
4003         cache = lookup_cache_extent(root_cache, objectid, 1);
4004         if (cache) {
4005                 rec = container_of(cache, struct root_record, cache);
4006         } else {
4007                 rec = calloc(1, sizeof(*rec));
4008                 if (!rec)
4009                         return ERR_PTR(-ENOMEM);
4010                 rec->objectid = objectid;
4011                 INIT_LIST_HEAD(&rec->backrefs);
4012                 rec->cache.start = objectid;
4013                 rec->cache.size = 1;
4014
4015                 ret = insert_cache_extent(root_cache, &rec->cache);
4016                 if (ret)
4017                         return ERR_PTR(-EEXIST);
4018         }
4019         return rec;
4020 }
4021
4022 static struct root_backref *get_root_backref(struct root_record *rec,
4023                                              u64 ref_root, u64 dir, u64 index,
4024                                              const char *name, int namelen)
4025 {
4026         struct root_backref *backref;
4027
4028         list_for_each_entry(backref, &rec->backrefs, list) {
4029                 if (backref->ref_root != ref_root || backref->dir != dir ||
4030                     backref->namelen != namelen)
4031                         continue;
4032                 if (memcmp(name, backref->name, namelen))
4033                         continue;
4034                 return backref;
4035         }
4036
4037         backref = calloc(1, sizeof(*backref) + namelen + 1);
4038         if (!backref)
4039                 return NULL;
4040         backref->ref_root = ref_root;
4041         backref->dir = dir;
4042         backref->index = index;
4043         backref->namelen = namelen;
4044         memcpy(backref->name, name, namelen);
4045         backref->name[namelen] = '\0';
4046         list_add_tail(&backref->list, &rec->backrefs);
4047         return backref;
4048 }
4049
4050 static void free_root_record(struct cache_extent *cache)
4051 {
4052         struct root_record *rec;
4053         struct root_backref *backref;
4054
4055         rec = container_of(cache, struct root_record, cache);
4056         while (!list_empty(&rec->backrefs)) {
4057                 backref = to_root_backref(rec->backrefs.next);
4058                 list_del(&backref->list);
4059                 free(backref);
4060         }
4061
4062         free(rec);
4063 }
4064
4065 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
4066
4067 static int add_root_backref(struct cache_tree *root_cache,
4068                             u64 root_id, u64 ref_root, u64 dir, u64 index,
4069                             const char *name, int namelen,
4070                             int item_type, int errors)
4071 {
4072         struct root_record *rec;
4073         struct root_backref *backref;
4074
4075         rec = get_root_rec(root_cache, root_id);
4076         BUG_ON(IS_ERR(rec));
4077         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4078         BUG_ON(!backref);
4079
4080         backref->errors |= errors;
4081
4082         if (item_type != BTRFS_DIR_ITEM_KEY) {
4083                 if (backref->found_dir_index || backref->found_back_ref ||
4084                     backref->found_forward_ref) {
4085                         if (backref->index != index)
4086                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
4087                 } else {
4088                         backref->index = index;
4089                 }
4090         }
4091
4092         if (item_type == BTRFS_DIR_ITEM_KEY) {
4093                 if (backref->found_forward_ref)
4094                         rec->found_ref++;
4095                 backref->found_dir_item = 1;
4096         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4097                 backref->found_dir_index = 1;
4098         } else if (item_type == BTRFS_ROOT_REF_KEY) {
4099                 if (backref->found_forward_ref)
4100                         backref->errors |= REF_ERR_DUP_ROOT_REF;
4101                 else if (backref->found_dir_item)
4102                         rec->found_ref++;
4103                 backref->found_forward_ref = 1;
4104         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4105                 if (backref->found_back_ref)
4106                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4107                 backref->found_back_ref = 1;
4108         } else {
4109                 BUG_ON(1);
4110         }
4111
4112         if (backref->found_forward_ref && backref->found_dir_item)
4113                 backref->reachable = 1;
4114         return 0;
4115 }
4116
4117 static int merge_root_recs(struct btrfs_root *root,
4118                            struct cache_tree *src_cache,
4119                            struct cache_tree *dst_cache)
4120 {
4121         struct cache_extent *cache;
4122         struct ptr_node *node;
4123         struct inode_record *rec;
4124         struct inode_backref *backref;
4125         int ret = 0;
4126
4127         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4128                 free_inode_recs_tree(src_cache);
4129                 return 0;
4130         }
4131
4132         while (1) {
4133                 cache = search_cache_extent(src_cache, 0);
4134                 if (!cache)
4135                         break;
4136                 node = container_of(cache, struct ptr_node, cache);
4137                 rec = node->data;
4138                 remove_cache_extent(src_cache, &node->cache);
4139                 free(node);
4140
4141                 ret = is_child_root(root, root->objectid, rec->ino);
4142                 if (ret < 0)
4143                         break;
4144                 else if (ret == 0)
4145                         goto skip;
4146
4147                 list_for_each_entry(backref, &rec->backrefs, list) {
4148                         BUG_ON(backref->found_inode_ref);
4149                         if (backref->found_dir_item)
4150                                 add_root_backref(dst_cache, rec->ino,
4151                                         root->root_key.objectid, backref->dir,
4152                                         backref->index, backref->name,
4153                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
4154                                         backref->errors);
4155                         if (backref->found_dir_index)
4156                                 add_root_backref(dst_cache, rec->ino,
4157                                         root->root_key.objectid, backref->dir,
4158                                         backref->index, backref->name,
4159                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
4160                                         backref->errors);
4161                 }
4162 skip:
4163                 free_inode_rec(rec);
4164         }
4165         if (ret < 0)
4166                 return ret;
4167         return 0;
4168 }
4169
4170 static int check_root_refs(struct btrfs_root *root,
4171                            struct cache_tree *root_cache)
4172 {
4173         struct root_record *rec;
4174         struct root_record *ref_root;
4175         struct root_backref *backref;
4176         struct cache_extent *cache;
4177         int loop = 1;
4178         int ret;
4179         int error;
4180         int errors = 0;
4181
4182         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4183         BUG_ON(IS_ERR(rec));
4184         rec->found_ref = 1;
4185
4186         /* fixme: this can not detect circular references */
4187         while (loop) {
4188                 loop = 0;
4189                 cache = search_cache_extent(root_cache, 0);
4190                 while (1) {
4191                         if (!cache)
4192                                 break;
4193                         rec = container_of(cache, struct root_record, cache);
4194                         cache = next_cache_extent(cache);
4195
4196                         if (rec->found_ref == 0)
4197                                 continue;
4198
4199                         list_for_each_entry(backref, &rec->backrefs, list) {
4200                                 if (!backref->reachable)
4201                                         continue;
4202
4203                                 ref_root = get_root_rec(root_cache,
4204                                                         backref->ref_root);
4205                                 BUG_ON(IS_ERR(ref_root));
4206                                 if (ref_root->found_ref > 0)
4207                                         continue;
4208
4209                                 backref->reachable = 0;
4210                                 rec->found_ref--;
4211                                 if (rec->found_ref == 0)
4212                                         loop = 1;
4213                         }
4214                 }
4215         }
4216
4217         cache = search_cache_extent(root_cache, 0);
4218         while (1) {
4219                 if (!cache)
4220                         break;
4221                 rec = container_of(cache, struct root_record, cache);
4222                 cache = next_cache_extent(cache);
4223
4224                 if (rec->found_ref == 0 &&
4225                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4226                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4227                         ret = check_orphan_item(root->fs_info->tree_root,
4228                                                 rec->objectid);
4229                         if (ret == 0)
4230                                 continue;
4231
4232                         /*
4233                          * If we don't have a root item then we likely just have
4234                          * a dir item in a snapshot for this root but no actual
4235                          * ref key or anything so it's meaningless.
4236                          */
4237                         if (!rec->found_root_item)
4238                                 continue;
4239                         errors++;
4240                         fprintf(stderr, "fs tree %llu not referenced\n",
4241                                 (unsigned long long)rec->objectid);
4242                 }
4243
4244                 error = 0;
4245                 if (rec->found_ref > 0 && !rec->found_root_item)
4246                         error = 1;
4247                 list_for_each_entry(backref, &rec->backrefs, list) {
4248                         if (!backref->found_dir_item)
4249                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
4250                         if (!backref->found_dir_index)
4251                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
4252                         if (!backref->found_back_ref)
4253                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4254                         if (!backref->found_forward_ref)
4255                                 backref->errors |= REF_ERR_NO_ROOT_REF;
4256                         if (backref->reachable && backref->errors)
4257                                 error = 1;
4258                 }
4259                 if (!error)
4260                         continue;
4261
4262                 errors++;
4263                 fprintf(stderr, "fs tree %llu refs %u %s\n",
4264                         (unsigned long long)rec->objectid, rec->found_ref,
4265                          rec->found_root_item ? "" : "not found");
4266
4267                 list_for_each_entry(backref, &rec->backrefs, list) {
4268                         if (!backref->reachable)
4269                                 continue;
4270                         if (!backref->errors && rec->found_root_item)
4271                                 continue;
4272                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4273                                 " index %llu namelen %u name %s errors %x\n",
4274                                 (unsigned long long)backref->ref_root,
4275                                 (unsigned long long)backref->dir,
4276                                 (unsigned long long)backref->index,
4277                                 backref->namelen, backref->name,
4278                                 backref->errors);
4279                         print_ref_error(backref->errors);
4280                 }
4281         }
4282         return errors > 0 ? 1 : 0;
4283 }
4284
4285 static int process_root_ref(struct extent_buffer *eb, int slot,
4286                             struct btrfs_key *key,
4287                             struct cache_tree *root_cache)
4288 {
4289         u64 dirid;
4290         u64 index;
4291         u32 len;
4292         u32 name_len;
4293         struct btrfs_root_ref *ref;
4294         char namebuf[BTRFS_NAME_LEN];
4295         int error;
4296
4297         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4298
4299         dirid = btrfs_root_ref_dirid(eb, ref);
4300         index = btrfs_root_ref_sequence(eb, ref);
4301         name_len = btrfs_root_ref_name_len(eb, ref);
4302
4303         if (name_len <= BTRFS_NAME_LEN) {
4304                 len = name_len;
4305                 error = 0;
4306         } else {
4307                 len = BTRFS_NAME_LEN;
4308                 error = REF_ERR_NAME_TOO_LONG;
4309         }
4310         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4311
4312         if (key->type == BTRFS_ROOT_REF_KEY) {
4313                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4314                                  index, namebuf, len, key->type, error);
4315         } else {
4316                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4317                                  index, namebuf, len, key->type, error);
4318         }
4319         return 0;
4320 }
4321
4322 static void free_corrupt_block(struct cache_extent *cache)
4323 {
4324         struct btrfs_corrupt_block *corrupt;
4325
4326         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4327         free(corrupt);
4328 }
4329
4330 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4331
4332 /*
4333  * Repair the btree of the given root.
4334  *
4335  * The fix is to remove the node key in corrupt_blocks cache_tree.
4336  * and rebalance the tree.
4337  * After the fix, the btree should be writeable.
4338  */
4339 static int repair_btree(struct btrfs_root *root,
4340                         struct cache_tree *corrupt_blocks)
4341 {
4342         struct btrfs_trans_handle *trans;
4343         struct btrfs_path path;
4344         struct btrfs_corrupt_block *corrupt;
4345         struct cache_extent *cache;
4346         struct btrfs_key key;
4347         u64 offset;
4348         int level;
4349         int ret = 0;
4350
4351         if (cache_tree_empty(corrupt_blocks))
4352                 return 0;
4353
4354         trans = btrfs_start_transaction(root, 1);
4355         if (IS_ERR(trans)) {
4356                 ret = PTR_ERR(trans);
4357                 fprintf(stderr, "Error starting transaction: %s\n",
4358                         strerror(-ret));
4359                 return ret;
4360         }
4361         btrfs_init_path(&path);
4362         cache = first_cache_extent(corrupt_blocks);
4363         while (cache) {
4364                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4365                                        cache);
4366                 level = corrupt->level;
4367                 path.lowest_level = level;
4368                 key.objectid = corrupt->key.objectid;
4369                 key.type = corrupt->key.type;
4370                 key.offset = corrupt->key.offset;
4371
4372                 /*
4373                  * Here we don't want to do any tree balance, since it may
4374                  * cause a balance with corrupted brother leaf/node,
4375                  * so ins_len set to 0 here.
4376                  * Balance will be done after all corrupt node/leaf is deleted.
4377                  */
4378                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4379                 if (ret < 0)
4380                         goto out;
4381                 offset = btrfs_node_blockptr(path.nodes[level],
4382                                              path.slots[level]);
4383
4384                 /* Remove the ptr */
4385                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4386                 if (ret < 0)
4387                         goto out;
4388                 /*
4389                  * Remove the corresponding extent
4390                  * return value is not concerned.
4391                  */
4392                 btrfs_release_path(&path);
4393                 ret = btrfs_free_extent(trans, root, offset,
4394                                 root->fs_info->nodesize, 0,
4395                                 root->root_key.objectid, level - 1, 0);
4396                 cache = next_cache_extent(cache);
4397         }
4398
4399         /* Balance the btree using btrfs_search_slot() */
4400         cache = first_cache_extent(corrupt_blocks);
4401         while (cache) {
4402                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4403                                        cache);
4404                 memcpy(&key, &corrupt->key, sizeof(key));
4405                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4406                 if (ret < 0)
4407                         goto out;
4408                 /* return will always >0 since it won't find the item */
4409                 ret = 0;
4410                 btrfs_release_path(&path);
4411                 cache = next_cache_extent(cache);
4412         }
4413 out:
4414         btrfs_commit_transaction(trans, root);
4415         btrfs_release_path(&path);
4416         return ret;
4417 }
4418
4419 static int check_fs_root(struct btrfs_root *root,
4420                          struct cache_tree *root_cache,
4421                          struct walk_control *wc)
4422 {
4423         int ret = 0;
4424         int err = 0;
4425         int wret;
4426         int level;
4427         struct btrfs_path path;
4428         struct shared_node root_node;
4429         struct root_record *rec;
4430         struct btrfs_root_item *root_item = &root->root_item;
4431         struct cache_tree corrupt_blocks;
4432         struct orphan_data_extent *orphan;
4433         struct orphan_data_extent *tmp;
4434         enum btrfs_tree_block_status status;
4435         struct node_refs nrefs;
4436
4437         /*
4438          * Reuse the corrupt_block cache tree to record corrupted tree block
4439          *
4440          * Unlike the usage in extent tree check, here we do it in a per
4441          * fs/subvol tree base.
4442          */
4443         cache_tree_init(&corrupt_blocks);
4444         root->fs_info->corrupt_blocks = &corrupt_blocks;
4445
4446         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4447                 rec = get_root_rec(root_cache, root->root_key.objectid);
4448                 BUG_ON(IS_ERR(rec));
4449                 if (btrfs_root_refs(root_item) > 0)
4450                         rec->found_root_item = 1;
4451         }
4452
4453         btrfs_init_path(&path);
4454         memset(&root_node, 0, sizeof(root_node));
4455         cache_tree_init(&root_node.root_cache);
4456         cache_tree_init(&root_node.inode_cache);
4457         memset(&nrefs, 0, sizeof(nrefs));
4458
4459         /* Move the orphan extent record to corresponding inode_record */
4460         list_for_each_entry_safe(orphan, tmp,
4461                                  &root->orphan_data_extents, list) {
4462                 struct inode_record *inode;
4463
4464                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4465                                       1);
4466                 BUG_ON(IS_ERR(inode));
4467                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4468                 list_move(&orphan->list, &inode->orphan_extents);
4469         }
4470
4471         level = btrfs_header_level(root->node);
4472         memset(wc->nodes, 0, sizeof(wc->nodes));
4473         wc->nodes[level] = &root_node;
4474         wc->active_node = level;
4475         wc->root_level = level;
4476
4477         /* We may not have checked the root block, lets do that now */
4478         if (btrfs_is_leaf(root->node))
4479                 status = btrfs_check_leaf(root, NULL, root->node);
4480         else
4481                 status = btrfs_check_node(root, NULL, root->node);
4482         if (status != BTRFS_TREE_BLOCK_CLEAN)
4483                 return -EIO;
4484
4485         if (btrfs_root_refs(root_item) > 0 ||
4486             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4487                 path.nodes[level] = root->node;
4488                 extent_buffer_get(root->node);
4489                 path.slots[level] = 0;
4490         } else {
4491                 struct btrfs_key key;
4492                 struct btrfs_disk_key found_key;
4493
4494                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4495                 level = root_item->drop_level;
4496                 path.lowest_level = level;
4497                 if (level > btrfs_header_level(root->node) ||
4498                     level >= BTRFS_MAX_LEVEL) {
4499                         error("ignoring invalid drop level: %u", level);
4500                         goto skip_walking;
4501                 }
4502                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4503                 if (wret < 0)
4504                         goto skip_walking;
4505                 btrfs_node_key(path.nodes[level], &found_key,
4506                                 path.slots[level]);
4507                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4508                                         sizeof(found_key)));
4509         }
4510
4511         while (1) {
4512                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4513                 if (wret < 0)
4514                         ret = wret;
4515                 if (wret != 0)
4516                         break;
4517
4518                 wret = walk_up_tree(root, &path, wc, &level);
4519                 if (wret < 0)
4520                         ret = wret;
4521                 if (wret != 0)
4522                         break;
4523         }
4524 skip_walking:
4525         btrfs_release_path(&path);
4526
4527         if (!cache_tree_empty(&corrupt_blocks)) {
4528                 struct cache_extent *cache;
4529                 struct btrfs_corrupt_block *corrupt;
4530
4531                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4532                        root->root_key.objectid);
4533                 cache = first_cache_extent(&corrupt_blocks);
4534                 while (cache) {
4535                         corrupt = container_of(cache,
4536                                                struct btrfs_corrupt_block,
4537                                                cache);
4538                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4539                                cache->start, corrupt->level,
4540                                corrupt->key.objectid, corrupt->key.type,
4541                                corrupt->key.offset);
4542                         cache = next_cache_extent(cache);
4543                 }
4544                 if (repair) {
4545                         printf("Try to repair the btree for root %llu\n",
4546                                root->root_key.objectid);
4547                         ret = repair_btree(root, &corrupt_blocks);
4548                         if (ret < 0)
4549                                 fprintf(stderr, "Failed to repair btree: %s\n",
4550                                         strerror(-ret));
4551                         if (!ret)
4552                                 printf("Btree for root %llu is fixed\n",
4553                                        root->root_key.objectid);
4554                 }
4555         }
4556
4557         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4558         if (err < 0)
4559                 ret = err;
4560
4561         if (root_node.current) {
4562                 root_node.current->checked = 1;
4563                 maybe_free_inode_rec(&root_node.inode_cache,
4564                                 root_node.current);
4565         }
4566
4567         err = check_inode_recs(root, &root_node.inode_cache);
4568         if (!ret)
4569                 ret = err;
4570
4571         free_corrupt_blocks_tree(&corrupt_blocks);
4572         root->fs_info->corrupt_blocks = NULL;
4573         free_orphan_data_extents(&root->orphan_data_extents);
4574         return ret;
4575 }
4576
4577 static int fs_root_objectid(u64 objectid)
4578 {
4579         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4580             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4581                 return 1;
4582         return is_fstree(objectid);
4583 }
4584
4585 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4586                           struct cache_tree *root_cache)
4587 {
4588         struct btrfs_path path;
4589         struct btrfs_key key;
4590         struct walk_control wc;
4591         struct extent_buffer *leaf, *tree_node;
4592         struct btrfs_root *tmp_root;
4593         struct btrfs_root *tree_root = fs_info->tree_root;
4594         int ret;
4595         int err = 0;
4596
4597         if (ctx.progress_enabled) {
4598                 ctx.tp = TASK_FS_ROOTS;
4599                 task_start(ctx.info);
4600         }
4601
4602         /*
4603          * Just in case we made any changes to the extent tree that weren't
4604          * reflected into the free space cache yet.
4605          */
4606         if (repair)
4607                 reset_cached_block_groups(fs_info);
4608         memset(&wc, 0, sizeof(wc));
4609         cache_tree_init(&wc.shared);
4610         btrfs_init_path(&path);
4611
4612 again:
4613         key.offset = 0;
4614         key.objectid = 0;
4615         key.type = BTRFS_ROOT_ITEM_KEY;
4616         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4617         if (ret < 0) {
4618                 err = 1;
4619                 goto out;
4620         }
4621         tree_node = tree_root->node;
4622         while (1) {
4623                 if (tree_node != tree_root->node) {
4624                         free_root_recs_tree(root_cache);
4625                         btrfs_release_path(&path);
4626                         goto again;
4627                 }
4628                 leaf = path.nodes[0];
4629                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4630                         ret = btrfs_next_leaf(tree_root, &path);
4631                         if (ret) {
4632                                 if (ret < 0)
4633                                         err = 1;
4634                                 break;
4635                         }
4636                         leaf = path.nodes[0];
4637                 }
4638                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4639                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4640                     fs_root_objectid(key.objectid)) {
4641                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4642                                 tmp_root = btrfs_read_fs_root_no_cache(
4643                                                 fs_info, &key);
4644                         } else {
4645                                 key.offset = (u64)-1;
4646                                 tmp_root = btrfs_read_fs_root(
4647                                                 fs_info, &key);
4648                         }
4649                         if (IS_ERR(tmp_root)) {
4650                                 err = 1;
4651                                 goto next;
4652                         }
4653                         ret = check_fs_root(tmp_root, root_cache, &wc);
4654                         if (ret == -EAGAIN) {
4655                                 free_root_recs_tree(root_cache);
4656                                 btrfs_release_path(&path);
4657                                 goto again;
4658                         }
4659                         if (ret)
4660                                 err = 1;
4661                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4662                                 btrfs_free_fs_root(tmp_root);
4663                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4664                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4665                         process_root_ref(leaf, path.slots[0], &key,
4666                                          root_cache);
4667                 }
4668 next:
4669                 path.slots[0]++;
4670         }
4671 out:
4672         btrfs_release_path(&path);
4673         if (err)
4674                 free_extent_cache_tree(&wc.shared);
4675         if (!cache_tree_empty(&wc.shared))
4676                 fprintf(stderr, "warning line %d\n", __LINE__);
4677
4678         task_stop(ctx.info);
4679
4680         return err;
4681 }
4682
4683 /*
4684  * Find the @index according by @ino and name.
4685  * Notice:time efficiency is O(N)
4686  *
4687  * @root:       the root of the fs/file tree
4688  * @index_ret:  the index as return value
4689  * @namebuf:    the name to match
4690  * @name_len:   the length of name to match
4691  * @file_type:  the file_type of INODE_ITEM to match
4692  *
4693  * Returns 0 if found and *@index_ret will be modified with right value
4694  * Returns< 0 not found and *@index_ret will be (u64)-1
4695  */
4696 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4697                           u64 *index_ret, char *namebuf, u32 name_len,
4698                           u8 file_type)
4699 {
4700         struct btrfs_path path;
4701         struct extent_buffer *node;
4702         struct btrfs_dir_item *di;
4703         struct btrfs_key key;
4704         struct btrfs_key location;
4705         char name[BTRFS_NAME_LEN] = {0};
4706
4707         u32 total;
4708         u32 cur = 0;
4709         u32 len;
4710         u32 data_len;
4711         u8 filetype;
4712         int slot;
4713         int ret;
4714
4715         ASSERT(index_ret);
4716
4717         /* search from the last index */
4718         key.objectid = dirid;
4719         key.offset = (u64)-1;
4720         key.type = BTRFS_DIR_INDEX_KEY;
4721
4722         btrfs_init_path(&path);
4723         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4724         if (ret < 0)
4725                 return ret;
4726
4727 loop:
4728         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4729         if (ret) {
4730                 ret = -ENOENT;
4731                 *index_ret = (64)-1;
4732                 goto out;
4733         }
4734         /* Check whether inode_id/filetype/name match */
4735         node = path.nodes[0];
4736         slot = path.slots[0];
4737         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4738         total = btrfs_item_size_nr(node, slot);
4739         while (cur < total) {
4740                 ret = -ENOENT;
4741                 len = btrfs_dir_name_len(node, di);
4742                 data_len = btrfs_dir_data_len(node, di);
4743
4744                 btrfs_dir_item_key_to_cpu(node, di, &location);
4745                 if (location.objectid != location_id ||
4746                     location.type != BTRFS_INODE_ITEM_KEY ||
4747                     location.offset != 0)
4748                         goto next;
4749
4750                 filetype = btrfs_dir_type(node, di);
4751                 if (file_type != filetype)
4752                         goto next;
4753
4754                 if (len > BTRFS_NAME_LEN)
4755                         len = BTRFS_NAME_LEN;
4756
4757                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4758                 if (len != name_len || strncmp(namebuf, name, len))
4759                         goto next;
4760
4761                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4762                 *index_ret = key.offset;
4763                 ret = 0;
4764                 goto out;
4765 next:
4766                 len += sizeof(*di) + data_len;
4767                 di = (struct btrfs_dir_item *)((char *)di + len);
4768                 cur += len;
4769         }
4770         goto loop;
4771
4772 out:
4773         btrfs_release_path(&path);
4774         return ret;
4775 }
4776
4777 /*
4778  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4779  * INODE_REF/INODE_EXTREF match.
4780  *
4781  * @root:       the root of the fs/file tree
4782  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4783  *              value while find index
4784  * @location_key: location key of the struct btrfs_dir_item to match
4785  * @name:       the name to match
4786  * @namelen:    the length of name
4787  * @file_type:  the type of file to math
4788  *
4789  * Return 0 if no error occurred.
4790  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4791  * DIR_ITEM/DIR_INDEX
4792  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4793  * and DIR_ITEM/DIR_INDEX mismatch
4794  */
4795 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4796                          struct btrfs_key *location_key, char *name,
4797                          u32 namelen, u8 file_type)
4798 {
4799         struct btrfs_path path;
4800         struct extent_buffer *node;
4801         struct btrfs_dir_item *di;
4802         struct btrfs_key location;
4803         char namebuf[BTRFS_NAME_LEN] = {0};
4804         u32 total;
4805         u32 cur = 0;
4806         u32 len;
4807         u32 data_len;
4808         u8 filetype;
4809         int slot;
4810         int ret;
4811
4812         /* get the index by traversing all index */
4813         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4814                 ret = find_dir_index(root, key->objectid,
4815                                      location_key->objectid, &key->offset,
4816                                      name, namelen, file_type);
4817                 if (ret)
4818                         ret = DIR_INDEX_MISSING;
4819                 return ret;
4820         }
4821
4822         btrfs_init_path(&path);
4823         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4824         if (ret) {
4825                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4826                         DIR_INDEX_MISSING;
4827                 goto out;
4828         }
4829
4830         /* Check whether inode_id/filetype/name match */
4831         node = path.nodes[0];
4832         slot = path.slots[0];
4833         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4834         total = btrfs_item_size_nr(node, slot);
4835         while (cur < total) {
4836                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4837                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4838
4839                 len = btrfs_dir_name_len(node, di);
4840                 data_len = btrfs_dir_data_len(node, di);
4841
4842                 btrfs_dir_item_key_to_cpu(node, di, &location);
4843                 if (location.objectid != location_key->objectid ||
4844                     location.type != location_key->type ||
4845                     location.offset != location_key->offset)
4846                         goto next;
4847
4848                 filetype = btrfs_dir_type(node, di);
4849                 if (file_type != filetype)
4850                         goto next;
4851
4852                 if (len > BTRFS_NAME_LEN) {
4853                         len = BTRFS_NAME_LEN;
4854                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4855                         root->objectid,
4856                         key->type == BTRFS_DIR_ITEM_KEY ?
4857                         "DIR_ITEM" : "DIR_INDEX",
4858                         key->objectid, key->offset, len);
4859                 }
4860                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4861                                    len);
4862                 if (len != namelen || strncmp(namebuf, name, len))
4863                         goto next;
4864
4865                 ret = 0;
4866                 goto out;
4867 next:
4868                 len += sizeof(*di) + data_len;
4869                 di = (struct btrfs_dir_item *)((char *)di + len);
4870                 cur += len;
4871         }
4872
4873 out:
4874         btrfs_release_path(&path);
4875         return ret;
4876 }
4877
4878 /*
4879  * Prints inode ref error message
4880  */
4881 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4882                                 u64 index, const char *namebuf, int name_len,
4883                                 u8 filetype, int err)
4884 {
4885         if (!err)
4886                 return;
4887
4888         /* root dir error */
4889         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4890                 error(
4891         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4892                       root->objectid, key->objectid, key->offset, namebuf);
4893                 return;
4894         }
4895
4896         /* normal error */
4897         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4898                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4899                       root->objectid, key->offset,
4900                       btrfs_name_hash(namebuf, name_len),
4901                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4902                       namebuf, filetype);
4903         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4904                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4905                       root->objectid, key->offset, index,
4906                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4907                       namebuf, filetype);
4908 }
4909
4910 /*
4911  * Insert the missing inode item.
4912  *
4913  * Returns 0 means success.
4914  * Returns <0 means error.
4915  */
4916 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4917                                      u8 filetype)
4918 {
4919         struct btrfs_key key;
4920         struct btrfs_trans_handle *trans;
4921         struct btrfs_path path;
4922         int ret;
4923
4924         key.objectid = ino;
4925         key.type = BTRFS_INODE_ITEM_KEY;
4926         key.offset = 0;
4927
4928         btrfs_init_path(&path);
4929         trans = btrfs_start_transaction(root, 1);
4930         if (IS_ERR(trans)) {
4931                 ret = -EIO;
4932                 goto out;
4933         }
4934
4935         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4936         if (ret < 0 || !ret)
4937                 goto fail;
4938
4939         /* insert inode item */
4940         create_inode_item_lowmem(trans, root, ino, filetype);
4941         ret = 0;
4942 fail:
4943         btrfs_commit_transaction(trans, root);
4944 out:
4945         if (ret)
4946                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4947                       root->objectid, ino);
4948         btrfs_release_path(&path);
4949         return ret;
4950 }
4951
4952 /*
4953  * The ternary means dir item, dir index and relative inode ref.
4954  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4955  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4956  * strategy:
4957  * If two of three is missing or mismatched, delete the existing one.
4958  * If one of three is missing or mismatched, add the missing one.
4959  *
4960  * returns 0 means success.
4961  * returns not 0 means on error;
4962  */
4963 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4964                           u64 index, char *name, int name_len, u8 filetype,
4965                           int err)
4966 {
4967         struct btrfs_trans_handle *trans;
4968         int stage = 0;
4969         int ret = 0;
4970
4971         /*
4972          * stage shall be one of following valild values:
4973          *      0: Fine, nothing to do.
4974          *      1: One of three is wrong, so add missing one.
4975          *      2: Two of three is wrong, so delete existed one.
4976          */
4977         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4978                 stage++;
4979         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4980                 stage++;
4981         if (err & (INODE_REF_MISSING))
4982                 stage++;
4983
4984         /* stage must be smllarer than 3 */
4985         ASSERT(stage < 3);
4986
4987         trans = btrfs_start_transaction(root, 1);
4988         if (stage == 2) {
4989                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4990                                    name_len, 0);
4991                 goto out;
4992         }
4993         if (stage == 1) {
4994                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4995                                filetype, &index, 1, 1);
4996                 goto out;
4997         }
4998 out:
4999         btrfs_commit_transaction(trans, root);
5000
5001         if (ret)
5002                 error("fail to repair inode %llu name %s filetype %u",
5003                       ino, name, filetype);
5004         else
5005                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5006                        stage == 2 ? "Delete" : "Add",
5007                        ino, name, filetype);
5008
5009         return ret;
5010 }
5011
5012 /*
5013  * Traverse the given INODE_REF and call find_dir_item() to find related
5014  * DIR_ITEM/DIR_INDEX.
5015  *
5016  * @root:       the root of the fs/file tree
5017  * @ref_key:    the key of the INODE_REF
5018  * @path        the path provides node and slot
5019  * @refs:       the count of INODE_REF
5020  * @mode:       the st_mode of INODE_ITEM
5021  * @name_ret:   returns with the first ref's name
5022  * @name_len_ret:    len of the name_ret
5023  *
5024  * Return 0 if no error occurred.
5025  */
5026 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5027                            struct btrfs_path *path, char *name_ret,
5028                            u32 *namelen_ret, u64 *refs_ret, int mode)
5029 {
5030         struct btrfs_key key;
5031         struct btrfs_key location;
5032         struct btrfs_inode_ref *ref;
5033         struct extent_buffer *node;
5034         char namebuf[BTRFS_NAME_LEN] = {0};
5035         u32 total;
5036         u32 cur = 0;
5037         u32 len;
5038         u32 name_len;
5039         u64 index;
5040         int ret;
5041         int err = 0;
5042         int tmp_err;
5043         int slot;
5044         int need_research = 0;
5045         u64 refs;
5046
5047 begin:
5048         err = 0;
5049         cur = 0;
5050         refs = *refs_ret;
5051
5052         /* since after repair, path and the dir item may be changed */
5053         if (need_research) {
5054                 need_research = 0;
5055                 btrfs_release_path(path);
5056                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5057                 /* the item was deleted, let path point to the last checked item */
5058                 if (ret > 0) {
5059                         if (path->slots[0] == 0)
5060                                 btrfs_prev_leaf(root, path);
5061                         else
5062                                 path->slots[0]--;
5063                 }
5064                 if (ret)
5065                         goto out;
5066         }
5067
5068         location.objectid = ref_key->objectid;
5069         location.type = BTRFS_INODE_ITEM_KEY;
5070         location.offset = 0;
5071         node = path->nodes[0];
5072         slot = path->slots[0];
5073
5074         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5075         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5076         total = btrfs_item_size_nr(node, slot);
5077
5078 next:
5079         /* Update inode ref count */
5080         refs++;
5081         tmp_err = 0;
5082         index = btrfs_inode_ref_index(node, ref);
5083         name_len = btrfs_inode_ref_name_len(node, ref);
5084
5085         if (name_len <= BTRFS_NAME_LEN) {
5086                 len = name_len;
5087         } else {
5088                 len = BTRFS_NAME_LEN;
5089                 warning("root %llu INODE_REF[%llu %llu] name too long",
5090                         root->objectid, ref_key->objectid, ref_key->offset);
5091         }
5092
5093         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5094
5095         /* copy the first name found to name_ret */
5096         if (refs == 1 && name_ret) {
5097                 memcpy(name_ret, namebuf, len);
5098                 *namelen_ret = len;
5099         }
5100
5101         /* Check root dir ref */
5102         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5103                 if (index != 0 || len != strlen("..") ||
5104                     strncmp("..", namebuf, len) ||
5105                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5106                         /* set err bits then repair will delete the ref */
5107                         err |= DIR_INDEX_MISSING;
5108                         err |= DIR_ITEM_MISSING;
5109                 }
5110                 goto end;
5111         }
5112
5113         /* Find related DIR_INDEX */
5114         key.objectid = ref_key->offset;
5115         key.type = BTRFS_DIR_INDEX_KEY;
5116         key.offset = index;
5117         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5118                             imode_to_type(mode));
5119
5120         /* Find related dir_item */
5121         key.objectid = ref_key->offset;
5122         key.type = BTRFS_DIR_ITEM_KEY;
5123         key.offset = btrfs_name_hash(namebuf, len);
5124         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5125                             imode_to_type(mode));
5126 end:
5127         if (tmp_err && repair) {
5128                 ret = repair_ternary_lowmem(root, ref_key->offset,
5129                                             ref_key->objectid, index, namebuf,
5130                                             name_len, imode_to_type(mode),
5131                                             tmp_err);
5132                 if (!ret) {
5133                         need_research = 1;
5134                         goto begin;
5135                 }
5136         }
5137         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5138                             imode_to_type(mode), tmp_err);
5139         err |= tmp_err;
5140         len = sizeof(*ref) + name_len;
5141         ref = (struct btrfs_inode_ref *)((char *)ref + len);
5142         cur += len;
5143         if (cur < total)
5144                 goto next;
5145
5146 out:
5147         *refs_ret = refs;
5148         return err;
5149 }
5150
5151 /*
5152  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5153  * DIR_ITEM/DIR_INDEX.
5154  *
5155  * @root:       the root of the fs/file tree
5156  * @ref_key:    the key of the INODE_EXTREF
5157  * @refs:       the count of INODE_EXTREF
5158  * @mode:       the st_mode of INODE_ITEM
5159  *
5160  * Return 0 if no error occurred.
5161  */
5162 static int check_inode_extref(struct btrfs_root *root,
5163                               struct btrfs_key *ref_key,
5164                               struct extent_buffer *node, int slot, u64 *refs,
5165                               int mode)
5166 {
5167         struct btrfs_key key;
5168         struct btrfs_key location;
5169         struct btrfs_inode_extref *extref;
5170         char namebuf[BTRFS_NAME_LEN] = {0};
5171         u32 total;
5172         u32 cur = 0;
5173         u32 len;
5174         u32 name_len;
5175         u64 index;
5176         u64 parent;
5177         int ret;
5178         int err = 0;
5179
5180         location.objectid = ref_key->objectid;
5181         location.type = BTRFS_INODE_ITEM_KEY;
5182         location.offset = 0;
5183
5184         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5185         total = btrfs_item_size_nr(node, slot);
5186
5187 next:
5188         /* update inode ref count */
5189         (*refs)++;
5190         name_len = btrfs_inode_extref_name_len(node, extref);
5191         index = btrfs_inode_extref_index(node, extref);
5192         parent = btrfs_inode_extref_parent(node, extref);
5193         if (name_len <= BTRFS_NAME_LEN) {
5194                 len = name_len;
5195         } else {
5196                 len = BTRFS_NAME_LEN;
5197                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5198                         root->objectid, ref_key->objectid, ref_key->offset);
5199         }
5200         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5201
5202         /* Check root dir ref name */
5203         if (index == 0 && strncmp(namebuf, "..", name_len)) {
5204                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5205                       root->objectid, ref_key->objectid, ref_key->offset,
5206                       namebuf);
5207                 err |= ROOT_DIR_ERROR;
5208         }
5209
5210         /* find related dir_index */
5211         key.objectid = parent;
5212         key.type = BTRFS_DIR_INDEX_KEY;
5213         key.offset = index;
5214         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5215         err |= ret;
5216
5217         /* find related dir_item */
5218         key.objectid = parent;
5219         key.type = BTRFS_DIR_ITEM_KEY;
5220         key.offset = btrfs_name_hash(namebuf, len);
5221         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5222         err |= ret;
5223
5224         len = sizeof(*extref) + name_len;
5225         extref = (struct btrfs_inode_extref *)((char *)extref + len);
5226         cur += len;
5227
5228         if (cur < total)
5229                 goto next;
5230
5231         return err;
5232 }
5233
5234 /*
5235  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5236  * DIR_ITEM/DIR_INDEX match.
5237  * Return with @index_ret.
5238  *
5239  * @root:       the root of the fs/file tree
5240  * @key:        the key of the INODE_REF/INODE_EXTREF
5241  * @name:       the name in the INODE_REF/INODE_EXTREF
5242  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
5243  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
5244  *              value (64)-1 means do not check index
5245  * @ext_ref:    the EXTENDED_IREF feature
5246  *
5247  * Return 0 if no error occurred.
5248  * Return >0 for error bitmap
5249  */
5250 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5251                           char *name, int namelen, u64 *index_ret,
5252                           unsigned int ext_ref)
5253 {
5254         struct btrfs_path path;
5255         struct btrfs_inode_ref *ref;
5256         struct btrfs_inode_extref *extref;
5257         struct extent_buffer *node;
5258         char ref_namebuf[BTRFS_NAME_LEN] = {0};
5259         u32 total;
5260         u32 cur = 0;
5261         u32 len;
5262         u32 ref_namelen;
5263         u64 ref_index;
5264         u64 parent;
5265         u64 dir_id;
5266         int slot;
5267         int ret;
5268
5269         ASSERT(index_ret);
5270
5271         btrfs_init_path(&path);
5272         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5273         if (ret) {
5274                 ret = INODE_REF_MISSING;
5275                 goto extref;
5276         }
5277
5278         node = path.nodes[0];
5279         slot = path.slots[0];
5280
5281         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5282         total = btrfs_item_size_nr(node, slot);
5283
5284         /* Iterate all entry of INODE_REF */
5285         while (cur < total) {
5286                 ret = INODE_REF_MISSING;
5287
5288                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5289                 ref_index = btrfs_inode_ref_index(node, ref);
5290                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5291                         goto next_ref;
5292
5293                 if (cur + sizeof(*ref) + ref_namelen > total ||
5294                     ref_namelen > BTRFS_NAME_LEN) {
5295                         warning("root %llu INODE %s[%llu %llu] name too long",
5296                                 root->objectid,
5297                                 key->type == BTRFS_INODE_REF_KEY ?
5298                                         "REF" : "EXTREF",
5299                                 key->objectid, key->offset);
5300
5301                         if (cur + sizeof(*ref) > total)
5302                                 break;
5303                         len = min_t(u32, total - cur - sizeof(*ref),
5304                                     BTRFS_NAME_LEN);
5305                 } else {
5306                         len = ref_namelen;
5307                 }
5308
5309                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5310                                    len);
5311
5312                 if (len != namelen || strncmp(ref_namebuf, name, len))
5313                         goto next_ref;
5314
5315                 *index_ret = ref_index;
5316                 ret = 0;
5317                 goto out;
5318 next_ref:
5319                 len = sizeof(*ref) + ref_namelen;
5320                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5321                 cur += len;
5322         }
5323
5324 extref:
5325         /* Skip if not support EXTENDED_IREF feature */
5326         if (!ext_ref)
5327                 goto out;
5328
5329         btrfs_release_path(&path);
5330         btrfs_init_path(&path);
5331
5332         dir_id = key->offset;
5333         key->type = BTRFS_INODE_EXTREF_KEY;
5334         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5335
5336         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5337         if (ret) {
5338                 ret = INODE_REF_MISSING;
5339                 goto out;
5340         }
5341
5342         node = path.nodes[0];
5343         slot = path.slots[0];
5344
5345         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5346         cur = 0;
5347         total = btrfs_item_size_nr(node, slot);
5348
5349         /* Iterate all entry of INODE_EXTREF */
5350         while (cur < total) {
5351                 ret = INODE_REF_MISSING;
5352
5353                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5354                 ref_index = btrfs_inode_extref_index(node, extref);
5355                 parent = btrfs_inode_extref_parent(node, extref);
5356                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5357                         goto next_extref;
5358
5359                 if (parent != dir_id)
5360                         goto next_extref;
5361
5362                 if (ref_namelen <= BTRFS_NAME_LEN) {
5363                         len = ref_namelen;
5364                 } else {
5365                         len = BTRFS_NAME_LEN;
5366                         warning("root %llu INODE %s[%llu %llu] name too long",
5367                                 root->objectid,
5368                                 key->type == BTRFS_INODE_REF_KEY ?
5369                                         "REF" : "EXTREF",
5370                                 key->objectid, key->offset);
5371                 }
5372                 read_extent_buffer(node, ref_namebuf,
5373                                    (unsigned long)(extref + 1), len);
5374
5375                 if (len != namelen || strncmp(ref_namebuf, name, len))
5376                         goto next_extref;
5377
5378                 *index_ret = ref_index;
5379                 ret = 0;
5380                 goto out;
5381
5382 next_extref:
5383                 len = sizeof(*extref) + ref_namelen;
5384                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5385                 cur += len;
5386
5387         }
5388 out:
5389         btrfs_release_path(&path);
5390         return ret;
5391 }
5392
5393 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5394                                u64 ino, u64 index, const char *namebuf,
5395                                int name_len, u8 filetype, int err)
5396 {
5397         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5398                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5399                       root->objectid, key->objectid, key->offset, namebuf,
5400                       filetype,
5401                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5402         }
5403
5404         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5405                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5406                       root->objectid, key->objectid, index, namebuf, filetype,
5407                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5408         }
5409
5410         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5411                 error(
5412                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5413                       root->objectid, ino, index, namebuf, filetype,
5414                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5415         }
5416
5417         if (err & INODE_REF_MISSING)
5418                 error(
5419                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5420                       root->objectid, ino, key->objectid, namebuf, filetype);
5421
5422 }
5423
5424 /*
5425  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5426  *
5427  * Returns error after repair
5428  */
5429 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5430                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5431                            int err)
5432 {
5433         int ret;
5434
5435         if (err & INODE_ITEM_MISSING) {
5436                 ret = repair_inode_item_missing(root, ino, filetype);
5437                 if (!ret)
5438                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5439         }
5440
5441         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5442                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5443                                             name_len, filetype, err);
5444                 if (!ret) {
5445                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5446                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5447                         err &= ~(INODE_REF_MISSING);
5448                 }
5449         }
5450         return err;
5451 }
5452
5453 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5454                 u64 *size_ret)
5455 {
5456         struct btrfs_key key;
5457         struct btrfs_path path;
5458         u32 len;
5459         struct btrfs_dir_item *di;
5460         int ret;
5461         int cur = 0;
5462         int total = 0;
5463
5464         ASSERT(size_ret);
5465         *size_ret = 0;
5466
5467         key.objectid = ino;
5468         key.type = type;
5469         key.offset = (u64)-1;
5470
5471         btrfs_init_path(&path);
5472         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5473         if (ret < 0) {
5474                 ret = -EIO;
5475                 goto out;
5476         }
5477         /* if found, go to spacial case */
5478         if (ret == 0)
5479                 goto special_case;
5480
5481 loop:
5482         ret = btrfs_previous_item(root, &path, ino, type);
5483
5484         if (ret) {
5485                 ret = 0;
5486                 goto out;
5487         }
5488
5489 special_case:
5490         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5491         cur = 0;
5492         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5493
5494         while (cur < total) {
5495                 len = btrfs_dir_name_len(path.nodes[0], di);
5496                 if (len > BTRFS_NAME_LEN)
5497                         len = BTRFS_NAME_LEN;
5498                 *size_ret += len;
5499
5500                 len += btrfs_dir_data_len(path.nodes[0], di);
5501                 len += sizeof(*di);
5502                 di = (struct btrfs_dir_item *)((char *)di + len);
5503                 cur += len;
5504         }
5505         goto loop;
5506
5507 out:
5508         btrfs_release_path(&path);
5509         return ret;
5510 }
5511
5512 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5513 {
5514         u64 item_size;
5515         u64 index_size;
5516         int ret;
5517
5518         ASSERT(size);
5519         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5520         if (ret)
5521                 goto out;
5522
5523         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5524         if (ret)
5525                 goto out;
5526
5527         *size = item_size + index_size;
5528
5529 out:
5530         if (ret)
5531                 error("failed to count root %llu INODE[%llu] root size",
5532                       root->objectid, ino);
5533         return ret;
5534 }
5535
5536 /*
5537  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5538  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5539  *
5540  * @root:       the root of the fs/file tree
5541  * @key:        the key of the INODE_REF/INODE_EXTREF
5542  * @path:       the path
5543  * @size:       the st_size of the INODE_ITEM
5544  * @ext_ref:    the EXTENDED_IREF feature
5545  *
5546  * Return 0 if no error occurred.
5547  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5548  */
5549 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5550                           struct btrfs_path *path, u64 *size,
5551                           unsigned int ext_ref)
5552 {
5553         struct btrfs_dir_item *di;
5554         struct btrfs_inode_item *ii;
5555         struct btrfs_key key;
5556         struct btrfs_key location;
5557         struct extent_buffer *node;
5558         int slot;
5559         char namebuf[BTRFS_NAME_LEN] = {0};
5560         u32 total;
5561         u32 cur = 0;
5562         u32 len;
5563         u32 name_len;
5564         u32 data_len;
5565         u8 filetype;
5566         u32 mode = 0;
5567         u64 index;
5568         int ret;
5569         int err;
5570         int tmp_err;
5571         int need_research = 0;
5572
5573         /*
5574          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5575          * ignore index check.
5576          */
5577         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5578                 index = di_key->offset;
5579         else
5580                 index = (u64)-1;
5581 begin:
5582         err = 0;
5583         cur = 0;
5584
5585         /* since after repair, path and the dir item may be changed */
5586         if (need_research) {
5587                 need_research = 0;
5588                 err |= DIR_COUNT_AGAIN;
5589                 btrfs_release_path(path);
5590                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5591                 /* the item was deleted, let path point the last checked item */
5592                 if (ret > 0) {
5593                         if (path->slots[0] == 0)
5594                                 btrfs_prev_leaf(root, path);
5595                         else
5596                                 path->slots[0]--;
5597                 }
5598                 if (ret)
5599                         goto out;
5600         }
5601
5602         node = path->nodes[0];
5603         slot = path->slots[0];
5604
5605         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5606         total = btrfs_item_size_nr(node, slot);
5607         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5608
5609         while (cur < total) {
5610                 data_len = btrfs_dir_data_len(node, di);
5611                 tmp_err = 0;
5612                 if (data_len)
5613                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5614                               root->objectid,
5615               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5616                               di_key->objectid, di_key->offset, data_len);
5617
5618                 name_len = btrfs_dir_name_len(node, di);
5619                 if (name_len <= BTRFS_NAME_LEN) {
5620                         len = name_len;
5621                 } else {
5622                         len = BTRFS_NAME_LEN;
5623                         warning("root %llu %s[%llu %llu] name too long",
5624                                 root->objectid,
5625                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5626                                 di_key->objectid, di_key->offset);
5627                 }
5628                 (*size) += name_len;
5629                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5630                                    len);
5631                 filetype = btrfs_dir_type(node, di);
5632
5633                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5634                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5635                         err |= -EIO;
5636                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5637                         root->objectid, di_key->objectid, di_key->offset,
5638                         namebuf, len, filetype, di_key->offset,
5639                         btrfs_name_hash(namebuf, len));
5640                 }
5641
5642                 btrfs_dir_item_key_to_cpu(node, di, &location);
5643                 /* Ignore related ROOT_ITEM check */
5644                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5645                         goto next;
5646
5647                 btrfs_release_path(path);
5648                 /* Check relative INODE_ITEM(existence/filetype) */
5649                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5650                 if (ret) {
5651                         tmp_err |= INODE_ITEM_MISSING;
5652                         goto next;
5653                 }
5654
5655                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5656                                     struct btrfs_inode_item);
5657                 mode = btrfs_inode_mode(path->nodes[0], ii);
5658                 if (imode_to_type(mode) != filetype) {
5659                         tmp_err |= INODE_ITEM_MISMATCH;
5660                         goto next;
5661                 }
5662
5663                 /* Check relative INODE_REF/INODE_EXTREF */
5664                 key.objectid = location.objectid;
5665                 key.type = BTRFS_INODE_REF_KEY;
5666                 key.offset = di_key->objectid;
5667                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5668                                           &index, ext_ref);
5669
5670                 /* check relative INDEX/ITEM */
5671                 key.objectid = di_key->objectid;
5672                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5673                         key.type = BTRFS_DIR_INDEX_KEY;
5674                         key.offset = index;
5675                 } else {
5676                         key.type = BTRFS_DIR_ITEM_KEY;
5677                         key.offset = btrfs_name_hash(namebuf, name_len);
5678                 }
5679
5680                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5681                                          name_len, filetype);
5682                 /* find_dir_item may find index */
5683                 if (key.type == BTRFS_DIR_INDEX_KEY)
5684                         index = key.offset;
5685 next:
5686
5687                 if (tmp_err && repair) {
5688                         ret = repair_dir_item(root, di_key->objectid,
5689                                               location.objectid, index,
5690                                               imode_to_type(mode), namebuf,
5691                                               name_len, tmp_err);
5692                         if (ret != tmp_err) {
5693                                 need_research = 1;
5694                                 goto begin;
5695                         }
5696                 }
5697                 btrfs_release_path(path);
5698                 print_dir_item_err(root, di_key, location.objectid, index,
5699                                    namebuf, name_len, filetype, tmp_err);
5700                 err |= tmp_err;
5701                 len = sizeof(*di) + name_len + data_len;
5702                 di = (struct btrfs_dir_item *)((char *)di + len);
5703                 cur += len;
5704
5705                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5706                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5707                               root->objectid, di_key->objectid,
5708                               di_key->offset);
5709                         break;
5710                 }
5711         }
5712 out:
5713         /* research path */
5714         btrfs_release_path(path);
5715         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5716         if (ret)
5717                 err |= ret > 0 ? -ENOENT : ret;
5718         return err;
5719 }
5720
5721 /*
5722  * Wrapper function of btrfs_punch_hole.
5723  *
5724  * Returns 0 means success.
5725  * Returns not 0 means error.
5726  */
5727 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5728                              u64 len)
5729 {
5730         struct btrfs_trans_handle *trans;
5731         int ret = 0;
5732
5733         trans = btrfs_start_transaction(root, 1);
5734         if (IS_ERR(trans))
5735                 return PTR_ERR(trans);
5736
5737         ret = btrfs_punch_hole(trans, root, ino, start, len);
5738         if (ret)
5739                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5740                       start, len, ino);
5741         else
5742                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5743                        ino);
5744
5745         btrfs_commit_transaction(trans, root);
5746         return ret;
5747 }
5748
5749 /*
5750  * Check file extent datasum/hole, update the size of the file extents,
5751  * check and update the last offset of the file extent.
5752  *
5753  * @root:       the root of fs/file tree.
5754  * @fkey:       the key of the file extent.
5755  * @nodatasum:  INODE_NODATASUM feature.
5756  * @size:       the sum of all EXTENT_DATA items size for this inode.
5757  * @end:        the offset of the last extent.
5758  *
5759  * Return 0 if no error occurred.
5760  */
5761 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5762                              struct extent_buffer *node, int slot,
5763                              unsigned int nodatasum, u64 *size, u64 *end)
5764 {
5765         struct btrfs_file_extent_item *fi;
5766         u64 disk_bytenr;
5767         u64 disk_num_bytes;
5768         u64 extent_num_bytes;
5769         u64 extent_offset;
5770         u64 csum_found;         /* In byte size, sectorsize aligned */
5771         u64 search_start;       /* Logical range start we search for csum */
5772         u64 search_len;         /* Logical range len we search for csum */
5773         unsigned int extent_type;
5774         unsigned int is_hole;
5775         int compressed = 0;
5776         int ret;
5777         int err = 0;
5778
5779         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5780
5781         /* Check inline extent */
5782         extent_type = btrfs_file_extent_type(node, fi);
5783         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5784                 struct btrfs_item *e = btrfs_item_nr(slot);
5785                 u32 item_inline_len;
5786
5787                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5788                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5789                 compressed = btrfs_file_extent_compression(node, fi);
5790                 if (extent_num_bytes == 0) {
5791                         error(
5792                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5793                                 root->objectid, fkey->objectid, fkey->offset);
5794                         err |= FILE_EXTENT_ERROR;
5795                 }
5796                 if (!compressed && extent_num_bytes != item_inline_len) {
5797                         error(
5798                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5799                                 root->objectid, fkey->objectid, fkey->offset,
5800                                 extent_num_bytes, item_inline_len);
5801                         err |= FILE_EXTENT_ERROR;
5802                 }
5803                 *end += extent_num_bytes;
5804                 *size += extent_num_bytes;
5805                 return err;
5806         }
5807
5808         /* Check extent type */
5809         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5810                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5811                 err |= FILE_EXTENT_ERROR;
5812                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5813                       root->objectid, fkey->objectid, fkey->offset);
5814                 return err;
5815         }
5816
5817         /* Check REG_EXTENT/PREALLOC_EXTENT */
5818         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5819         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5820         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5821         extent_offset = btrfs_file_extent_offset(node, fi);
5822         compressed = btrfs_file_extent_compression(node, fi);
5823         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5824
5825         /*
5826          * Check EXTENT_DATA csum
5827          *
5828          * For plain (uncompressed) extent, we should only check the range
5829          * we're referring to, as it's possible that part of prealloc extent
5830          * has been written, and has csum:
5831          *
5832          * |<--- Original large preallocated extent A ---->|
5833          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5834          *      No csum                         Has csum
5835          *
5836          * For compressed extent, we should check the whole range.
5837          */
5838         if (!compressed) {
5839                 search_start = disk_bytenr + extent_offset;
5840                 search_len = extent_num_bytes;
5841         } else {
5842                 search_start = disk_bytenr;
5843                 search_len = disk_num_bytes;
5844         }
5845         ret = count_csum_range(root, search_start, search_len, &csum_found);
5846         if (csum_found > 0 && nodatasum) {
5847                 err |= ODD_CSUM_ITEM;
5848                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5849                       root->objectid, fkey->objectid, fkey->offset);
5850         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5851                    !is_hole && (ret < 0 || csum_found < search_len)) {
5852                 err |= CSUM_ITEM_MISSING;
5853                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5854                       root->objectid, fkey->objectid, fkey->offset,
5855                       csum_found, search_len);
5856         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5857                 err |= ODD_CSUM_ITEM;
5858                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5859                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5860         }
5861
5862         /* Check EXTENT_DATA hole */
5863         if (!no_holes && *end != fkey->offset) {
5864                 if (repair)
5865                         ret = punch_extent_hole(root, fkey->objectid,
5866                                                 *end, fkey->offset - *end);
5867                 if (!repair || ret) {
5868                         err |= FILE_EXTENT_ERROR;
5869                         error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5870                               root->objectid, fkey->objectid, fkey->offset);
5871                 }
5872         }
5873
5874         *end += extent_num_bytes;
5875         if (!is_hole)
5876                 *size += extent_num_bytes;
5877
5878         return err;
5879 }
5880
5881 /*
5882  * Set inode item nbytes to @nbytes
5883  *
5884  * Returns  0     on success
5885  * Returns  != 0  on error
5886  */
5887 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5888                                       struct btrfs_path *path,
5889                                       u64 ino, u64 nbytes)
5890 {
5891         struct btrfs_trans_handle *trans;
5892         struct btrfs_inode_item *ii;
5893         struct btrfs_key key;
5894         struct btrfs_key research_key;
5895         int err = 0;
5896         int ret;
5897
5898         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5899
5900         key.objectid = ino;
5901         key.type = BTRFS_INODE_ITEM_KEY;
5902         key.offset = 0;
5903
5904         trans = btrfs_start_transaction(root, 1);
5905         if (IS_ERR(trans)) {
5906                 ret = PTR_ERR(trans);
5907                 err |= ret;
5908                 goto out;
5909         }
5910
5911         btrfs_release_path(path);
5912         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5913         if (ret > 0)
5914                 ret = -ENOENT;
5915         if (ret) {
5916                 err |= ret;
5917                 goto fail;
5918         }
5919
5920         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5921                             struct btrfs_inode_item);
5922         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5923         btrfs_mark_buffer_dirty(path->nodes[0]);
5924 fail:
5925         btrfs_commit_transaction(trans, root);
5926 out:
5927         if (ret)
5928                 error("failed to set nbytes in inode %llu root %llu",
5929                       ino, root->root_key.objectid);
5930         else
5931                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5932                        root->root_key.objectid, nbytes);
5933
5934         /* research path */
5935         btrfs_release_path(path);
5936         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5937         err |= ret;
5938
5939         return err;
5940 }
5941
5942 /*
5943  * Set directory inode isize to @isize.
5944  *
5945  * Returns 0     on success.
5946  * Returns != 0  on error.
5947  */
5948 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5949                                    struct btrfs_path *path,
5950                                    u64 ino, u64 isize)
5951 {
5952         struct btrfs_trans_handle *trans;
5953         struct btrfs_inode_item *ii;
5954         struct btrfs_key key;
5955         struct btrfs_key research_key;
5956         int ret;
5957         int err = 0;
5958
5959         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5960
5961         key.objectid = ino;
5962         key.type = BTRFS_INODE_ITEM_KEY;
5963         key.offset = 0;
5964
5965         trans = btrfs_start_transaction(root, 1);
5966         if (IS_ERR(trans)) {
5967                 ret = PTR_ERR(trans);
5968                 err |= ret;
5969                 goto out;
5970         }
5971
5972         btrfs_release_path(path);
5973         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5974         if (ret > 0)
5975                 ret = -ENOENT;
5976         if (ret) {
5977                 err |= ret;
5978                 goto fail;
5979         }
5980
5981         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5982                             struct btrfs_inode_item);
5983         btrfs_set_inode_size(path->nodes[0], ii, isize);
5984         btrfs_mark_buffer_dirty(path->nodes[0]);
5985 fail:
5986         btrfs_commit_transaction(trans, root);
5987 out:
5988         if (ret)
5989                 error("failed to set isize in inode %llu root %llu",
5990                       ino, root->root_key.objectid);
5991         else
5992                 printf("Set isize in inode %llu root %llu to %llu\n",
5993                        ino, root->root_key.objectid, isize);
5994
5995         btrfs_release_path(path);
5996         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5997         err |= ret;
5998
5999         return err;
6000 }
6001
6002 /*
6003  * Wrapper function for btrfs_add_orphan_item().
6004  *
6005  * Returns 0     on success.
6006  * Returns != 0  on error.
6007  */
6008 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6009                                            struct btrfs_path *path, u64 ino)
6010 {
6011         struct btrfs_trans_handle *trans;
6012         struct btrfs_key research_key;
6013         int ret;
6014         int err = 0;
6015
6016         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6017
6018         trans = btrfs_start_transaction(root, 1);
6019         if (IS_ERR(trans)) {
6020                 ret = PTR_ERR(trans);
6021                 err |= ret;
6022                 goto out;
6023         }
6024
6025         btrfs_release_path(path);
6026         ret = btrfs_add_orphan_item(trans, root, path, ino);
6027         err |= ret;
6028         btrfs_commit_transaction(trans, root);
6029 out:
6030         if (ret)
6031                 error("failed to add inode %llu as orphan item root %llu",
6032                       ino, root->root_key.objectid);
6033         else
6034                 printf("Added inode %llu as orphan item root %llu\n",
6035                        ino, root->root_key.objectid);
6036
6037         btrfs_release_path(path);
6038         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6039         err |= ret;
6040
6041         return err;
6042 }
6043
6044 /* Set inode_item nlink to @ref_count.
6045  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6046  *
6047  * Returns 0 on success
6048  */
6049 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6050                                       struct btrfs_path *path, u64 ino,
6051                                       const char *name, u32 namelen,
6052                                       u64 ref_count, u8 filetype, u64 *nlink)
6053 {
6054         struct btrfs_trans_handle *trans;
6055         struct btrfs_inode_item *ii;
6056         struct btrfs_key key;
6057         struct btrfs_key old_key;
6058         char namebuf[BTRFS_NAME_LEN] = {0};
6059         int name_len;
6060         int ret;
6061         int ret2;
6062
6063         /* save the key */
6064         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6065
6066         if (name && namelen) {
6067                 ASSERT(namelen <= BTRFS_NAME_LEN);
6068                 memcpy(namebuf, name, namelen);
6069                 name_len = namelen;
6070         } else {
6071                 sprintf(namebuf, "%llu", ino);
6072                 name_len = count_digits(ino);
6073                 printf("Can't find file name for inode %llu, use %s instead\n",
6074                        ino, namebuf);
6075         }
6076
6077         trans = btrfs_start_transaction(root, 1);
6078         if (IS_ERR(trans)) {
6079                 ret = PTR_ERR(trans);
6080                 goto out;
6081         }
6082
6083         btrfs_release_path(path);
6084         /* if refs is 0, put it into lostfound */
6085         if (ref_count == 0) {
6086                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6087                                               name_len, filetype, &ref_count);
6088                 if (ret)
6089                         goto fail;
6090         }
6091
6092         /* reset inode_item's nlink to ref_count */
6093         key.objectid = ino;
6094         key.type = BTRFS_INODE_ITEM_KEY;
6095         key.offset = 0;
6096
6097         btrfs_release_path(path);
6098         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6099         if (ret > 0)
6100                 ret = -ENOENT;
6101         if (ret)
6102                 goto fail;
6103
6104         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6105                             struct btrfs_inode_item);
6106         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6107         btrfs_mark_buffer_dirty(path->nodes[0]);
6108
6109         if (nlink)
6110                 *nlink = ref_count;
6111 fail:
6112         btrfs_commit_transaction(trans, root);
6113 out:
6114         if (ret)
6115                 error(
6116         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6117                        root->objectid, ino, namebuf, filetype);
6118         else
6119                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6120                        root->objectid, ino, namebuf, filetype);
6121
6122         /* research */
6123         btrfs_release_path(path);
6124         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6125         if (ret2 < 0)
6126                 return ret |= ret2;
6127         return ret;
6128 }
6129
6130 /*
6131  * Check INODE_ITEM and related ITEMs (the same inode number)
6132  * 1. check link count
6133  * 2. check inode ref/extref
6134  * 3. check dir item/index
6135  *
6136  * @ext_ref:    the EXTENDED_IREF feature
6137  *
6138  * Return 0 if no error occurred.
6139  * Return >0 for error or hit the traversal is done(by error bitmap)
6140  */
6141 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6142                             unsigned int ext_ref)
6143 {
6144         struct extent_buffer *node;
6145         struct btrfs_inode_item *ii;
6146         struct btrfs_key key;
6147         struct btrfs_key last_key;
6148         u64 inode_id;
6149         u32 mode;
6150         u64 nlink;
6151         u64 nbytes;
6152         u64 isize;
6153         u64 size = 0;
6154         u64 refs = 0;
6155         u64 extent_end = 0;
6156         u64 extent_size = 0;
6157         unsigned int dir;
6158         unsigned int nodatasum;
6159         int slot;
6160         int ret;
6161         int err = 0;
6162         char namebuf[BTRFS_NAME_LEN] = {0};
6163         u32 name_len = 0;
6164
6165         node = path->nodes[0];
6166         slot = path->slots[0];
6167
6168         btrfs_item_key_to_cpu(node, &key, slot);
6169         inode_id = key.objectid;
6170
6171         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6172                 ret = btrfs_next_item(root, path);
6173                 if (ret > 0)
6174                         err |= LAST_ITEM;
6175                 return err;
6176         }
6177
6178         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6179         isize = btrfs_inode_size(node, ii);
6180         nbytes = btrfs_inode_nbytes(node, ii);
6181         mode = btrfs_inode_mode(node, ii);
6182         dir = imode_to_type(mode) == BTRFS_FT_DIR;
6183         nlink = btrfs_inode_nlink(node, ii);
6184         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
6185
6186         while (1) {
6187                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6188                 ret = btrfs_next_item(root, path);
6189                 if (ret < 0) {
6190                         /* out will fill 'err' rusing current statistics */
6191                         goto out;
6192                 } else if (ret > 0) {
6193                         err |= LAST_ITEM;
6194                         goto out;
6195                 }
6196
6197                 node = path->nodes[0];
6198                 slot = path->slots[0];
6199                 btrfs_item_key_to_cpu(node, &key, slot);
6200                 if (key.objectid != inode_id)
6201                         goto out;
6202
6203                 switch (key.type) {
6204                 case BTRFS_INODE_REF_KEY:
6205                         ret = check_inode_ref(root, &key, path, namebuf,
6206                                               &name_len, &refs, mode);
6207                         err |= ret;
6208                         break;
6209                 case BTRFS_INODE_EXTREF_KEY:
6210                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6211                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
6212                                         root->objectid, key.objectid,
6213                                         key.offset);
6214                         ret = check_inode_extref(root, &key, node, slot, &refs,
6215                                                  mode);
6216                         err |= ret;
6217                         break;
6218                 case BTRFS_DIR_ITEM_KEY:
6219                 case BTRFS_DIR_INDEX_KEY:
6220                         if (!dir) {
6221                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6222                                         root->objectid, inode_id,
6223                                         imode_to_type(mode), key.objectid,
6224                                         key.offset);
6225                         }
6226                         ret = check_dir_item(root, &key, path, &size, ext_ref);
6227                         err |= ret;
6228                         break;
6229                 case BTRFS_EXTENT_DATA_KEY:
6230                         if (dir) {
6231                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6232                                         root->objectid, inode_id, key.objectid,
6233                                         key.offset);
6234                         }
6235                         ret = check_file_extent(root, &key, node, slot,
6236                                                 nodatasum, &extent_size,
6237                                                 &extent_end);
6238                         err |= ret;
6239                         break;
6240                 case BTRFS_XATTR_ITEM_KEY:
6241                         break;
6242                 default:
6243                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6244                               key.objectid, key.type, key.offset);
6245                 }
6246         }
6247
6248 out:
6249         if (err & LAST_ITEM) {
6250                 btrfs_release_path(path);
6251                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6252                 if (ret)
6253                         return err;
6254         }
6255
6256         /* verify INODE_ITEM nlink/isize/nbytes */
6257         if (dir) {
6258                 if (repair && (err & DIR_COUNT_AGAIN)) {
6259                         err &= ~DIR_COUNT_AGAIN;
6260                         count_dir_isize(root, inode_id, &size);
6261                 }
6262
6263                 if ((nlink != 1 || refs != 1) && repair) {
6264                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6265                                 namebuf, name_len, refs, imode_to_type(mode),
6266                                 &nlink);
6267                 }
6268
6269                 if (nlink != 1) {
6270                         err |= LINK_COUNT_ERROR;
6271                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6272                               root->objectid, inode_id, nlink);
6273                 }
6274
6275                 /*
6276                  * Just a warning, as dir inode nbytes is just an
6277                  * instructive value.
6278                  */
6279                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6280                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6281                                 root->objectid, inode_id,
6282                                 root->fs_info->nodesize);
6283                 }
6284
6285                 if (isize != size) {
6286                         if (repair)
6287                                 ret = repair_dir_isize_lowmem(root, path,
6288                                                               inode_id, size);
6289                         if (!repair || ret) {
6290                                 err |= ISIZE_ERROR;
6291                                 error(
6292                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6293                                       root->objectid, inode_id, isize, size);
6294                         }
6295                 }
6296         } else {
6297                 if (nlink != refs) {
6298                         if (repair)
6299                                 ret = repair_inode_nlinks_lowmem(root, path,
6300                                          inode_id, namebuf, name_len, refs,
6301                                          imode_to_type(mode), &nlink);
6302                         if (!repair || ret) {
6303                                 err |= LINK_COUNT_ERROR;
6304                                 error(
6305                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6306                                       root->objectid, inode_id, nlink, refs);
6307                         }
6308                 } else if (!nlink) {
6309                         if (repair)
6310                                 ret = repair_inode_orphan_item_lowmem(root,
6311                                                               path, inode_id);
6312                         if (!repair || ret) {
6313                                 err |= ORPHAN_ITEM;
6314                                 error("root %llu INODE[%llu] is orphan item",
6315                                       root->objectid, inode_id);
6316                         }
6317                 }
6318
6319                 if (!nbytes && !no_holes && extent_end < isize) {
6320                         if (repair)
6321                                 ret = punch_extent_hole(root, inode_id,
6322                                                 extent_end, isize - extent_end);
6323                         if (!repair || ret) {
6324                                 err |= NBYTES_ERROR;
6325                                 error(
6326         "root %llu INODE[%llu] size %llu should have a file extent hole",
6327                                       root->objectid, inode_id, isize);
6328                         }
6329                 }
6330
6331                 if (nbytes != extent_size) {
6332                         if (repair)
6333                                 ret = repair_inode_nbytes_lowmem(root, path,
6334                                                          inode_id, extent_size);
6335                         if (!repair || ret) {
6336                                 err |= NBYTES_ERROR;
6337                                 error(
6338         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6339                                       root->objectid, inode_id, nbytes,
6340                                       extent_size);
6341                         }
6342                 }
6343         }
6344
6345         if (err & LAST_ITEM)
6346                 btrfs_next_item(root, path);
6347         return err;
6348 }
6349
6350 /*
6351  * Insert the missing inode item and inode ref.
6352  *
6353  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6354  * Root dir should be handled specially because root dir is the root of fs.
6355  *
6356  * returns err (>0 or 0) after repair
6357  */
6358 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6359 {
6360         struct btrfs_trans_handle *trans;
6361         struct btrfs_key key;
6362         struct btrfs_path path;
6363         int filetype = BTRFS_FT_DIR;
6364         int ret = 0;
6365
6366         btrfs_init_path(&path);
6367
6368         if (err & INODE_REF_MISSING) {
6369                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6370                 key.type = BTRFS_INODE_REF_KEY;
6371                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6372
6373                 trans = btrfs_start_transaction(root, 1);
6374                 if (IS_ERR(trans)) {
6375                         ret = PTR_ERR(trans);
6376                         goto out;
6377                 }
6378
6379                 btrfs_release_path(&path);
6380                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6381                 if (ret)
6382                         goto trans_fail;
6383
6384                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6385                                              BTRFS_FIRST_FREE_OBJECTID,
6386                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6387                 if (ret)
6388                         goto trans_fail;
6389
6390                 printf("Add INODE_REF[%llu %llu] name %s\n",
6391                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6392                        "..");
6393                 err &= ~INODE_REF_MISSING;
6394 trans_fail:
6395                 if (ret)
6396                         error("fail to insert first inode's ref");
6397                 btrfs_commit_transaction(trans, root);
6398         }
6399
6400         if (err & INODE_ITEM_MISSING) {
6401                 ret = repair_inode_item_missing(root,
6402                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6403                 if (ret)
6404                         goto out;
6405                 err &= ~INODE_ITEM_MISSING;
6406         }
6407 out:
6408         if (ret)
6409                 error("fail to repair first inode");
6410         btrfs_release_path(&path);
6411         return err;
6412 }
6413
6414 /*
6415  * check first root dir's inode_item and inode_ref
6416  *
6417  * returns 0 means no error
6418  * returns >0 means error
6419  * returns <0 means fatal error
6420  */
6421 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6422 {
6423         struct btrfs_path path;
6424         struct btrfs_key key;
6425         struct btrfs_inode_item *ii;
6426         u64 index;
6427         u32 mode;
6428         int err = 0;
6429         int ret;
6430
6431         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6432         key.type = BTRFS_INODE_ITEM_KEY;
6433         key.offset = 0;
6434
6435         /* For root being dropped, we don't need to check first inode */
6436         if (btrfs_root_refs(&root->root_item) == 0 &&
6437             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6438             BTRFS_FIRST_FREE_OBJECTID)
6439                 return 0;
6440
6441         btrfs_init_path(&path);
6442         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6443         if (ret < 0)
6444                 goto out;
6445         if (ret > 0) {
6446                 ret = 0;
6447                 err |= INODE_ITEM_MISSING;
6448         } else {
6449                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6450                                     struct btrfs_inode_item);
6451                 mode = btrfs_inode_mode(path.nodes[0], ii);
6452                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6453                         err |= INODE_ITEM_MISMATCH;
6454         }
6455
6456         /* lookup first inode ref */
6457         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6458         key.type = BTRFS_INODE_REF_KEY;
6459         /* special index value */
6460         index = 0;
6461
6462         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6463         if (ret < 0)
6464                 goto out;
6465         err |= ret;
6466
6467 out:
6468         btrfs_release_path(&path);
6469
6470         if (err && repair)
6471                 err = repair_fs_first_inode(root, err);
6472
6473         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6474                 error("root dir INODE_ITEM is %s",
6475                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6476         if (err & INODE_REF_MISSING)
6477                 error("root dir INODE_REF is missing");
6478
6479         return ret < 0 ? ret : err;
6480 }
6481
6482 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6483                                                 u64 parent, u64 root)
6484 {
6485         struct rb_node *node;
6486         struct tree_backref *back = NULL;
6487         struct tree_backref match = {
6488                 .node = {
6489                         .is_data = 0,
6490                 },
6491         };
6492
6493         if (parent) {
6494                 match.parent = parent;
6495                 match.node.full_backref = 1;
6496         } else {
6497                 match.root = root;
6498         }
6499
6500         node = rb_search(&rec->backref_tree, &match.node.node,
6501                          (rb_compare_keys)compare_extent_backref, NULL);
6502         if (node)
6503                 back = to_tree_backref(rb_node_to_extent_backref(node));
6504
6505         return back;
6506 }
6507
6508 static struct data_backref *find_data_backref(struct extent_record *rec,
6509                                                 u64 parent, u64 root,
6510                                                 u64 owner, u64 offset,
6511                                                 int found_ref,
6512                                                 u64 disk_bytenr, u64 bytes)
6513 {
6514         struct rb_node *node;
6515         struct data_backref *back = NULL;
6516         struct data_backref match = {
6517                 .node = {
6518                         .is_data = 1,
6519                 },
6520                 .owner = owner,
6521                 .offset = offset,
6522                 .bytes = bytes,
6523                 .found_ref = found_ref,
6524                 .disk_bytenr = disk_bytenr,
6525         };
6526
6527         if (parent) {
6528                 match.parent = parent;
6529                 match.node.full_backref = 1;
6530         } else {
6531                 match.root = root;
6532         }
6533
6534         node = rb_search(&rec->backref_tree, &match.node.node,
6535                          (rb_compare_keys)compare_extent_backref, NULL);
6536         if (node)
6537                 back = to_data_backref(rb_node_to_extent_backref(node));
6538
6539         return back;
6540 }
6541 /*
6542  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6543  * blocks and integrity of fs tree items.
6544  *
6545  * @root:         the root of the tree to be checked.
6546  * @ext_ref       feature EXTENDED_IREF is enable or not.
6547  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6548  *                otherwise means check fs tree(s) items relationship and
6549  *                @root MUST be a fs tree root.
6550  * Returns 0      represents OK.
6551  * Returns not 0  represents error.
6552  */
6553 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6554                             struct btrfs_root *root, unsigned int ext_ref,
6555                             int check_all)
6556
6557 {
6558         struct btrfs_path path;
6559         struct node_refs nrefs;
6560         struct btrfs_root_item *root_item = &root->root_item;
6561         int ret;
6562         int level;
6563         int err = 0;
6564
6565         memset(&nrefs, 0, sizeof(nrefs));
6566         if (!check_all) {
6567                 /*
6568                  * We need to manually check the first inode item (256)
6569                  * As the following traversal function will only start from
6570                  * the first inode item in the leaf, if inode item (256) is
6571                  * missing we will skip it forever.
6572                  */
6573                 ret = check_fs_first_inode(root, ext_ref);
6574                 if (ret < 0)
6575                         return ret;
6576         }
6577
6578
6579         level = btrfs_header_level(root->node);
6580         btrfs_init_path(&path);
6581
6582         if (btrfs_root_refs(root_item) > 0 ||
6583             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6584                 path.nodes[level] = root->node;
6585                 path.slots[level] = 0;
6586                 extent_buffer_get(root->node);
6587         } else {
6588                 struct btrfs_key key;
6589
6590                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6591                 level = root_item->drop_level;
6592                 path.lowest_level = level;
6593                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6594                 if (ret < 0)
6595                         goto out;
6596                 ret = 0;
6597         }
6598
6599         while (1) {
6600                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6601                                         ext_ref, check_all);
6602
6603                 err |= !!ret;
6604
6605                 /* if ret is negative, walk shall stop */
6606                 if (ret < 0) {
6607                         ret = err;
6608                         break;
6609                 }
6610
6611                 ret = walk_up_tree_v2(root, &path, &level);
6612                 if (ret != 0) {
6613                         /* Normal exit, reset ret to err */
6614                         ret = err;
6615                         break;
6616                 }
6617         }
6618
6619 out:
6620         btrfs_release_path(&path);
6621         return ret;
6622 }
6623
6624 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
6625
6626 /*
6627  * Iterate all items in the tree and call check_inode_item() to check.
6628  *
6629  * @root:       the root of the tree to be checked.
6630  * @ext_ref:    the EXTENDED_IREF feature
6631  *
6632  * Return 0 if no error found.
6633  * Return <0 for error.
6634  */
6635 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6636 {
6637         reset_cached_block_groups(root->fs_info);
6638         return check_btrfs_root(NULL, root, ext_ref, 0);
6639 }
6640
6641 /*
6642  * Find the relative ref for root_ref and root_backref.
6643  *
6644  * @root:       the root of the root tree.
6645  * @ref_key:    the key of the root ref.
6646  *
6647  * Return 0 if no error occurred.
6648  */
6649 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6650                           struct extent_buffer *node, int slot)
6651 {
6652         struct btrfs_path path;
6653         struct btrfs_key key;
6654         struct btrfs_root_ref *ref;
6655         struct btrfs_root_ref *backref;
6656         char ref_name[BTRFS_NAME_LEN] = {0};
6657         char backref_name[BTRFS_NAME_LEN] = {0};
6658         u64 ref_dirid;
6659         u64 ref_seq;
6660         u32 ref_namelen;
6661         u64 backref_dirid;
6662         u64 backref_seq;
6663         u32 backref_namelen;
6664         u32 len;
6665         int ret;
6666         int err = 0;
6667
6668         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6669         ref_dirid = btrfs_root_ref_dirid(node, ref);
6670         ref_seq = btrfs_root_ref_sequence(node, ref);
6671         ref_namelen = btrfs_root_ref_name_len(node, ref);
6672
6673         if (ref_namelen <= BTRFS_NAME_LEN) {
6674                 len = ref_namelen;
6675         } else {
6676                 len = BTRFS_NAME_LEN;
6677                 warning("%s[%llu %llu] ref_name too long",
6678                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6679                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6680                         ref_key->offset);
6681         }
6682         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6683
6684         /* Find relative root_ref */
6685         key.objectid = ref_key->offset;
6686         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6687         key.offset = ref_key->objectid;
6688
6689         btrfs_init_path(&path);
6690         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6691         if (ret) {
6692                 err |= ROOT_REF_MISSING;
6693                 error("%s[%llu %llu] couldn't find relative ref",
6694                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6695                       "ROOT_REF" : "ROOT_BACKREF",
6696                       ref_key->objectid, ref_key->offset);
6697                 goto out;
6698         }
6699
6700         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6701                                  struct btrfs_root_ref);
6702         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6703         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6704         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6705
6706         if (backref_namelen <= BTRFS_NAME_LEN) {
6707                 len = backref_namelen;
6708         } else {
6709                 len = BTRFS_NAME_LEN;
6710                 warning("%s[%llu %llu] ref_name too long",
6711                         key.type == BTRFS_ROOT_REF_KEY ?
6712                         "ROOT_REF" : "ROOT_BACKREF",
6713                         key.objectid, key.offset);
6714         }
6715         read_extent_buffer(path.nodes[0], backref_name,
6716                            (unsigned long)(backref + 1), len);
6717
6718         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6719             ref_namelen != backref_namelen ||
6720             strncmp(ref_name, backref_name, len)) {
6721                 err |= ROOT_REF_MISMATCH;
6722                 error("%s[%llu %llu] mismatch relative ref",
6723                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6724                       "ROOT_REF" : "ROOT_BACKREF",
6725                       ref_key->objectid, ref_key->offset);
6726         }
6727 out:
6728         btrfs_release_path(&path);
6729         return err;
6730 }
6731
6732 /*
6733  * Check all fs/file tree in low_memory mode.
6734  *
6735  * 1. for fs tree root item, call check_fs_root_v2()
6736  * 2. for fs tree root ref/backref, call check_root_ref()
6737  *
6738  * Return 0 if no error occurred.
6739  */
6740 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6741 {
6742         struct btrfs_root *tree_root = fs_info->tree_root;
6743         struct btrfs_root *cur_root = NULL;
6744         struct btrfs_path path;
6745         struct btrfs_key key;
6746         struct extent_buffer *node;
6747         unsigned int ext_ref;
6748         int slot;
6749         int ret;
6750         int err = 0;
6751
6752         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6753
6754         btrfs_init_path(&path);
6755         key.objectid = BTRFS_FS_TREE_OBJECTID;
6756         key.offset = 0;
6757         key.type = BTRFS_ROOT_ITEM_KEY;
6758
6759         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6760         if (ret < 0) {
6761                 err = ret;
6762                 goto out;
6763         } else if (ret > 0) {
6764                 err = -ENOENT;
6765                 goto out;
6766         }
6767
6768         while (1) {
6769                 node = path.nodes[0];
6770                 slot = path.slots[0];
6771                 btrfs_item_key_to_cpu(node, &key, slot);
6772                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6773                         goto out;
6774                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6775                     fs_root_objectid(key.objectid)) {
6776                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6777                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6778                                                                        &key);
6779                         } else {
6780                                 key.offset = (u64)-1;
6781                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6782                         }
6783
6784                         if (IS_ERR(cur_root)) {
6785                                 error("Fail to read fs/subvol tree: %lld",
6786                                       key.objectid);
6787                                 err = -EIO;
6788                                 goto next;
6789                         }
6790
6791                         ret = check_fs_root_v2(cur_root, ext_ref);
6792                         err |= ret;
6793
6794                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6795                                 btrfs_free_fs_root(cur_root);
6796                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6797                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6798                         ret = check_root_ref(tree_root, &key, node, slot);
6799                         err |= ret;
6800                 }
6801 next:
6802                 ret = btrfs_next_item(tree_root, &path);
6803                 if (ret > 0)
6804                         goto out;
6805                 if (ret < 0) {
6806                         err = ret;
6807                         goto out;
6808                 }
6809         }
6810
6811 out:
6812         btrfs_release_path(&path);
6813         return err;
6814 }
6815
6816 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6817                           struct cache_tree *root_cache)
6818 {
6819         int ret;
6820
6821         if (!ctx.progress_enabled)
6822                 fprintf(stderr, "checking fs roots\n");
6823         if (check_mode == CHECK_MODE_LOWMEM)
6824                 ret = check_fs_roots_v2(fs_info);
6825         else
6826                 ret = check_fs_roots(fs_info, root_cache);
6827
6828         return ret;
6829 }
6830
6831 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6832 {
6833         struct extent_backref *back, *tmp;
6834         struct tree_backref *tback;
6835         struct data_backref *dback;
6836         u64 found = 0;
6837         int err = 0;
6838
6839         rbtree_postorder_for_each_entry_safe(back, tmp,
6840                                              &rec->backref_tree, node) {
6841                 if (!back->found_extent_tree) {
6842                         err = 1;
6843                         if (!print_errs)
6844                                 goto out;
6845                         if (back->is_data) {
6846                                 dback = to_data_backref(back);
6847                                 fprintf(stderr, "Data backref %llu %s %llu"
6848                                         " owner %llu offset %llu num_refs %lu"
6849                                         " not found in extent tree\n",
6850                                         (unsigned long long)rec->start,
6851                                         back->full_backref ?
6852                                         "parent" : "root",
6853                                         back->full_backref ?
6854                                         (unsigned long long)dback->parent:
6855                                         (unsigned long long)dback->root,
6856                                         (unsigned long long)dback->owner,
6857                                         (unsigned long long)dback->offset,
6858                                         (unsigned long)dback->num_refs);
6859                         } else {
6860                                 tback = to_tree_backref(back);
6861                                 fprintf(stderr, "Tree backref %llu parent %llu"
6862                                         " root %llu not found in extent tree\n",
6863                                         (unsigned long long)rec->start,
6864                                         (unsigned long long)tback->parent,
6865                                         (unsigned long long)tback->root);
6866                         }
6867                 }
6868                 if (!back->is_data && !back->found_ref) {
6869                         err = 1;
6870                         if (!print_errs)
6871                                 goto out;
6872                         tback = to_tree_backref(back);
6873                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6874                                 (unsigned long long)rec->start,
6875                                 back->full_backref ? "parent" : "root",
6876                                 back->full_backref ?
6877                                 (unsigned long long)tback->parent :
6878                                 (unsigned long long)tback->root, back);
6879                 }
6880                 if (back->is_data) {
6881                         dback = to_data_backref(back);
6882                         if (dback->found_ref != dback->num_refs) {
6883                                 err = 1;
6884                                 if (!print_errs)
6885                                         goto out;
6886                                 fprintf(stderr, "Incorrect local backref count"
6887                                         " on %llu %s %llu owner %llu"
6888                                         " offset %llu found %u wanted %u back %p\n",
6889                                         (unsigned long long)rec->start,
6890                                         back->full_backref ?
6891                                         "parent" : "root",
6892                                         back->full_backref ?
6893                                         (unsigned long long)dback->parent:
6894                                         (unsigned long long)dback->root,
6895                                         (unsigned long long)dback->owner,
6896                                         (unsigned long long)dback->offset,
6897                                         dback->found_ref, dback->num_refs, back);
6898                         }
6899                         if (dback->disk_bytenr != rec->start) {
6900                                 err = 1;
6901                                 if (!print_errs)
6902                                         goto out;
6903                                 fprintf(stderr, "Backref disk bytenr does not"
6904                                         " match extent record, bytenr=%llu, "
6905                                         "ref bytenr=%llu\n",
6906                                         (unsigned long long)rec->start,
6907                                         (unsigned long long)dback->disk_bytenr);
6908                         }
6909
6910                         if (dback->bytes != rec->nr) {
6911                                 err = 1;
6912                                 if (!print_errs)
6913                                         goto out;
6914                                 fprintf(stderr, "Backref bytes do not match "
6915                                         "extent backref, bytenr=%llu, ref "
6916                                         "bytes=%llu, backref bytes=%llu\n",
6917                                         (unsigned long long)rec->start,
6918                                         (unsigned long long)rec->nr,
6919                                         (unsigned long long)dback->bytes);
6920                         }
6921                 }
6922                 if (!back->is_data) {
6923                         found += 1;
6924                 } else {
6925                         dback = to_data_backref(back);
6926                         found += dback->found_ref;
6927                 }
6928         }
6929         if (found != rec->refs) {
6930                 err = 1;
6931                 if (!print_errs)
6932                         goto out;
6933                 fprintf(stderr, "Incorrect global backref count "
6934                         "on %llu found %llu wanted %llu\n",
6935                         (unsigned long long)rec->start,
6936                         (unsigned long long)found,
6937                         (unsigned long long)rec->refs);
6938         }
6939 out:
6940         return err;
6941 }
6942
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
6949
6950 static void free_all_extent_backrefs(struct extent_record *rec)
6951 {
6952         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6953 }
6954
6955 static void free_extent_record_cache(struct cache_tree *extent_cache)
6956 {
6957         struct cache_extent *cache;
6958         struct extent_record *rec;
6959
6960         while (1) {
6961                 cache = first_cache_extent(extent_cache);
6962                 if (!cache)
6963                         break;
6964                 rec = container_of(cache, struct extent_record, cache);
6965                 remove_cache_extent(extent_cache, cache);
6966                 free_all_extent_backrefs(rec);
6967                 free(rec);
6968         }
6969 }
6970
6971 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6972                                  struct extent_record *rec)
6973 {
6974         if (rec->content_checked && rec->owner_ref_checked &&
6975             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6976             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6977             !rec->bad_full_backref && !rec->crossing_stripes &&
6978             !rec->wrong_chunk_type) {
6979                 remove_cache_extent(extent_cache, &rec->cache);
6980                 free_all_extent_backrefs(rec);
6981                 list_del_init(&rec->list);
6982                 free(rec);
6983         }
6984         return 0;
6985 }
6986
6987 static int check_owner_ref(struct btrfs_root *root,
6988                             struct extent_record *rec,
6989                             struct extent_buffer *buf)
6990 {
6991         struct extent_backref *node, *tmp;
6992         struct tree_backref *back;
6993         struct btrfs_root *ref_root;
6994         struct btrfs_key key;
6995         struct btrfs_path path;
6996         struct extent_buffer *parent;
6997         int level;
6998         int found = 0;
6999         int ret;
7000
7001         rbtree_postorder_for_each_entry_safe(node, tmp,
7002                                              &rec->backref_tree, node) {
7003                 if (node->is_data)
7004                         continue;
7005                 if (!node->found_ref)
7006                         continue;
7007                 if (node->full_backref)
7008                         continue;
7009                 back = to_tree_backref(node);
7010                 if (btrfs_header_owner(buf) == back->root)
7011                         return 0;
7012         }
7013         BUG_ON(rec->is_root);
7014
7015         /* try to find the block by search corresponding fs tree */
7016         key.objectid = btrfs_header_owner(buf);
7017         key.type = BTRFS_ROOT_ITEM_KEY;
7018         key.offset = (u64)-1;
7019
7020         ref_root = btrfs_read_fs_root(root->fs_info, &key);
7021         if (IS_ERR(ref_root))
7022                 return 1;
7023
7024         level = btrfs_header_level(buf);
7025         if (level == 0)
7026                 btrfs_item_key_to_cpu(buf, &key, 0);
7027         else
7028                 btrfs_node_key_to_cpu(buf, &key, 0);
7029
7030         btrfs_init_path(&path);
7031         path.lowest_level = level + 1;
7032         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7033         if (ret < 0)
7034                 return 0;
7035
7036         parent = path.nodes[level + 1];
7037         if (parent && buf->start == btrfs_node_blockptr(parent,
7038                                                         path.slots[level + 1]))
7039                 found = 1;
7040
7041         btrfs_release_path(&path);
7042         return found ? 0 : 1;
7043 }
7044
7045 static int is_extent_tree_record(struct extent_record *rec)
7046 {
7047         struct extent_backref *node, *tmp;
7048         struct tree_backref *back;
7049         int is_extent = 0;
7050
7051         rbtree_postorder_for_each_entry_safe(node, tmp,
7052                                              &rec->backref_tree, node) {
7053                 if (node->is_data)
7054                         return 0;
7055                 back = to_tree_backref(node);
7056                 if (node->full_backref)
7057                         return 0;
7058                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
7059                         is_extent = 1;
7060         }
7061         return is_extent;
7062 }
7063
7064
7065 static int record_bad_block_io(struct btrfs_fs_info *info,
7066                                struct cache_tree *extent_cache,
7067                                u64 start, u64 len)
7068 {
7069         struct extent_record *rec;
7070         struct cache_extent *cache;
7071         struct btrfs_key key;
7072
7073         cache = lookup_cache_extent(extent_cache, start, len);
7074         if (!cache)
7075                 return 0;
7076
7077         rec = container_of(cache, struct extent_record, cache);
7078         if (!is_extent_tree_record(rec))
7079                 return 0;
7080
7081         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7082         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
7083 }
7084
/*
 * Exchange the entries stored at @slot and @slot + 1 of @buf.
 *
 * For a node (header level != 0) the two key pointers are swapped as whole
 * structures; if @slot is 0 the new first key is pushed to the parent levels
 * through btrfs_fixup_low_keys().
 *
 * For a leaf the data of both items is copied out and written back, the
 * offset/size fields of the two item headers are exchanged and the keys are
 * swapped with btrfs_set_item_key_unsafe().  path->slots[0] is modified and
 * is left at @slot + 1 on return.
 *
 * Returns 0 on success or -ENOMEM if a temporary buffer can't be allocated.
 * The caller is responsible for marking @buf dirty.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write them back crosswise. */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        struct btrfs_disk_key key;
                        /* The first key changed, update the levels above. */
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): item1_data holds item1_size bytes but is
                 * written with item2_size bytes here (and item2_data the
                 * other way round).  When the two sizes differ, one copy
                 * reads past the end of its buffer and the other is
                 * truncated.  Confirm the intended lengths and resulting
                 * leaf layout before relying on this path.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /* Each item header takes over the other's offset and size. */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* Keys are rewritten via the path, one slot at a time. */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
7152
7153 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7154 {
7155         struct extent_buffer *buf;
7156         struct btrfs_key k1, k2;
7157         int i;
7158         int level = path->lowest_level;
7159         int ret = -EIO;
7160
7161         buf = path->nodes[level];
7162         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7163                 if (level) {
7164                         btrfs_node_key_to_cpu(buf, &k1, i);
7165                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
7166                 } else {
7167                         btrfs_item_key_to_cpu(buf, &k1, i);
7168                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
7169                 }
7170                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7171                         continue;
7172                 ret = swap_values(root, path, buf, i);
7173                 if (ret)
7174                         break;
7175                 btrfs_mark_buffer_dirty(buf);
7176                 i = 0;
7177         }
7178         return ret;
7179 }
7180
7181 static int delete_bogus_item(struct btrfs_root *root,
7182                              struct btrfs_path *path,
7183                              struct extent_buffer *buf, int slot)
7184 {
7185         struct btrfs_key key;
7186         int nritems = btrfs_header_nritems(buf);
7187
7188         btrfs_item_key_to_cpu(buf, &key, slot);
7189
7190         /* These are all the keys we can deal with missing. */
7191         if (key.type != BTRFS_DIR_INDEX_KEY &&
7192             key.type != BTRFS_EXTENT_ITEM_KEY &&
7193             key.type != BTRFS_METADATA_ITEM_KEY &&
7194             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7195             key.type != BTRFS_EXTENT_DATA_REF_KEY)
7196                 return -1;
7197
7198         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7199                (unsigned long long)key.objectid, key.type,
7200                (unsigned long long)key.offset, slot, buf->start);
7201         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7202                               btrfs_item_nr_offset(slot + 1),
7203                               sizeof(struct btrfs_item) *
7204                               (nritems - slot - 1));
7205         btrfs_set_header_nritems(buf, nritems - 1);
7206         if (slot == 0) {
7207                 struct btrfs_disk_key disk_key;
7208
7209                 btrfs_item_key(buf, &disk_key, 0);
7210                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7211         }
7212         btrfs_mark_buffer_dirty(buf);
7213         return 0;
7214 }
7215
7216 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7217 {
7218         struct extent_buffer *buf;
7219         int i;
7220         int ret = 0;
7221
7222         /* We should only get this for leaves */
7223         BUG_ON(path->lowest_level);
7224         buf = path->nodes[0];
7225 again:
7226         for (i = 0; i < btrfs_header_nritems(buf); i++) {
7227                 unsigned int shift = 0, offset;
7228
7229                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7230                     BTRFS_LEAF_DATA_SIZE(root)) {
7231                         if (btrfs_item_end_nr(buf, i) >
7232                             BTRFS_LEAF_DATA_SIZE(root)) {
7233                                 ret = delete_bogus_item(root, path, buf, i);
7234                                 if (!ret)
7235                                         goto again;
7236                                 fprintf(stderr, "item is off the end of the "
7237                                         "leaf, can't fix\n");
7238                                 ret = -EIO;
7239                                 break;
7240                         }
7241                         shift = BTRFS_LEAF_DATA_SIZE(root) -
7242                                 btrfs_item_end_nr(buf, i);
7243                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7244                            btrfs_item_offset_nr(buf, i - 1)) {
7245                         if (btrfs_item_end_nr(buf, i) >
7246                             btrfs_item_offset_nr(buf, i - 1)) {
7247                                 ret = delete_bogus_item(root, path, buf, i);
7248                                 if (!ret)
7249                                         goto again;
7250                                 fprintf(stderr, "items overlap, can't fix\n");
7251                                 ret = -EIO;
7252                                 break;
7253                         }
7254                         shift = btrfs_item_offset_nr(buf, i - 1) -
7255                                 btrfs_item_end_nr(buf, i);
7256                 }
7257                 if (!shift)
7258                         continue;
7259
7260                 printf("Shifting item nr %d by %u bytes in block %llu\n",
7261                        i, shift, (unsigned long long)buf->start);
7262                 offset = btrfs_item_offset_nr(buf, i);
7263                 memmove_extent_buffer(buf,
7264                                       btrfs_leaf_data(buf) + offset + shift,
7265                                       btrfs_leaf_data(buf) + offset,
7266                                       btrfs_item_size_nr(buf, i));
7267                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7268                                       offset + shift);
7269                 btrfs_mark_buffer_dirty(buf);
7270         }
7271
7272         /*
7273          * We may have moved things, in which case we want to exit so we don't
7274          * write those changes out.  Once we have proper abort functionality in
7275          * progs this can be changed to something nicer.
7276          */
7277         BUG_ON(ret);
7278         return ret;
7279 }
7280
7281 /*
7282  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
7283  * then just return -EIO.
7284  */
7285 static int try_to_fix_bad_block(struct btrfs_root *root,
7286                                 struct extent_buffer *buf,
7287                                 enum btrfs_tree_block_status status)
7288 {
7289         struct btrfs_trans_handle *trans;
7290         struct ulist *roots;
7291         struct ulist_node *node;
7292         struct btrfs_root *search_root;
7293         struct btrfs_path path;
7294         struct ulist_iterator iter;
7295         struct btrfs_key root_key, key;
7296         int ret;
7297
7298         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7299             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7300                 return -EIO;
7301
7302         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7303         if (ret)
7304                 return -EIO;
7305
7306         btrfs_init_path(&path);
7307         ULIST_ITER_INIT(&iter);
7308         while ((node = ulist_next(roots, &iter))) {
7309                 root_key.objectid = node->val;
7310                 root_key.type = BTRFS_ROOT_ITEM_KEY;
7311                 root_key.offset = (u64)-1;
7312
7313                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7314                 if (IS_ERR(root)) {
7315                         ret = -EIO;
7316                         break;
7317                 }
7318
7319
7320                 trans = btrfs_start_transaction(search_root, 0);
7321                 if (IS_ERR(trans)) {
7322                         ret = PTR_ERR(trans);
7323                         break;
7324                 }
7325
7326                 path.lowest_level = btrfs_header_level(buf);
7327                 path.skip_check_block = 1;
7328                 if (path.lowest_level)
7329                         btrfs_node_key_to_cpu(buf, &key, 0);
7330                 else
7331                         btrfs_item_key_to_cpu(buf, &key, 0);
7332                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7333                 if (ret) {
7334                         ret = -EIO;
7335                         btrfs_commit_transaction(trans, search_root);
7336                         break;
7337                 }
7338                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7339                         ret = fix_key_order(search_root, &path);
7340                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7341                         ret = fix_item_offset(search_root, &path);
7342                 if (ret) {
7343                         btrfs_commit_transaction(trans, search_root);
7344                         break;
7345                 }
7346                 btrfs_release_path(&path);
7347                 btrfs_commit_transaction(trans, search_root);
7348         }
7349         ulist_free(roots);
7350         btrfs_release_path(&path);
7351         return ret;
7352 }
7353
7354 static int check_block(struct btrfs_root *root,
7355                        struct cache_tree *extent_cache,
7356                        struct extent_buffer *buf, u64 flags)
7357 {
7358         struct extent_record *rec;
7359         struct cache_extent *cache;
7360         struct btrfs_key key;
7361         enum btrfs_tree_block_status status;
7362         int ret = 0;
7363         int level;
7364
7365         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7366         if (!cache)
7367                 return 1;
7368         rec = container_of(cache, struct extent_record, cache);
7369         rec->generation = btrfs_header_generation(buf);
7370
7371         level = btrfs_header_level(buf);
7372         if (btrfs_header_nritems(buf) > 0) {
7373
7374                 if (level == 0)
7375                         btrfs_item_key_to_cpu(buf, &key, 0);
7376                 else
7377                         btrfs_node_key_to_cpu(buf, &key, 0);
7378
7379                 rec->info_objectid = key.objectid;
7380         }
7381         rec->info_level = level;
7382
7383         if (btrfs_is_leaf(buf))
7384                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7385         else
7386                 status = btrfs_check_node(root, &rec->parent_key, buf);
7387
7388         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7389                 if (repair)
7390                         status = try_to_fix_bad_block(root, buf, status);
7391                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7392                         ret = -EIO;
7393                         fprintf(stderr, "bad block %llu\n",
7394                                 (unsigned long long)buf->start);
7395                 } else {
7396                         /*
7397                          * Signal to callers we need to start the scan over
7398                          * again since we'll have cowed blocks.
7399                          */
7400                         ret = -EAGAIN;
7401                 }
7402         } else {
7403                 rec->content_checked = 1;
7404                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7405                         rec->owner_ref_checked = 1;
7406                 else {
7407                         ret = check_owner_ref(root, rec, buf);
7408                         if (!ret)
7409                                 rec->owner_ref_checked = 1;
7410                 }
7411         }
7412         if (!ret)
7413                 maybe_free_extent_rec(extent_cache, rec);
7414         return ret;
7415 }
7416
/*
 * Compiled out by "#if 0": list-based lookup of a tree backref on
 * rec->backrefs.
 *
 * With @parent > 0 only full backrefs are considered and matched on
 * back->parent; otherwise only non-full backrefs are considered and matched
 * on back->root.  Data backrefs are skipped.  Returns NULL when nothing
 * matches.
 *
 * NOTE(review): add_tree_backref() still calls find_tree_backref(), so a
 * live definition presumably exists elsewhere in this file - confirm before
 * removing or re-enabling this copy.
 */
#if 0
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct tree_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                /* Advance first so "continue" below can't loop forever. */
                cur = cur->next;
                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root)
                                return back;
                }
        }
        return NULL;
}
#endif
7446
7447 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7448                                                 u64 parent, u64 root)
7449 {
7450         struct tree_backref *ref = malloc(sizeof(*ref));
7451
7452         if (!ref)
7453                 return NULL;
7454         memset(&ref->node, 0, sizeof(ref->node));
7455         if (parent > 0) {
7456                 ref->parent = parent;
7457                 ref->node.full_backref = 1;
7458         } else {
7459                 ref->root = root;
7460                 ref->node.full_backref = 0;
7461         }
7462
7463         return ref;
7464 }
7465
/*
 * Compiled out by "#if 0": list-based lookup of a data backref on
 * rec->backrefs.
 *
 * With @parent > 0 only full backrefs are considered and matched on
 * back->parent.  Otherwise non-full backrefs are matched on root, owner and
 * offset; if @found_ref is set and the candidate was already found with a
 * different bytes/disk_bytenr pair, it is skipped.  Tree backrefs are
 * ignored.  Returns NULL when nothing matches.
 */
#if 0
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct data_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                /* Advance first so "continue" below can't loop forever. */
                cur = cur->next;
                if (!node->is_data)
                        continue;
                back = to_data_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root && back->owner == owner &&
                            back->offset == offset) {
                                if (found_ref && node->found_ref &&
                                    (back->bytes != bytes ||
                                    back->disk_bytenr != disk_bytenr))
                                        continue;
                                return back;
                        }
                }
        }
        return NULL;
}
#endif
7504
7505 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7506                                                 u64 parent, u64 root,
7507                                                 u64 owner, u64 offset,
7508                                                 u64 max_size)
7509 {
7510         struct data_backref *ref = malloc(sizeof(*ref));
7511
7512         if (!ref)
7513                 return NULL;
7514         memset(&ref->node, 0, sizeof(ref->node));
7515         ref->node.is_data = 1;
7516
7517         if (parent > 0) {
7518                 ref->parent = parent;
7519                 ref->owner = 0;
7520                 ref->offset = 0;
7521                 ref->node.full_backref = 1;
7522         } else {
7523                 ref->root = root;
7524                 ref->owner = owner;
7525                 ref->offset = offset;
7526                 ref->node.full_backref = 0;
7527         }
7528         ref->bytes = max_size;
7529         ref->found_ref = 0;
7530         ref->num_refs = 0;
7531         if (max_size > rec->max_size)
7532                 rec->max_size = max_size;
7533         return ref;
7534 }
7535
7536 /* Check if the type of extent matches with its chunk */
7537 static void check_extent_type(struct extent_record *rec)
7538 {
7539         struct btrfs_block_group_cache *bg_cache;
7540
7541         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7542         if (!bg_cache)
7543                 return;
7544
7545         /* data extent, check chunk directly*/
7546         if (!rec->metadata) {
7547                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7548                         rec->wrong_chunk_type = 1;
7549                 return;
7550         }
7551
7552         /* metadata extent, check the obvious case first */
7553         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7554                                  BTRFS_BLOCK_GROUP_METADATA))) {
7555                 rec->wrong_chunk_type = 1;
7556                 return;
7557         }
7558
7559         /*
7560          * Check SYSTEM extent, as it's also marked as metadata, we can only
7561          * make sure it's a SYSTEM extent by its backref
7562          */
7563         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7564                 struct extent_backref *node;
7565                 struct tree_backref *tback;
7566                 u64 bg_type;
7567
7568                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7569                 if (node->is_data) {
7570                         /* tree block shouldn't have data backref */
7571                         rec->wrong_chunk_type = 1;
7572                         return;
7573                 }
7574                 tback = container_of(node, struct tree_backref, node);
7575
7576                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7577                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7578                 else
7579                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7580                 if (!(bg_cache->flags & bg_type))
7581                         rec->wrong_chunk_type = 1;
7582         }
7583 }
7584
7585 /*
7586  * Allocate a new extent record, fill default values from @tmpl and insert int
7587  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7588  * the cache, otherwise it fails.
7589  */
7590 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7591                 struct extent_record *tmpl)
7592 {
7593         struct extent_record *rec;
7594         int ret = 0;
7595
7596         BUG_ON(tmpl->max_size == 0);
7597         rec = malloc(sizeof(*rec));
7598         if (!rec)
7599                 return -ENOMEM;
7600         rec->start = tmpl->start;
7601         rec->max_size = tmpl->max_size;
7602         rec->nr = max(tmpl->nr, tmpl->max_size);
7603         rec->found_rec = tmpl->found_rec;
7604         rec->content_checked = tmpl->content_checked;
7605         rec->owner_ref_checked = tmpl->owner_ref_checked;
7606         rec->num_duplicates = 0;
7607         rec->metadata = tmpl->metadata;
7608         rec->flag_block_full_backref = FLAG_UNSET;
7609         rec->bad_full_backref = 0;
7610         rec->crossing_stripes = 0;
7611         rec->wrong_chunk_type = 0;
7612         rec->is_root = tmpl->is_root;
7613         rec->refs = tmpl->refs;
7614         rec->extent_item_refs = tmpl->extent_item_refs;
7615         rec->parent_generation = tmpl->parent_generation;
7616         INIT_LIST_HEAD(&rec->backrefs);
7617         INIT_LIST_HEAD(&rec->dups);
7618         INIT_LIST_HEAD(&rec->list);
7619         rec->backref_tree = RB_ROOT;
7620         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7621         rec->cache.start = tmpl->start;
7622         rec->cache.size = tmpl->nr;
7623         ret = insert_cache_extent(extent_cache, &rec->cache);
7624         if (ret) {
7625                 free(rec);
7626                 return ret;
7627         }
7628         bytes_used += rec->nr;
7629
7630         if (tmpl->metadata)
7631                 rec->crossing_stripes = check_crossing_stripes(global_info,
7632                                 rec->start, global_info->nodesize);
7633         check_extent_type(rec);
7634         return ret;
7635 }
7636
7637 /*
7638  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7639  * some are hints:
7640  * - refs              - if found, increase refs
7641  * - is_root           - if found, set
7642  * - content_checked   - if found, set
7643  * - owner_ref_checked - if found, set
7644  *
7645  * If not found, create a new one, initialize and insert.
7646  */
7647 static int add_extent_rec(struct cache_tree *extent_cache,
7648                 struct extent_record *tmpl)
7649 {
7650         struct extent_record *rec;
7651         struct cache_extent *cache;
7652         int ret = 0;
7653         int dup = 0;
7654
7655         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7656         if (cache) {
7657                 rec = container_of(cache, struct extent_record, cache);
7658                 if (tmpl->refs)
7659                         rec->refs++;
7660                 if (rec->nr == 1)
7661                         rec->nr = max(tmpl->nr, tmpl->max_size);
7662
7663                 /*
7664                  * We need to make sure to reset nr to whatever the extent
7665                  * record says was the real size, this way we can compare it to
7666                  * the backrefs.
7667                  */
7668                 if (tmpl->found_rec) {
7669                         if (tmpl->start != rec->start || rec->found_rec) {
7670                                 struct extent_record *tmp;
7671
7672                                 dup = 1;
7673                                 if (list_empty(&rec->list))
7674                                         list_add_tail(&rec->list,
7675                                                       &duplicate_extents);
7676
7677                                 /*
7678                                  * We have to do this song and dance in case we
7679                                  * find an extent record that falls inside of
7680                                  * our current extent record but does not have
7681                                  * the same objectid.
7682                                  */
7683                                 tmp = malloc(sizeof(*tmp));
7684                                 if (!tmp)
7685                                         return -ENOMEM;
7686                                 tmp->start = tmpl->start;
7687                                 tmp->max_size = tmpl->max_size;
7688                                 tmp->nr = tmpl->nr;
7689                                 tmp->found_rec = 1;
7690                                 tmp->metadata = tmpl->metadata;
7691                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7692                                 INIT_LIST_HEAD(&tmp->list);
7693                                 list_add_tail(&tmp->list, &rec->dups);
7694                                 rec->num_duplicates++;
7695                         } else {
7696                                 rec->nr = tmpl->nr;
7697                                 rec->found_rec = 1;
7698                         }
7699                 }
7700
7701                 if (tmpl->extent_item_refs && !dup) {
7702                         if (rec->extent_item_refs) {
7703                                 fprintf(stderr, "block %llu rec "
7704                                         "extent_item_refs %llu, passed %llu\n",
7705                                         (unsigned long long)tmpl->start,
7706                                         (unsigned long long)
7707                                                         rec->extent_item_refs,
7708                                         (unsigned long long)tmpl->extent_item_refs);
7709                         }
7710                         rec->extent_item_refs = tmpl->extent_item_refs;
7711                 }
7712                 if (tmpl->is_root)
7713                         rec->is_root = 1;
7714                 if (tmpl->content_checked)
7715                         rec->content_checked = 1;
7716                 if (tmpl->owner_ref_checked)
7717                         rec->owner_ref_checked = 1;
7718                 memcpy(&rec->parent_key, &tmpl->parent_key,
7719                                 sizeof(tmpl->parent_key));
7720                 if (tmpl->parent_generation)
7721                         rec->parent_generation = tmpl->parent_generation;
7722                 if (rec->max_size < tmpl->max_size)
7723                         rec->max_size = tmpl->max_size;
7724
7725                 /*
7726                  * A metadata extent can't cross stripe_len boundary, otherwise
7727                  * kernel scrub won't be able to handle it.
7728                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7729                  * it.
7730                  */
7731                 if (tmpl->metadata)
7732                         rec->crossing_stripes = check_crossing_stripes(
7733                                         global_info, rec->start,
7734                                         global_info->nodesize);
7735                 check_extent_type(rec);
7736                 maybe_free_extent_rec(extent_cache, rec);
7737                 return ret;
7738         }
7739
7740         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7741
7742         return ret;
7743 }
7744
7745 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7746                             u64 parent, u64 root, int found_ref)
7747 {
7748         struct extent_record *rec;
7749         struct tree_backref *back;
7750         struct cache_extent *cache;
7751         int ret;
7752         bool insert = false;
7753
7754         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7755         if (!cache) {
7756                 struct extent_record tmpl;
7757
7758                 memset(&tmpl, 0, sizeof(tmpl));
7759                 tmpl.start = bytenr;
7760                 tmpl.nr = 1;
7761                 tmpl.metadata = 1;
7762                 tmpl.max_size = 1;
7763
7764                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7765                 if (ret)
7766                         return ret;
7767
7768                 /* really a bug in cache_extent implement now */
7769                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7770                 if (!cache)
7771                         return -ENOENT;
7772         }
7773
7774         rec = container_of(cache, struct extent_record, cache);
7775         if (rec->start != bytenr) {
7776                 /*
7777                  * Several cause, from unaligned bytenr to over lapping extents
7778                  */
7779                 return -EEXIST;
7780         }
7781
7782         back = find_tree_backref(rec, parent, root);
7783         if (!back) {
7784                 back = alloc_tree_backref(rec, parent, root);
7785                 if (!back)
7786                         return -ENOMEM;
7787                 insert = true;
7788         }
7789
7790         if (found_ref) {
7791                 if (back->node.found_ref) {
7792                         fprintf(stderr, "Extent back ref already exists "
7793                                 "for %llu parent %llu root %llu \n",
7794                                 (unsigned long long)bytenr,
7795                                 (unsigned long long)parent,
7796                                 (unsigned long long)root);
7797                 }
7798                 back->node.found_ref = 1;
7799         } else {
7800                 if (back->node.found_extent_tree) {
7801                         fprintf(stderr, "Extent back ref already exists "
7802                                 "for %llu parent %llu root %llu \n",
7803                                 (unsigned long long)bytenr,
7804                                 (unsigned long long)parent,
7805                                 (unsigned long long)root);
7806                 }
7807                 back->node.found_extent_tree = 1;
7808         }
7809         if (insert)
7810                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7811                         compare_extent_backref));
7812         check_extent_type(rec);
7813         maybe_free_extent_rec(extent_cache, rec);
7814         return 0;
7815 }
7816
7817 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7818                             u64 parent, u64 root, u64 owner, u64 offset,
7819                             u32 num_refs, int found_ref, u64 max_size)
7820 {
7821         struct extent_record *rec;
7822         struct data_backref *back;
7823         struct cache_extent *cache;
7824         int ret;
7825         bool insert = false;
7826
7827         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7828         if (!cache) {
7829                 struct extent_record tmpl;
7830
7831                 memset(&tmpl, 0, sizeof(tmpl));
7832                 tmpl.start = bytenr;
7833                 tmpl.nr = 1;
7834                 tmpl.max_size = max_size;
7835
7836                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7837                 if (ret)
7838                         return ret;
7839
7840                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7841                 if (!cache)
7842                         abort();
7843         }
7844
7845         rec = container_of(cache, struct extent_record, cache);
7846         if (rec->max_size < max_size)
7847                 rec->max_size = max_size;
7848
7849         /*
7850          * If found_ref is set then max_size is the real size and must match the
7851          * existing refs.  So if we have already found a ref then we need to
7852          * make sure that this ref matches the existing one, otherwise we need
7853          * to add a new backref so we can notice that the backrefs don't match
7854          * and we need to figure out who is telling the truth.  This is to
7855          * account for that awful fsync bug I introduced where we'd end up with
7856          * a btrfs_file_extent_item that would have its length include multiple
7857          * prealloc extents or point inside of a prealloc extent.
7858          */
7859         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7860                                  bytenr, max_size);
7861         if (!back) {
7862                 back = alloc_data_backref(rec, parent, root, owner, offset,
7863                                           max_size);
7864                 BUG_ON(!back);
7865                 insert = true;
7866         }
7867
7868         if (found_ref) {
7869                 BUG_ON(num_refs != 1);
7870                 if (back->node.found_ref)
7871                         BUG_ON(back->bytes != max_size);
7872                 back->node.found_ref = 1;
7873                 back->found_ref += 1;
7874                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7875                         back->bytes = max_size;
7876                         back->disk_bytenr = bytenr;
7877
7878                         /* Need to reinsert if not already in the tree */
7879                         if (!insert) {
7880                                 rb_erase(&back->node.node, &rec->backref_tree);
7881                                 insert = true;
7882                         }
7883                 }
7884                 rec->refs += 1;
7885                 rec->content_checked = 1;
7886                 rec->owner_ref_checked = 1;
7887         } else {
7888                 if (back->node.found_extent_tree) {
7889                         fprintf(stderr, "Extent back ref already exists "
7890                                 "for %llu parent %llu root %llu "
7891                                 "owner %llu offset %llu num_refs %lu\n",
7892                                 (unsigned long long)bytenr,
7893                                 (unsigned long long)parent,
7894                                 (unsigned long long)root,
7895                                 (unsigned long long)owner,
7896                                 (unsigned long long)offset,
7897                                 (unsigned long)num_refs);
7898                 }
7899                 back->num_refs = num_refs;
7900                 back->node.found_extent_tree = 1;
7901         }
7902         if (insert)
7903                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7904                         compare_extent_backref));
7905
7906         maybe_free_extent_rec(extent_cache, rec);
7907         return 0;
7908 }
7909
7910 static int add_pending(struct cache_tree *pending,
7911                        struct cache_tree *seen, u64 bytenr, u32 size)
7912 {
7913         int ret;
7914         ret = add_cache_extent(seen, bytenr, size);
7915         if (ret)
7916                 return ret;
7917         add_cache_extent(pending, bytenr, size);
7918         return 0;
7919 }
7920
7921 static int pick_next_pending(struct cache_tree *pending,
7922                         struct cache_tree *reada,
7923                         struct cache_tree *nodes,
7924                         u64 last, struct block_info *bits, int bits_nr,
7925                         int *reada_bits)
7926 {
7927         unsigned long node_start = last;
7928         struct cache_extent *cache;
7929         int ret;
7930
7931         cache = search_cache_extent(reada, 0);
7932         if (cache) {
7933                 bits[0].start = cache->start;
7934                 bits[0].size = cache->size;
7935                 *reada_bits = 1;
7936                 return 1;
7937         }
7938         *reada_bits = 0;
7939         if (node_start > 32768)
7940                 node_start -= 32768;
7941
7942         cache = search_cache_extent(nodes, node_start);
7943         if (!cache)
7944                 cache = search_cache_extent(nodes, 0);
7945
7946         if (!cache) {
7947                  cache = search_cache_extent(pending, 0);
7948                  if (!cache)
7949                          return 0;
7950                  ret = 0;
7951                  do {
7952                          bits[ret].start = cache->start;
7953                          bits[ret].size = cache->size;
7954                          cache = next_cache_extent(cache);
7955                          ret++;
7956                  } while (cache && ret < bits_nr);
7957                  return ret;
7958         }
7959
7960         ret = 0;
7961         do {
7962                 bits[ret].start = cache->start;
7963                 bits[ret].size = cache->size;
7964                 cache = next_cache_extent(cache);
7965                 ret++;
7966         } while (cache && ret < bits_nr);
7967
7968         if (bits_nr - ret > 8) {
7969                 u64 lookup = bits[0].start + bits[0].size;
7970                 struct cache_extent *next;
7971                 next = search_cache_extent(pending, lookup);
7972                 while(next) {
7973                         if (next->start - lookup > 32768)
7974                                 break;
7975                         bits[ret].start = next->start;
7976                         bits[ret].size = next->size;
7977                         lookup = next->start + next->size;
7978                         ret++;
7979                         if (ret == bits_nr)
7980                                 break;
7981                         next = next_cache_extent(next);
7982                         if (!next)
7983                                 break;
7984                 }
7985         }
7986         return ret;
7987 }
7988
7989 static void free_chunk_record(struct cache_extent *cache)
7990 {
7991         struct chunk_record *rec;
7992
7993         rec = container_of(cache, struct chunk_record, cache);
7994         list_del_init(&rec->list);
7995         list_del_init(&rec->dextents);
7996         free(rec);
7997 }
7998
7999 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8000 {
8001         cache_tree_free_extents(chunk_cache, free_chunk_record);
8002 }
8003
8004 static void free_device_record(struct rb_node *node)
8005 {
8006         struct device_record *rec;
8007
8008         rec = container_of(node, struct device_record, node);
8009         free(rec);
8010 }
8011
8012 FREE_RB_BASED_TREE(device_cache, free_device_record);
8013
8014 int insert_block_group_record(struct block_group_tree *tree,
8015                               struct block_group_record *bg_rec)
8016 {
8017         int ret;
8018
8019         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8020         if (ret)
8021                 return ret;
8022
8023         list_add_tail(&bg_rec->list, &tree->block_groups);
8024         return 0;
8025 }
8026
8027 static void free_block_group_record(struct cache_extent *cache)
8028 {
8029         struct block_group_record *rec;
8030
8031         rec = container_of(cache, struct block_group_record, cache);
8032         list_del_init(&rec->list);
8033         free(rec);
8034 }
8035
8036 void free_block_group_tree(struct block_group_tree *tree)
8037 {
8038         cache_tree_free_extents(&tree->tree, free_block_group_record);
8039 }
8040
8041 int insert_device_extent_record(struct device_extent_tree *tree,
8042                                 struct device_extent_record *de_rec)
8043 {
8044         int ret;
8045
8046         /*
8047          * Device extent is a bit different from the other extents, because
8048          * the extents which belong to the different devices may have the
8049          * same start and size, so we need use the special extent cache
8050          * search/insert functions.
8051          */
8052         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8053         if (ret)
8054                 return ret;
8055
8056         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8057         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
8058         return 0;
8059 }
8060
8061 static void free_device_extent_record(struct cache_extent *cache)
8062 {
8063         struct device_extent_record *rec;
8064
8065         rec = container_of(cache, struct device_extent_record, cache);
8066         if (!list_empty(&rec->chunk_list))
8067                 list_del_init(&rec->chunk_list);
8068         if (!list_empty(&rec->device_list))
8069                 list_del_init(&rec->device_list);
8070         free(rec);
8071 }
8072
8073 void free_device_extent_tree(struct device_extent_tree *tree)
8074 {
8075         cache_tree_free_extents(&tree->tree, free_device_extent_record);
8076 }
8077
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref on the extent
 * at key.objectid, with key.offset as the parent.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a tree
 * backref; anything else is recorded as a data backref carrying the v0 ref
 * count.  Returns whatever the chosen add_*_backref() helper returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                key.offset, 0, 0, 0,
                                btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
8098
8099 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8100                                             struct btrfs_key *key,
8101                                             int slot)
8102 {
8103         struct btrfs_chunk *ptr;
8104         struct chunk_record *rec;
8105         int num_stripes, i;
8106
8107         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8108         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8109
8110         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8111         if (!rec) {
8112                 fprintf(stderr, "memory allocation failed\n");
8113                 exit(-1);
8114         }
8115
8116         INIT_LIST_HEAD(&rec->list);
8117         INIT_LIST_HEAD(&rec->dextents);
8118         rec->bg_rec = NULL;
8119
8120         rec->cache.start = key->offset;
8121         rec->cache.size = btrfs_chunk_length(leaf, ptr);
8122
8123         rec->generation = btrfs_header_generation(leaf);
8124
8125         rec->objectid = key->objectid;
8126         rec->type = key->type;
8127         rec->offset = key->offset;
8128
8129         rec->length = rec->cache.size;
8130         rec->owner = btrfs_chunk_owner(leaf, ptr);
8131         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8132         rec->type_flags = btrfs_chunk_type(leaf, ptr);
8133         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8134         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8135         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8136         rec->num_stripes = num_stripes;
8137         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8138
8139         for (i = 0; i < rec->num_stripes; ++i) {
8140                 rec->stripes[i].devid =
8141                         btrfs_stripe_devid_nr(leaf, ptr, i);
8142                 rec->stripes[i].offset =
8143                         btrfs_stripe_offset_nr(leaf, ptr, i);
8144                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8145                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
8146                                 BTRFS_UUID_SIZE);
8147         }
8148
8149         return rec;
8150 }
8151
8152 static int process_chunk_item(struct cache_tree *chunk_cache,
8153                               struct btrfs_key *key, struct extent_buffer *eb,
8154                               int slot)
8155 {
8156         struct chunk_record *rec;
8157         struct btrfs_chunk *chunk;
8158         int ret = 0;
8159
8160         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8161         /*
8162          * Do extra check for this chunk item,
8163          *
8164          * It's still possible one can craft a leaf with CHUNK_ITEM, with
8165          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8166          * and owner<->key_type check.
8167          */
8168         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8169                                       key->offset);
8170         if (ret < 0) {
8171                 error("chunk(%llu, %llu) is not valid, ignore it",
8172                       key->offset, btrfs_chunk_length(eb, chunk));
8173                 return 0;
8174         }
8175         rec = btrfs_new_chunk_record(eb, key, slot);
8176         ret = insert_cache_extent(chunk_cache, &rec->cache);
8177         if (ret) {
8178                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8179                         rec->offset, rec->length);
8180                 free(rec);
8181         }
8182
8183         return ret;
8184 }
8185
8186 static int process_device_item(struct rb_root *dev_cache,
8187                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8188 {
8189         struct btrfs_dev_item *ptr;
8190         struct device_record *rec;
8191         int ret = 0;
8192
8193         ptr = btrfs_item_ptr(eb,
8194                 slot, struct btrfs_dev_item);
8195
8196         rec = malloc(sizeof(*rec));
8197         if (!rec) {
8198                 fprintf(stderr, "memory allocation failed\n");
8199                 return -ENOMEM;
8200         }
8201
8202         rec->devid = key->offset;
8203         rec->generation = btrfs_header_generation(eb);
8204
8205         rec->objectid = key->objectid;
8206         rec->type = key->type;
8207         rec->offset = key->offset;
8208
8209         rec->devid = btrfs_device_id(eb, ptr);
8210         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8211         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8212
8213         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8214         if (ret) {
8215                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
8216                 free(rec);
8217         }
8218
8219         return ret;
8220 }
8221
8222 struct block_group_record *
8223 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8224                              int slot)
8225 {
8226         struct btrfs_block_group_item *ptr;
8227         struct block_group_record *rec;
8228
8229         rec = calloc(1, sizeof(*rec));
8230         if (!rec) {
8231                 fprintf(stderr, "memory allocation failed\n");
8232                 exit(-1);
8233         }
8234
8235         rec->cache.start = key->objectid;
8236         rec->cache.size = key->offset;
8237
8238         rec->generation = btrfs_header_generation(leaf);
8239
8240         rec->objectid = key->objectid;
8241         rec->type = key->type;
8242         rec->offset = key->offset;
8243
8244         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8245         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8246
8247         INIT_LIST_HEAD(&rec->list);
8248
8249         return rec;
8250 }
8251
8252 static int process_block_group_item(struct block_group_tree *block_group_cache,
8253                                     struct btrfs_key *key,
8254                                     struct extent_buffer *eb, int slot)
8255 {
8256         struct block_group_record *rec;
8257         int ret = 0;
8258
8259         rec = btrfs_new_block_group_record(eb, key, slot);
8260         ret = insert_block_group_record(block_group_cache, rec);
8261         if (ret) {
8262                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8263                         rec->objectid, rec->offset);
8264                 free(rec);
8265         }
8266
8267         return ret;
8268 }
8269
8270 struct device_extent_record *
8271 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8272                                struct btrfs_key *key, int slot)
8273 {
8274         struct device_extent_record *rec;
8275         struct btrfs_dev_extent *ptr;
8276
8277         rec = calloc(1, sizeof(*rec));
8278         if (!rec) {
8279                 fprintf(stderr, "memory allocation failed\n");
8280                 exit(-1);
8281         }
8282
8283         rec->cache.objectid = key->objectid;
8284         rec->cache.start = key->offset;
8285
8286         rec->generation = btrfs_header_generation(leaf);
8287
8288         rec->objectid = key->objectid;
8289         rec->type = key->type;
8290         rec->offset = key->offset;
8291
8292         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8293         rec->chunk_objecteid =
8294                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8295         rec->chunk_offset =
8296                 btrfs_dev_extent_chunk_offset(leaf, ptr);
8297         rec->length = btrfs_dev_extent_length(leaf, ptr);
8298         rec->cache.size = rec->length;
8299
8300         INIT_LIST_HEAD(&rec->chunk_list);
8301         INIT_LIST_HEAD(&rec->device_list);
8302
8303         return rec;
8304 }
8305
8306 static int
8307 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8308                            struct btrfs_key *key, struct extent_buffer *eb,
8309                            int slot)
8310 {
8311         struct device_extent_record *rec;
8312         int ret;
8313
8314         rec = btrfs_new_device_extent_record(eb, key, slot);
8315         ret = insert_device_extent_record(dev_extent_cache, rec);
8316         if (ret) {
8317                 fprintf(stderr,
8318                         "Device extent[%llu, %llu, %llu] existed.\n",
8319                         rec->objectid, rec->offset, rec->length);
8320                 free(rec);
8321         }
8322
8323         return ret;
8324 }
8325
8326 static int process_extent_item(struct btrfs_root *root,
8327                                struct cache_tree *extent_cache,
8328                                struct extent_buffer *eb, int slot)
8329 {
8330         struct btrfs_extent_item *ei;
8331         struct btrfs_extent_inline_ref *iref;
8332         struct btrfs_extent_data_ref *dref;
8333         struct btrfs_shared_data_ref *sref;
8334         struct btrfs_key key;
8335         struct extent_record tmpl;
8336         unsigned long end;
8337         unsigned long ptr;
8338         int ret;
8339         int type;
8340         u32 item_size = btrfs_item_size_nr(eb, slot);
8341         u64 refs = 0;
8342         u64 offset;
8343         u64 num_bytes;
8344         int metadata = 0;
8345
8346         btrfs_item_key_to_cpu(eb, &key, slot);
8347
8348         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8349                 metadata = 1;
8350                 num_bytes = root->fs_info->nodesize;
8351         } else {
8352                 num_bytes = key.offset;
8353         }
8354
8355         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8356                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8357                       key.objectid, root->fs_info->sectorsize);
8358                 return -EIO;
8359         }
8360         if (item_size < sizeof(*ei)) {
8361 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8362                 struct btrfs_extent_item_v0 *ei0;
8363                 BUG_ON(item_size != sizeof(*ei0));
8364                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8365                 refs = btrfs_extent_refs_v0(eb, ei0);
8366 #else
8367                 BUG();
8368 #endif
8369                 memset(&tmpl, 0, sizeof(tmpl));
8370                 tmpl.start = key.objectid;
8371                 tmpl.nr = num_bytes;
8372                 tmpl.extent_item_refs = refs;
8373                 tmpl.metadata = metadata;
8374                 tmpl.found_rec = 1;
8375                 tmpl.max_size = num_bytes;
8376
8377                 return add_extent_rec(extent_cache, &tmpl);
8378         }
8379
8380         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8381         refs = btrfs_extent_refs(eb, ei);
8382         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8383                 metadata = 1;
8384         else
8385                 metadata = 0;
8386         if (metadata && num_bytes != root->fs_info->nodesize) {
8387                 error("ignore invalid metadata extent, length %llu does not equal to %u",
8388                       num_bytes, root->fs_info->nodesize);
8389                 return -EIO;
8390         }
8391         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8392                 error("ignore invalid data extent, length %llu is not aligned to %u",
8393                       num_bytes, root->fs_info->sectorsize);
8394                 return -EIO;
8395         }
8396
8397         memset(&tmpl, 0, sizeof(tmpl));
8398         tmpl.start = key.objectid;
8399         tmpl.nr = num_bytes;
8400         tmpl.extent_item_refs = refs;
8401         tmpl.metadata = metadata;
8402         tmpl.found_rec = 1;
8403         tmpl.max_size = num_bytes;
8404         add_extent_rec(extent_cache, &tmpl);
8405
8406         ptr = (unsigned long)(ei + 1);
8407         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8408             key.type == BTRFS_EXTENT_ITEM_KEY)
8409                 ptr += sizeof(struct btrfs_tree_block_info);
8410
8411         end = (unsigned long)ei + item_size;
8412         while (ptr < end) {
8413                 iref = (struct btrfs_extent_inline_ref *)ptr;
8414                 type = btrfs_extent_inline_ref_type(eb, iref);
8415                 offset = btrfs_extent_inline_ref_offset(eb, iref);
8416                 switch (type) {
8417                 case BTRFS_TREE_BLOCK_REF_KEY:
8418                         ret = add_tree_backref(extent_cache, key.objectid,
8419                                         0, offset, 0);
8420                         if (ret < 0)
8421                                 error(
8422                         "add_tree_backref failed (extent items tree block): %s",
8423                                       strerror(-ret));
8424                         break;
8425                 case BTRFS_SHARED_BLOCK_REF_KEY:
8426                         ret = add_tree_backref(extent_cache, key.objectid,
8427                                         offset, 0, 0);
8428                         if (ret < 0)
8429                                 error(
8430                         "add_tree_backref failed (extent items shared block): %s",
8431                                       strerror(-ret));
8432                         break;
8433                 case BTRFS_EXTENT_DATA_REF_KEY:
8434                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8435                         add_data_backref(extent_cache, key.objectid, 0,
8436                                         btrfs_extent_data_ref_root(eb, dref),
8437                                         btrfs_extent_data_ref_objectid(eb,
8438                                                                        dref),
8439                                         btrfs_extent_data_ref_offset(eb, dref),
8440                                         btrfs_extent_data_ref_count(eb, dref),
8441                                         0, num_bytes);
8442                         break;
8443                 case BTRFS_SHARED_DATA_REF_KEY:
8444                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8445                         add_data_backref(extent_cache, key.objectid, offset,
8446                                         0, 0, 0,
8447                                         btrfs_shared_data_ref_count(eb, sref),
8448                                         0, num_bytes);
8449                         break;
8450                 default:
8451                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8452                                 key.objectid, key.type, num_bytes);
8453                         goto out;
8454                 }
8455                 ptr += btrfs_extent_inline_ref_size(type);
8456         }
8457         WARN_ON(ptr > end);
8458 out:
8459         return 0;
8460 }
8461
8462 static int check_cache_range(struct btrfs_root *root,
8463                              struct btrfs_block_group_cache *cache,
8464                              u64 offset, u64 bytes)
8465 {
8466         struct btrfs_free_space *entry;
8467         u64 *logical;
8468         u64 bytenr;
8469         int stripe_len;
8470         int i, nr, ret;
8471
8472         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8473                 bytenr = btrfs_sb_offset(i);
8474                 ret = btrfs_rmap_block(root->fs_info,
8475                                        cache->key.objectid, bytenr, 0,
8476                                        &logical, &nr, &stripe_len);
8477                 if (ret)
8478                         return ret;
8479
8480                 while (nr--) {
8481                         if (logical[nr] + stripe_len <= offset)
8482                                 continue;
8483                         if (offset + bytes <= logical[nr])
8484                                 continue;
8485                         if (logical[nr] == offset) {
8486                                 if (stripe_len >= bytes) {
8487                                         free(logical);
8488                                         return 0;
8489                                 }
8490                                 bytes -= stripe_len;
8491                                 offset += stripe_len;
8492                         } else if (logical[nr] < offset) {
8493                                 if (logical[nr] + stripe_len >=
8494                                     offset + bytes) {
8495                                         free(logical);
8496                                         return 0;
8497                                 }
8498                                 bytes = (offset + bytes) -
8499                                         (logical[nr] + stripe_len);
8500                                 offset = logical[nr] + stripe_len;
8501                         } else {
8502                                 /*
8503                                  * Could be tricky, the super may land in the
8504                                  * middle of the area we're checking.  First
8505                                  * check the easiest case, it's at the end.
8506                                  */
8507                                 if (logical[nr] + stripe_len >=
8508                                     bytes + offset) {
8509                                         bytes = logical[nr] - offset;
8510                                         continue;
8511                                 }
8512
8513                                 /* Check the left side */
8514                                 ret = check_cache_range(root, cache,
8515                                                         offset,
8516                                                         logical[nr] - offset);
8517                                 if (ret) {
8518                                         free(logical);
8519                                         return ret;
8520                                 }
8521
8522                                 /* Now we continue with the right side */
8523                                 bytes = (offset + bytes) -
8524                                         (logical[nr] + stripe_len);
8525                                 offset = logical[nr] + stripe_len;
8526                         }
8527                 }
8528
8529                 free(logical);
8530         }
8531
8532         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8533         if (!entry) {
8534                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8535                         offset, offset+bytes);
8536                 return -EINVAL;
8537         }
8538
8539         if (entry->offset != offset) {
8540                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8541                         entry->offset);
8542                 return -EINVAL;
8543         }
8544
8545         if (entry->bytes != bytes) {
8546                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8547                         bytes, entry->bytes, offset);
8548                 return -EINVAL;
8549         }
8550
8551         unlink_free_space(cache->free_space_ctl, entry);
8552         free(entry);
8553         return 0;
8554 }
8555
8556 static int verify_space_cache(struct btrfs_root *root,
8557                               struct btrfs_block_group_cache *cache)
8558 {
8559         struct btrfs_path path;
8560         struct extent_buffer *leaf;
8561         struct btrfs_key key;
8562         u64 last;
8563         int ret = 0;
8564
8565         root = root->fs_info->extent_root;
8566
8567         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8568
8569         btrfs_init_path(&path);
8570         key.objectid = last;
8571         key.offset = 0;
8572         key.type = BTRFS_EXTENT_ITEM_KEY;
8573         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8574         if (ret < 0)
8575                 goto out;
8576         ret = 0;
8577         while (1) {
8578                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8579                         ret = btrfs_next_leaf(root, &path);
8580                         if (ret < 0)
8581                                 goto out;
8582                         if (ret > 0) {
8583                                 ret = 0;
8584                                 break;
8585                         }
8586                 }
8587                 leaf = path.nodes[0];
8588                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8589                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8590                         break;
8591                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8592                     key.type != BTRFS_METADATA_ITEM_KEY) {
8593                         path.slots[0]++;
8594                         continue;
8595                 }
8596
8597                 if (last == key.objectid) {
8598                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8599                                 last = key.objectid + key.offset;
8600                         else
8601                                 last = key.objectid + root->fs_info->nodesize;
8602                         path.slots[0]++;
8603                         continue;
8604                 }
8605
8606                 ret = check_cache_range(root, cache, last,
8607                                         key.objectid - last);
8608                 if (ret)
8609                         break;
8610                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8611                         last = key.objectid + key.offset;
8612                 else
8613                         last = key.objectid + root->fs_info->nodesize;
8614                 path.slots[0]++;
8615         }
8616
8617         if (last < cache->key.objectid + cache->key.offset)
8618                 ret = check_cache_range(root, cache, last,
8619                                         cache->key.objectid +
8620                                         cache->key.offset - last);
8621
8622 out:
8623         btrfs_release_path(&path);
8624
8625         if (!ret &&
8626             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8627                 fprintf(stderr, "There are still entries left in the space "
8628                         "cache\n");
8629                 ret = -EINVAL;
8630         }
8631
8632         return ret;
8633 }
8634
8635 static int check_space_cache(struct btrfs_root *root)
8636 {
8637         struct btrfs_block_group_cache *cache;
8638         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8639         int ret;
8640         int error = 0;
8641
8642         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8643             btrfs_super_generation(root->fs_info->super_copy) !=
8644             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8645                 printf("cache and super generation don't match, space cache "
8646                        "will be invalidated\n");
8647                 return 0;
8648         }
8649
8650         if (ctx.progress_enabled) {
8651                 ctx.tp = TASK_FREE_SPACE;
8652                 task_start(ctx.info);
8653         }
8654
8655         while (1) {
8656                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8657                 if (!cache)
8658                         break;
8659
8660                 start = cache->key.objectid + cache->key.offset;
8661                 if (!cache->free_space_ctl) {
8662                         if (btrfs_init_free_space_ctl(cache,
8663                                                 root->fs_info->sectorsize)) {
8664                                 ret = -ENOMEM;
8665                                 break;
8666                         }
8667                 } else {
8668                         btrfs_remove_free_space_cache(cache);
8669                 }
8670
8671                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8672                         ret = exclude_super_stripes(root, cache);
8673                         if (ret) {
8674                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8675                                         strerror(-ret));
8676                                 error++;
8677                                 continue;
8678                         }
8679                         ret = load_free_space_tree(root->fs_info, cache);
8680                         free_excluded_extents(root, cache);
8681                         if (ret < 0) {
8682                                 fprintf(stderr, "could not load free space tree: %s\n",
8683                                         strerror(-ret));
8684                                 error++;
8685                                 continue;
8686                         }
8687                         error += ret;
8688                 } else {
8689                         ret = load_free_space_cache(root->fs_info, cache);
8690                         if (!ret)
8691                                 continue;
8692                 }
8693
8694                 ret = verify_space_cache(root, cache);
8695                 if (ret) {
8696                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8697                                 cache->key.objectid);
8698                         error++;
8699                 }
8700         }
8701
8702         task_stop(ctx.info);
8703
8704         return error ? -EINVAL : 0;
8705 }
8706
8707 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8708                         u64 num_bytes, unsigned long leaf_offset,
8709                         struct extent_buffer *eb) {
8710
8711         struct btrfs_fs_info *fs_info = root->fs_info;
8712         u64 offset = 0;
8713         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8714         char *data;
8715         unsigned long csum_offset;
8716         u32 csum;
8717         u32 csum_expected;
8718         u64 read_len;
8719         u64 data_checked = 0;
8720         u64 tmp;
8721         int ret = 0;
8722         int mirror;
8723         int num_copies;
8724
8725         if (num_bytes % fs_info->sectorsize)
8726                 return -EINVAL;
8727
8728         data = malloc(num_bytes);
8729         if (!data)
8730                 return -ENOMEM;
8731
8732         while (offset < num_bytes) {
8733                 mirror = 0;
8734 again:
8735                 read_len = num_bytes - offset;
8736                 /* read as much space once a time */
8737                 ret = read_extent_data(fs_info, data + offset,
8738                                 bytenr + offset, &read_len, mirror);
8739                 if (ret)
8740                         goto out;
8741                 data_checked = 0;
8742                 /* verify every 4k data's checksum */
8743                 while (data_checked < read_len) {
8744                         csum = ~(u32)0;
8745                         tmp = offset + data_checked;
8746
8747                         csum = btrfs_csum_data((char *)data + tmp,
8748                                                csum, fs_info->sectorsize);
8749                         btrfs_csum_final(csum, (u8 *)&csum);
8750
8751                         csum_offset = leaf_offset +
8752                                  tmp / fs_info->sectorsize * csum_size;
8753                         read_extent_buffer(eb, (char *)&csum_expected,
8754                                            csum_offset, csum_size);
8755                         /* try another mirror */
8756                         if (csum != csum_expected) {
8757                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8758                                                 mirror, bytenr + tmp,
8759                                                 csum, csum_expected);
8760                                 num_copies = btrfs_num_copies(root->fs_info,
8761                                                 bytenr, num_bytes);
8762                                 if (mirror < num_copies - 1) {
8763                                         mirror += 1;
8764                                         goto again;
8765                                 }
8766                         }
8767                         data_checked += fs_info->sectorsize;
8768                 }
8769                 offset += read_len;
8770         }
8771 out:
8772         free(data);
8773         return ret;
8774 }
8775
8776 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8777                                u64 num_bytes)
8778 {
8779         struct btrfs_path path;
8780         struct extent_buffer *leaf;
8781         struct btrfs_key key;
8782         int ret;
8783
8784         btrfs_init_path(&path);
8785         key.objectid = bytenr;
8786         key.type = BTRFS_EXTENT_ITEM_KEY;
8787         key.offset = (u64)-1;
8788
8789 again:
8790         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8791                                 0, 0);
8792         if (ret < 0) {
8793                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8794                 btrfs_release_path(&path);
8795                 return ret;
8796         } else if (ret) {
8797                 if (path.slots[0] > 0) {
8798                         path.slots[0]--;
8799                 } else {
8800                         ret = btrfs_prev_leaf(root, &path);
8801                         if (ret < 0) {
8802                                 goto out;
8803                         } else if (ret > 0) {
8804                                 ret = 0;
8805                                 goto out;
8806                         }
8807                 }
8808         }
8809
8810         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8811
8812         /*
8813          * Block group items come before extent items if they have the same
8814          * bytenr, so walk back one more just in case.  Dear future traveller,
8815          * first congrats on mastering time travel.  Now if it's not too much
8816          * trouble could you go back to 2006 and tell Chris to make the
8817          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8818          * EXTENT_ITEM_KEY please?
8819          */
8820         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8821                 if (path.slots[0] > 0) {
8822                         path.slots[0]--;
8823                 } else {
8824                         ret = btrfs_prev_leaf(root, &path);
8825                         if (ret < 0) {
8826                                 goto out;
8827                         } else if (ret > 0) {
8828                                 ret = 0;
8829                                 goto out;
8830                         }
8831                 }
8832                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8833         }
8834
8835         while (num_bytes) {
8836                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8837                         ret = btrfs_next_leaf(root, &path);
8838                         if (ret < 0) {
8839                                 fprintf(stderr, "Error going to next leaf "
8840                                         "%d\n", ret);
8841                                 btrfs_release_path(&path);
8842                                 return ret;
8843                         } else if (ret) {
8844                                 break;
8845                         }
8846                 }
8847                 leaf = path.nodes[0];
8848                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8849                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8850                         path.slots[0]++;
8851                         continue;
8852                 }
8853                 if (key.objectid + key.offset < bytenr) {
8854                         path.slots[0]++;
8855                         continue;
8856                 }
8857                 if (key.objectid > bytenr + num_bytes)
8858                         break;
8859
8860                 if (key.objectid == bytenr) {
8861                         if (key.offset >= num_bytes) {
8862                                 num_bytes = 0;
8863                                 break;
8864                         }
8865                         num_bytes -= key.offset;
8866                         bytenr += key.offset;
8867                 } else if (key.objectid < bytenr) {
8868                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8869                                 num_bytes = 0;
8870                                 break;
8871                         }
8872                         num_bytes = (bytenr + num_bytes) -
8873                                 (key.objectid + key.offset);
8874                         bytenr = key.objectid + key.offset;
8875                 } else {
8876                         if (key.objectid + key.offset < bytenr + num_bytes) {
8877                                 u64 new_start = key.objectid + key.offset;
8878                                 u64 new_bytes = bytenr + num_bytes - new_start;
8879
8880                                 /*
8881                                  * Weird case, the extent is in the middle of
8882                                  * our range, we'll have to search one side
8883                                  * and then the other.  Not sure if this happens
8884                                  * in real life, but no harm in coding it up
8885                                  * anyway just in case.
8886                                  */
8887                                 btrfs_release_path(&path);
8888                                 ret = check_extent_exists(root, new_start,
8889                                                           new_bytes);
8890                                 if (ret) {
8891                                         fprintf(stderr, "Right section didn't "
8892                                                 "have a record\n");
8893                                         break;
8894                                 }
8895                                 num_bytes = key.objectid - bytenr;
8896                                 goto again;
8897                         }
8898                         num_bytes = key.objectid - bytenr;
8899                 }
8900                 path.slots[0]++;
8901         }
8902         ret = 0;
8903
8904 out:
8905         if (num_bytes && !ret) {
8906                 fprintf(stderr, "There are no extents for csum range "
8907                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8908                 ret = 1;
8909         }
8910
8911         btrfs_release_path(&path);
8912         return ret;
8913 }
8914
8915 static int check_csums(struct btrfs_root *root)
8916 {
8917         struct btrfs_path path;
8918         struct extent_buffer *leaf;
8919         struct btrfs_key key;
8920         u64 offset = 0, num_bytes = 0;
8921         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8922         int errors = 0;
8923         int ret;
8924         u64 data_len;
8925         unsigned long leaf_offset;
8926
8927         root = root->fs_info->csum_root;
8928         if (!extent_buffer_uptodate(root->node)) {
8929                 fprintf(stderr, "No valid csum tree found\n");
8930                 return -ENOENT;
8931         }
8932
8933         btrfs_init_path(&path);
8934         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8935         key.type = BTRFS_EXTENT_CSUM_KEY;
8936         key.offset = 0;
8937         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8938         if (ret < 0) {
8939                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8940                 btrfs_release_path(&path);
8941                 return ret;
8942         }
8943
8944         if (ret > 0 && path.slots[0])
8945                 path.slots[0]--;
8946         ret = 0;
8947
8948         while (1) {
8949                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8950                         ret = btrfs_next_leaf(root, &path);
8951                         if (ret < 0) {
8952                                 fprintf(stderr, "Error going to next leaf "
8953                                         "%d\n", ret);
8954                                 break;
8955                         }
8956                         if (ret)
8957                                 break;
8958                 }
8959                 leaf = path.nodes[0];
8960
8961                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8962                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8963                         path.slots[0]++;
8964                         continue;
8965                 }
8966
8967                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8968                               csum_size) * root->fs_info->sectorsize;
8969                 if (!check_data_csum)
8970                         goto skip_csum_check;
8971                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8972                 ret = check_extent_csums(root, key.offset, data_len,
8973                                          leaf_offset, leaf);
8974                 if (ret)
8975                         break;
8976 skip_csum_check:
8977                 if (!num_bytes) {
8978                         offset = key.offset;
8979                 } else if (key.offset != offset + num_bytes) {
8980                         ret = check_extent_exists(root, offset, num_bytes);
8981                         if (ret) {
8982                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8983                                         "there is no extent record\n",
8984                                         offset, offset+num_bytes);
8985                                 errors++;
8986                         }
8987                         offset = key.offset;
8988                         num_bytes = 0;
8989                 }
8990                 num_bytes += data_len;
8991                 path.slots[0]++;
8992         }
8993
8994         btrfs_release_path(&path);
8995         return errors;
8996 }
8997
8998 static int is_dropped_key(struct btrfs_key *key,
8999                           struct btrfs_key *drop_key) {
9000         if (key->objectid < drop_key->objectid)
9001                 return 1;
9002         else if (key->objectid == drop_key->objectid) {
9003                 if (key->type < drop_key->type)
9004                         return 1;
9005                 else if (key->type == drop_key->type) {
9006                         if (key->offset < drop_key->offset)
9007                                 return 1;
9008                 }
9009         }
9010         return 0;
9011 }
9012
9013 /*
9014  * Here are the rules for FULL_BACKREF.
9015  *
9016  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9017  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9018  *      FULL_BACKREF set.
9019  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
9020  *    if it happened after the relocation occurred since we'll have dropped the
9021  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9022  *    have no real way to know for sure.
9023  *
9024  * We process the blocks one root at a time, and we start from the lowest root
9025  * objectid and go to the highest.  So we can just lookup the owner backref for
9026  * the record and if we don't find it then we know it doesn't exist and we have
9027  * a FULL BACKREF.
9028  *
9029  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
9030  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9031  * be set or not and then we can check later once we've gathered all the refs.
9032  */
9033 static int calc_extent_flag(struct cache_tree *extent_cache,
9034                            struct extent_buffer *buf,
9035                            struct root_item_record *ri,
9036                            u64 *flags)
9037 {
9038         struct extent_record *rec;
9039         struct cache_extent *cache;
9040         struct tree_backref *tback;
9041         u64 owner = 0;
9042
9043         cache = lookup_cache_extent(extent_cache, buf->start, 1);
9044         /* we have added this extent before */
9045         if (!cache)
9046                 return -ENOENT;
9047
9048         rec = container_of(cache, struct extent_record, cache);
9049
9050         /*
9051          * Except file/reloc tree, we can not have
9052          * FULL BACKREF MODE
9053          */
9054         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9055                 goto normal;
9056         /*
9057          * root node
9058          */
9059         if (buf->start == ri->bytenr)
9060                 goto normal;
9061
9062         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9063                 goto full_backref;
9064
9065         owner = btrfs_header_owner(buf);
9066         if (owner == ri->objectid)
9067                 goto normal;
9068
9069         tback = find_tree_backref(rec, 0, owner);
9070         if (!tback)
9071                 goto full_backref;
9072 normal:
9073         *flags = 0;
9074         if (rec->flag_block_full_backref != FLAG_UNSET &&
9075             rec->flag_block_full_backref != 0)
9076                 rec->bad_full_backref = 1;
9077         return 0;
9078 full_backref:
9079         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9080         if (rec->flag_block_full_backref != FLAG_UNSET &&
9081             rec->flag_block_full_backref != 1)
9082                 rec->bad_full_backref = 1;
9083         return 0;
9084 }
9085
9086 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9087 {
9088         fprintf(stderr, "Invalid key type(");
9089         print_key_type(stderr, 0, key_type);
9090         fprintf(stderr, ") found in root(");
9091         print_objectid(stderr, rootid, 0);
9092         fprintf(stderr, ")\n");
9093 }
9094
9095 /*
9096  * Check if the key is valid with its extent buffer.
9097  *
9098  * This is a early check in case invalid key exists in a extent buffer
9099  * This is not comprehensive yet, but should prevent wrong key/item passed
9100  * further
9101  */
9102 static int check_type_with_root(u64 rootid, u8 key_type)
9103 {
9104         switch (key_type) {
9105         /* Only valid in chunk tree */
9106         case BTRFS_DEV_ITEM_KEY:
9107         case BTRFS_CHUNK_ITEM_KEY:
9108                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9109                         goto err;
9110                 break;
9111         /* valid in csum and log tree */
9112         case BTRFS_CSUM_TREE_OBJECTID:
9113                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9114                       is_fstree(rootid)))
9115                         goto err;
9116                 break;
9117         case BTRFS_EXTENT_ITEM_KEY:
9118         case BTRFS_METADATA_ITEM_KEY:
9119         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9120                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9121                         goto err;
9122                 break;
9123         case BTRFS_ROOT_ITEM_KEY:
9124                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9125                         goto err;
9126                 break;
9127         case BTRFS_DEV_EXTENT_KEY:
9128                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9129                         goto err;
9130                 break;
9131         }
9132         return 0;
9133 err:
9134         report_mismatch_key_root(key_type, rootid);
9135         return -EINVAL;
9136 }
9137
9138 static int run_next_block(struct btrfs_root *root,
9139                           struct block_info *bits,
9140                           int bits_nr,
9141                           u64 *last,
9142                           struct cache_tree *pending,
9143                           struct cache_tree *seen,
9144                           struct cache_tree *reada,
9145                           struct cache_tree *nodes,
9146                           struct cache_tree *extent_cache,
9147                           struct cache_tree *chunk_cache,
9148                           struct rb_root *dev_cache,
9149                           struct block_group_tree *block_group_cache,
9150                           struct device_extent_tree *dev_extent_cache,
9151                           struct root_item_record *ri)
9152 {
9153         struct btrfs_fs_info *fs_info = root->fs_info;
9154         struct extent_buffer *buf;
9155         struct extent_record *rec = NULL;
9156         u64 bytenr;
9157         u32 size;
9158         u64 parent;
9159         u64 owner;
9160         u64 flags;
9161         u64 ptr;
9162         u64 gen = 0;
9163         int ret = 0;
9164         int i;
9165         int nritems;
9166         struct btrfs_key key;
9167         struct cache_extent *cache;
9168         int reada_bits;
9169
9170         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9171                                     bits_nr, &reada_bits);
9172         if (nritems == 0)
9173                 return 1;
9174
9175         if (!reada_bits) {
9176                 for(i = 0; i < nritems; i++) {
9177                         ret = add_cache_extent(reada, bits[i].start,
9178                                                bits[i].size);
9179                         if (ret == -EEXIST)
9180                                 continue;
9181
9182                         /* fixme, get the parent transid */
9183                         readahead_tree_block(fs_info, bits[i].start, 0);
9184                 }
9185         }
9186         *last = bits[0].start;
9187         bytenr = bits[0].start;
9188         size = bits[0].size;
9189
9190         cache = lookup_cache_extent(pending, bytenr, size);
9191         if (cache) {
9192                 remove_cache_extent(pending, cache);
9193                 free(cache);
9194         }
9195         cache = lookup_cache_extent(reada, bytenr, size);
9196         if (cache) {
9197                 remove_cache_extent(reada, cache);
9198                 free(cache);
9199         }
9200         cache = lookup_cache_extent(nodes, bytenr, size);
9201         if (cache) {
9202                 remove_cache_extent(nodes, cache);
9203                 free(cache);
9204         }
9205         cache = lookup_cache_extent(extent_cache, bytenr, size);
9206         if (cache) {
9207                 rec = container_of(cache, struct extent_record, cache);
9208                 gen = rec->parent_generation;
9209         }
9210
9211         /* fixme, get the real parent transid */
9212         buf = read_tree_block(root->fs_info, bytenr, gen);
9213         if (!extent_buffer_uptodate(buf)) {
9214                 record_bad_block_io(root->fs_info,
9215                                     extent_cache, bytenr, size);
9216                 goto out;
9217         }
9218
9219         nritems = btrfs_header_nritems(buf);
9220
9221         flags = 0;
9222         if (!init_extent_tree) {
9223                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9224                                        btrfs_header_level(buf), 1, NULL,
9225                                        &flags);
9226                 if (ret < 0) {
9227                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9228                         if (ret < 0) {
9229                                 fprintf(stderr, "Couldn't calc extent flags\n");
9230                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9231                         }
9232                 }
9233         } else {
9234                 flags = 0;
9235                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9236                 if (ret < 0) {
9237                         fprintf(stderr, "Couldn't calc extent flags\n");
9238                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9239                 }
9240         }
9241
9242         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9243                 if (ri != NULL &&
9244                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9245                     ri->objectid == btrfs_header_owner(buf)) {
9246                         /*
9247                          * Ok we got to this block from it's original owner and
9248                          * we have FULL_BACKREF set.  Relocation can leave
9249                          * converted blocks over so this is altogether possible,
9250                          * however it's not possible if the generation > the
9251                          * last snapshot, so check for this case.
9252                          */
9253                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9254                             btrfs_header_generation(buf) > ri->last_snapshot) {
9255                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9256                                 rec->bad_full_backref = 1;
9257                         }
9258                 }
9259         } else {
9260                 if (ri != NULL &&
9261                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9262                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9263                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9264                         rec->bad_full_backref = 1;
9265                 }
9266         }
9267
9268         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9269                 rec->flag_block_full_backref = 1;
9270                 parent = bytenr;
9271                 owner = 0;
9272         } else {
9273                 rec->flag_block_full_backref = 0;
9274                 parent = 0;
9275                 owner = btrfs_header_owner(buf);
9276         }
9277
9278         ret = check_block(root, extent_cache, buf, flags);
9279         if (ret)
9280                 goto out;
9281
9282         if (btrfs_is_leaf(buf)) {
9283                 btree_space_waste += btrfs_leaf_free_space(root, buf);
9284                 for (i = 0; i < nritems; i++) {
9285                         struct btrfs_file_extent_item *fi;
9286                         btrfs_item_key_to_cpu(buf, &key, i);
9287                         /*
9288                          * Check key type against the leaf owner.
9289                          * Could filter quite a lot of early error if
9290                          * owner is correct
9291                          */
9292                         if (check_type_with_root(btrfs_header_owner(buf),
9293                                                  key.type)) {
9294                                 fprintf(stderr, "ignoring invalid key\n");
9295                                 continue;
9296                         }
9297                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9298                                 process_extent_item(root, extent_cache, buf,
9299                                                     i);
9300                                 continue;
9301                         }
9302                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9303                                 process_extent_item(root, extent_cache, buf,
9304                                                     i);
9305                                 continue;
9306                         }
9307                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9308                                 total_csum_bytes +=
9309                                         btrfs_item_size_nr(buf, i);
9310                                 continue;
9311                         }
9312                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9313                                 process_chunk_item(chunk_cache, &key, buf, i);
9314                                 continue;
9315                         }
9316                         if (key.type == BTRFS_DEV_ITEM_KEY) {
9317                                 process_device_item(dev_cache, &key, buf, i);
9318                                 continue;
9319                         }
9320                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9321                                 process_block_group_item(block_group_cache,
9322                                         &key, buf, i);
9323                                 continue;
9324                         }
9325                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
9326                                 process_device_extent_item(dev_extent_cache,
9327                                         &key, buf, i);
9328                                 continue;
9329
9330                         }
9331                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9332 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9333                                 process_extent_ref_v0(extent_cache, buf, i);
9334 #else
9335                                 BUG();
9336 #endif
9337                                 continue;
9338                         }
9339
9340                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9341                                 ret = add_tree_backref(extent_cache,
9342                                                 key.objectid, 0, key.offset, 0);
9343                                 if (ret < 0)
9344                                         error(
9345                                 "add_tree_backref failed (leaf tree block): %s",
9346                                               strerror(-ret));
9347                                 continue;
9348                         }
9349                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9350                                 ret = add_tree_backref(extent_cache,
9351                                                 key.objectid, key.offset, 0, 0);
9352                                 if (ret < 0)
9353                                         error(
9354                                 "add_tree_backref failed (leaf shared block): %s",
9355                                               strerror(-ret));
9356                                 continue;
9357                         }
9358                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9359                                 struct btrfs_extent_data_ref *ref;
9360                                 ref = btrfs_item_ptr(buf, i,
9361                                                 struct btrfs_extent_data_ref);
9362                                 add_data_backref(extent_cache,
9363                                         key.objectid, 0,
9364                                         btrfs_extent_data_ref_root(buf, ref),
9365                                         btrfs_extent_data_ref_objectid(buf,
9366                                                                        ref),
9367                                         btrfs_extent_data_ref_offset(buf, ref),
9368                                         btrfs_extent_data_ref_count(buf, ref),
9369                                         0, root->fs_info->sectorsize);
9370                                 continue;
9371                         }
9372                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9373                                 struct btrfs_shared_data_ref *ref;
9374                                 ref = btrfs_item_ptr(buf, i,
9375                                                 struct btrfs_shared_data_ref);
9376                                 add_data_backref(extent_cache,
9377                                         key.objectid, key.offset, 0, 0, 0,
9378                                         btrfs_shared_data_ref_count(buf, ref),
9379                                         0, root->fs_info->sectorsize);
9380                                 continue;
9381                         }
9382                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9383                                 struct bad_item *bad;
9384
9385                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9386                                         continue;
9387                                 if (!owner)
9388                                         continue;
9389                                 bad = malloc(sizeof(struct bad_item));
9390                                 if (!bad)
9391                                         continue;
9392                                 INIT_LIST_HEAD(&bad->list);
9393                                 memcpy(&bad->key, &key,
9394                                        sizeof(struct btrfs_key));
9395                                 bad->root_id = owner;
9396                                 list_add_tail(&bad->list, &delete_items);
9397                                 continue;
9398                         }
9399                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9400                                 continue;
9401                         fi = btrfs_item_ptr(buf, i,
9402                                             struct btrfs_file_extent_item);
9403                         if (btrfs_file_extent_type(buf, fi) ==
9404                             BTRFS_FILE_EXTENT_INLINE)
9405                                 continue;
9406                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9407                                 continue;
9408
9409                         data_bytes_allocated +=
9410                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9411                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9412                                 abort();
9413                         }
9414                         data_bytes_referenced +=
9415                                 btrfs_file_extent_num_bytes(buf, fi);
9416                         add_data_backref(extent_cache,
9417                                 btrfs_file_extent_disk_bytenr(buf, fi),
9418                                 parent, owner, key.objectid, key.offset -
9419                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9420                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9421                 }
9422         } else {
9423                 int level;
9424                 struct btrfs_key first_key;
9425
9426                 first_key.objectid = 0;
9427
9428                 if (nritems > 0)
9429                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9430                 level = btrfs_header_level(buf);
9431                 for (i = 0; i < nritems; i++) {
9432                         struct extent_record tmpl;
9433
9434                         ptr = btrfs_node_blockptr(buf, i);
9435                         size = root->fs_info->nodesize;
9436                         btrfs_node_key_to_cpu(buf, &key, i);
9437                         if (ri != NULL) {
9438                                 if ((level == ri->drop_level)
9439                                     && is_dropped_key(&key, &ri->drop_key)) {
9440                                         continue;
9441                                 }
9442                         }
9443
9444                         memset(&tmpl, 0, sizeof(tmpl));
9445                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9446                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9447                         tmpl.start = ptr;
9448                         tmpl.nr = size;
9449                         tmpl.refs = 1;
9450                         tmpl.metadata = 1;
9451                         tmpl.max_size = size;
9452                         ret = add_extent_rec(extent_cache, &tmpl);
9453                         if (ret < 0)
9454                                 goto out;
9455
9456                         ret = add_tree_backref(extent_cache, ptr, parent,
9457                                         owner, 1);
9458                         if (ret < 0) {
9459                                 error(
9460                                 "add_tree_backref failed (non-leaf block): %s",
9461                                       strerror(-ret));
9462                                 continue;
9463                         }
9464
9465                         if (level > 1) {
9466                                 add_pending(nodes, seen, ptr, size);
9467                         } else {
9468                                 add_pending(pending, seen, ptr, size);
9469                         }
9470                 }
9471                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9472                                       nritems) * sizeof(struct btrfs_key_ptr);
9473         }
9474         total_btree_bytes += buf->len;
9475         if (fs_root_objectid(btrfs_header_owner(buf)))
9476                 total_fs_tree_bytes += buf->len;
9477         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9478                 total_extent_tree_bytes += buf->len;
9479 out:
9480         free_extent_buffer(buf);
9481         return ret;
9482 }
9483
9484 static int add_root_to_pending(struct extent_buffer *buf,
9485                                struct cache_tree *extent_cache,
9486                                struct cache_tree *pending,
9487                                struct cache_tree *seen,
9488                                struct cache_tree *nodes,
9489                                u64 objectid)
9490 {
9491         struct extent_record tmpl;
9492         int ret;
9493
9494         if (btrfs_header_level(buf) > 0)
9495                 add_pending(nodes, seen, buf->start, buf->len);
9496         else
9497                 add_pending(pending, seen, buf->start, buf->len);
9498
9499         memset(&tmpl, 0, sizeof(tmpl));
9500         tmpl.start = buf->start;
9501         tmpl.nr = buf->len;
9502         tmpl.is_root = 1;
9503         tmpl.refs = 1;
9504         tmpl.metadata = 1;
9505         tmpl.max_size = buf->len;
9506         add_extent_rec(extent_cache, &tmpl);
9507
9508         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9509             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9510                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9511                                 0, 1);
9512         else
9513                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9514                                 1);
9515         return ret;
9516 }
9517
9518 /* as we fix the tree, we might be deleting blocks that
9519  * we're tracking for repair.  This hook makes sure we
9520  * remove any backrefs for blocks as we are fixing them.
9521  */
9522 static int free_extent_hook(struct btrfs_trans_handle *trans,
9523                             struct btrfs_root *root,
9524                             u64 bytenr, u64 num_bytes, u64 parent,
9525                             u64 root_objectid, u64 owner, u64 offset,
9526                             int refs_to_drop)
9527 {
9528         struct extent_record *rec;
9529         struct cache_extent *cache;
9530         int is_data;
9531         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9532
9533         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9534         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9535         if (!cache)
9536                 return 0;
9537
9538         rec = container_of(cache, struct extent_record, cache);
9539         if (is_data) {
9540                 struct data_backref *back;
9541                 back = find_data_backref(rec, parent, root_objectid, owner,
9542                                          offset, 1, bytenr, num_bytes);
9543                 if (!back)
9544                         goto out;
9545                 if (back->node.found_ref) {
9546                         back->found_ref -= refs_to_drop;
9547                         if (rec->refs)
9548                                 rec->refs -= refs_to_drop;
9549                 }
9550                 if (back->node.found_extent_tree) {
9551                         back->num_refs -= refs_to_drop;
9552                         if (rec->extent_item_refs)
9553                                 rec->extent_item_refs -= refs_to_drop;
9554                 }
9555                 if (back->found_ref == 0)
9556                         back->node.found_ref = 0;
9557                 if (back->num_refs == 0)
9558                         back->node.found_extent_tree = 0;
9559
9560                 if (!back->node.found_extent_tree && back->node.found_ref) {
9561                         rb_erase(&back->node.node, &rec->backref_tree);
9562                         free(back);
9563                 }
9564         } else {
9565                 struct tree_backref *back;
9566                 back = find_tree_backref(rec, parent, root_objectid);
9567                 if (!back)
9568                         goto out;
9569                 if (back->node.found_ref) {
9570                         if (rec->refs)
9571                                 rec->refs--;
9572                         back->node.found_ref = 0;
9573                 }
9574                 if (back->node.found_extent_tree) {
9575                         if (rec->extent_item_refs)
9576                                 rec->extent_item_refs--;
9577                         back->node.found_extent_tree = 0;
9578                 }
9579                 if (!back->node.found_extent_tree && back->node.found_ref) {
9580                         rb_erase(&back->node.node, &rec->backref_tree);
9581                         free(back);
9582                 }
9583         }
9584         maybe_free_extent_rec(extent_cache, rec);
9585 out:
9586         return 0;
9587 }
9588
9589 static int delete_extent_records(struct btrfs_trans_handle *trans,
9590                                  struct btrfs_root *root,
9591                                  struct btrfs_path *path,
9592                                  u64 bytenr)
9593 {
9594         struct btrfs_key key;
9595         struct btrfs_key found_key;
9596         struct extent_buffer *leaf;
9597         int ret;
9598         int slot;
9599
9600
9601         key.objectid = bytenr;
9602         key.type = (u8)-1;
9603         key.offset = (u64)-1;
9604
9605         while(1) {
9606                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9607                                         &key, path, 0, 1);
9608                 if (ret < 0)
9609                         break;
9610
9611                 if (ret > 0) {
9612                         ret = 0;
9613                         if (path->slots[0] == 0)
9614                                 break;
9615                         path->slots[0]--;
9616                 }
9617                 ret = 0;
9618
9619                 leaf = path->nodes[0];
9620                 slot = path->slots[0];
9621
9622                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9623                 if (found_key.objectid != bytenr)
9624                         break;
9625
9626                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9627                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9628                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9629                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9630                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9631                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9632                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9633                         btrfs_release_path(path);
9634                         if (found_key.type == 0) {
9635                                 if (found_key.offset == 0)
9636                                         break;
9637                                 key.offset = found_key.offset - 1;
9638                                 key.type = found_key.type;
9639                         }
9640                         key.type = found_key.type - 1;
9641                         key.offset = (u64)-1;
9642                         continue;
9643                 }
9644
9645                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9646                         found_key.objectid, found_key.type, found_key.offset);
9647
9648                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9649                 if (ret)
9650                         break;
9651                 btrfs_release_path(path);
9652
9653                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9654                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9655                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9656                                 found_key.offset : root->fs_info->nodesize;
9657
9658                         ret = btrfs_update_block_group(trans, root, bytenr,
9659                                                        bytes, 0, 0);
9660                         if (ret)
9661                                 break;
9662                 }
9663         }
9664
9665         btrfs_release_path(path);
9666         return ret;
9667 }
9668
9669 /*
9670  * for a single backref, this will allocate a new extent
9671  * and add the backref to it.
9672  */
9673 static int record_extent(struct btrfs_trans_handle *trans,
9674                          struct btrfs_fs_info *info,
9675                          struct btrfs_path *path,
9676                          struct extent_record *rec,
9677                          struct extent_backref *back,
9678                          int allocated, u64 flags)
9679 {
9680         int ret = 0;
9681         struct btrfs_root *extent_root = info->extent_root;
9682         struct extent_buffer *leaf;
9683         struct btrfs_key ins_key;
9684         struct btrfs_extent_item *ei;
9685         struct data_backref *dback;
9686         struct btrfs_tree_block_info *bi;
9687
9688         if (!back->is_data)
9689                 rec->max_size = max_t(u64, rec->max_size,
9690                                     info->nodesize);
9691
9692         if (!allocated) {
9693                 u32 item_size = sizeof(*ei);
9694
9695                 if (!back->is_data)
9696                         item_size += sizeof(*bi);
9697
9698                 ins_key.objectid = rec->start;
9699                 ins_key.offset = rec->max_size;
9700                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9701
9702                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9703                                         &ins_key, item_size);
9704                 if (ret)
9705                         goto fail;
9706
9707                 leaf = path->nodes[0];
9708                 ei = btrfs_item_ptr(leaf, path->slots[0],
9709                                     struct btrfs_extent_item);
9710
9711                 btrfs_set_extent_refs(leaf, ei, 0);
9712                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9713
9714                 if (back->is_data) {
9715                         btrfs_set_extent_flags(leaf, ei,
9716                                                BTRFS_EXTENT_FLAG_DATA);
9717                 } else {
9718                         struct btrfs_disk_key copy_key;;
9719
9720                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9721                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9722                                              sizeof(*bi));
9723
9724                         btrfs_set_disk_key_objectid(&copy_key,
9725                                                     rec->info_objectid);
9726                         btrfs_set_disk_key_type(&copy_key, 0);
9727                         btrfs_set_disk_key_offset(&copy_key, 0);
9728
9729                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9730                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9731
9732                         btrfs_set_extent_flags(leaf, ei,
9733                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9734                 }
9735
9736                 btrfs_mark_buffer_dirty(leaf);
9737                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9738                                                rec->max_size, 1, 0);
9739                 if (ret)
9740                         goto fail;
9741                 btrfs_release_path(path);
9742         }
9743
9744         if (back->is_data) {
9745                 u64 parent;
9746                 int i;
9747
9748                 dback = to_data_backref(back);
9749                 if (back->full_backref)
9750                         parent = dback->parent;
9751                 else
9752                         parent = 0;
9753
9754                 for (i = 0; i < dback->found_ref; i++) {
9755                         /* if parent != 0, we're doing a full backref
9756                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9757                          * just makes the backref allocator create a data
9758                          * backref
9759                          */
9760                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9761                                                    rec->start, rec->max_size,
9762                                                    parent,
9763                                                    dback->root,
9764                                                    parent ?
9765                                                    BTRFS_FIRST_FREE_OBJECTID :
9766                                                    dback->owner,
9767                                                    dback->offset);
9768                         if (ret)
9769                                 break;
9770                 }
9771                 fprintf(stderr, "adding new data backref"
9772                                 " on %llu %s %llu owner %llu"
9773                                 " offset %llu found %d\n",
9774                                 (unsigned long long)rec->start,
9775                                 back->full_backref ?
9776                                 "parent" : "root",
9777                                 back->full_backref ?
9778                                 (unsigned long long)parent :
9779                                 (unsigned long long)dback->root,
9780                                 (unsigned long long)dback->owner,
9781                                 (unsigned long long)dback->offset,
9782                                 dback->found_ref);
9783         } else {
9784                 u64 parent;
9785                 struct tree_backref *tback;
9786
9787                 tback = to_tree_backref(back);
9788                 if (back->full_backref)
9789                         parent = tback->parent;
9790                 else
9791                         parent = 0;
9792
9793                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9794                                            rec->start, rec->max_size,
9795                                            parent, tback->root, 0, 0);
9796                 fprintf(stderr, "adding new tree backref on "
9797                         "start %llu len %llu parent %llu root %llu\n",
9798                         rec->start, rec->max_size, parent, tback->root);
9799         }
9800 fail:
9801         btrfs_release_path(path);
9802         return ret;
9803 }
9804
9805 static struct extent_entry *find_entry(struct list_head *entries,
9806                                        u64 bytenr, u64 bytes)
9807 {
9808         struct extent_entry *entry = NULL;
9809
9810         list_for_each_entry(entry, entries, list) {
9811                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9812                         return entry;
9813         }
9814
9815         return NULL;
9816 }
9817
9818 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9819 {
9820         struct extent_entry *entry, *best = NULL, *prev = NULL;
9821
9822         list_for_each_entry(entry, entries, list) {
9823                 /*
9824                  * If there are as many broken entries as entries then we know
9825                  * not to trust this particular entry.
9826                  */
9827                 if (entry->broken == entry->count)
9828                         continue;
9829
9830                 /*
9831                  * Special case, when there are only two entries and 'best' is
9832                  * the first one
9833                  */
9834                 if (!prev) {
9835                         best = entry;
9836                         prev = entry;
9837                         continue;
9838                 }
9839
9840                 /*
9841                  * If our current entry == best then we can't be sure our best
9842                  * is really the best, so we need to keep searching.
9843                  */
9844                 if (best && best->count == entry->count) {
9845                         prev = entry;
9846                         best = NULL;
9847                         continue;
9848                 }
9849
9850                 /* Prev == entry, not good enough, have to keep searching */
9851                 if (!prev->broken && prev->count == entry->count)
9852                         continue;
9853
9854                 if (!best)
9855                         best = (prev->count > entry->count) ? prev : entry;
9856                 else if (best->count < entry->count)
9857                         best = entry;
9858                 prev = entry;
9859         }
9860
9861         return best;
9862 }
9863
9864 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9865                       struct data_backref *dback, struct extent_entry *entry)
9866 {
9867         struct btrfs_trans_handle *trans;
9868         struct btrfs_root *root;
9869         struct btrfs_file_extent_item *fi;
9870         struct extent_buffer *leaf;
9871         struct btrfs_key key;
9872         u64 bytenr, bytes;
9873         int ret, err;
9874
9875         key.objectid = dback->root;
9876         key.type = BTRFS_ROOT_ITEM_KEY;
9877         key.offset = (u64)-1;
9878         root = btrfs_read_fs_root(info, &key);
9879         if (IS_ERR(root)) {
9880                 fprintf(stderr, "Couldn't find root for our ref\n");
9881                 return -EINVAL;
9882         }
9883
9884         /*
9885          * The backref points to the original offset of the extent if it was
9886          * split, so we need to search down to the offset we have and then walk
9887          * forward until we find the backref we're looking for.
9888          */
9889         key.objectid = dback->owner;
9890         key.type = BTRFS_EXTENT_DATA_KEY;
9891         key.offset = dback->offset;
9892         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9893         if (ret < 0) {
9894                 fprintf(stderr, "Error looking up ref %d\n", ret);
9895                 return ret;
9896         }
9897
9898         while (1) {
9899                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9900                         ret = btrfs_next_leaf(root, path);
9901                         if (ret) {
9902                                 fprintf(stderr, "Couldn't find our ref, next\n");
9903                                 return -EINVAL;
9904                         }
9905                 }
9906                 leaf = path->nodes[0];
9907                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9908                 if (key.objectid != dback->owner ||
9909                     key.type != BTRFS_EXTENT_DATA_KEY) {
9910                         fprintf(stderr, "Couldn't find our ref, search\n");
9911                         return -EINVAL;
9912                 }
9913                 fi = btrfs_item_ptr(leaf, path->slots[0],
9914                                     struct btrfs_file_extent_item);
9915                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9916                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9917
9918                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9919                         break;
9920                 path->slots[0]++;
9921         }
9922
9923         btrfs_release_path(path);
9924
9925         trans = btrfs_start_transaction(root, 1);
9926         if (IS_ERR(trans))
9927                 return PTR_ERR(trans);
9928
9929         /*
9930          * Ok we have the key of the file extent we want to fix, now we can cow
9931          * down to the thing and fix it.
9932          */
9933         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9934         if (ret < 0) {
9935                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9936                         key.objectid, key.type, key.offset, ret);
9937                 goto out;
9938         }
9939         if (ret > 0) {
9940                 fprintf(stderr, "Well that's odd, we just found this key "
9941                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9942                         key.offset);
9943                 ret = -EINVAL;
9944                 goto out;
9945         }
9946         leaf = path->nodes[0];
9947         fi = btrfs_item_ptr(leaf, path->slots[0],
9948                             struct btrfs_file_extent_item);
9949
9950         if (btrfs_file_extent_compression(leaf, fi) &&
9951             dback->disk_bytenr != entry->bytenr) {
9952                 fprintf(stderr, "Ref doesn't match the record start and is "
9953                         "compressed, please take a btrfs-image of this file "
9954                         "system and send it to a btrfs developer so they can "
9955                         "complete this functionality for bytenr %Lu\n",
9956                         dback->disk_bytenr);
9957                 ret = -EINVAL;
9958                 goto out;
9959         }
9960
9961         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9962                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9963         } else if (dback->disk_bytenr > entry->bytenr) {
9964                 u64 off_diff, offset;
9965
9966                 off_diff = dback->disk_bytenr - entry->bytenr;
9967                 offset = btrfs_file_extent_offset(leaf, fi);
9968                 if (dback->disk_bytenr + offset +
9969                     btrfs_file_extent_num_bytes(leaf, fi) >
9970                     entry->bytenr + entry->bytes) {
9971                         fprintf(stderr, "Ref is past the entry end, please "
9972                                 "take a btrfs-image of this file system and "
9973                                 "send it to a btrfs developer, ref %Lu\n",
9974                                 dback->disk_bytenr);
9975                         ret = -EINVAL;
9976                         goto out;
9977                 }
9978                 offset += off_diff;
9979                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9980                 btrfs_set_file_extent_offset(leaf, fi, offset);
9981         } else if (dback->disk_bytenr < entry->bytenr) {
9982                 u64 offset;
9983
9984                 offset = btrfs_file_extent_offset(leaf, fi);
9985                 if (dback->disk_bytenr + offset < entry->bytenr) {
9986                         fprintf(stderr, "Ref is before the entry start, please"
9987                                 " take a btrfs-image of this file system and "
9988                                 "send it to a btrfs developer, ref %Lu\n",
9989                                 dback->disk_bytenr);
9990                         ret = -EINVAL;
9991                         goto out;
9992                 }
9993
9994                 offset += dback->disk_bytenr;
9995                 offset -= entry->bytenr;
9996                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9997                 btrfs_set_file_extent_offset(leaf, fi, offset);
9998         }
9999
10000         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10001
10002         /*
10003          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10004          * only do this if we aren't using compression, otherwise it's a
10005          * trickier case.
10006          */
10007         if (!btrfs_file_extent_compression(leaf, fi))
10008                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10009         else
10010                 printf("ram bytes may be wrong?\n");
10011         btrfs_mark_buffer_dirty(leaf);
10012 out:
10013         err = btrfs_commit_transaction(trans, root);
10014         btrfs_release_path(path);
10015         return ret ? ret : err;
10016 }
10017
10018 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10019                            struct extent_record *rec)
10020 {
10021         struct extent_backref *back, *tmp;
10022         struct data_backref *dback;
10023         struct extent_entry *entry, *best = NULL;
10024         LIST_HEAD(entries);
10025         int nr_entries = 0;
10026         int broken_entries = 0;
10027         int ret = 0;
10028         short mismatch = 0;
10029
10030         /*
10031          * Metadata is easy and the backrefs should always agree on bytenr and
10032          * size, if not we've got bigger issues.
10033          */
10034         if (rec->metadata)
10035                 return 0;
10036
10037         rbtree_postorder_for_each_entry_safe(back, tmp,
10038                                              &rec->backref_tree, node) {
10039                 if (back->full_backref || !back->is_data)
10040                         continue;
10041
10042                 dback = to_data_backref(back);
10043
10044                 /*
10045                  * We only pay attention to backrefs that we found a real
10046                  * backref for.
10047                  */
10048                 if (dback->found_ref == 0)
10049                         continue;
10050
10051                 /*
10052                  * For now we only catch when the bytes don't match, not the
10053                  * bytenr.  We can easily do this at the same time, but I want
10054                  * to have a fs image to test on before we just add repair
10055                  * functionality willy-nilly so we know we won't screw up the
10056                  * repair.
10057                  */
10058
10059                 entry = find_entry(&entries, dback->disk_bytenr,
10060                                    dback->bytes);
10061                 if (!entry) {
10062                         entry = malloc(sizeof(struct extent_entry));
10063                         if (!entry) {
10064                                 ret = -ENOMEM;
10065                                 goto out;
10066                         }
10067                         memset(entry, 0, sizeof(*entry));
10068                         entry->bytenr = dback->disk_bytenr;
10069                         entry->bytes = dback->bytes;
10070                         list_add_tail(&entry->list, &entries);
10071                         nr_entries++;
10072                 }
10073
10074                 /*
10075                  * If we only have on entry we may think the entries agree when
10076                  * in reality they don't so we have to do some extra checking.
10077                  */
10078                 if (dback->disk_bytenr != rec->start ||
10079                     dback->bytes != rec->nr || back->broken)
10080                         mismatch = 1;
10081
10082                 if (back->broken) {
10083                         entry->broken++;
10084                         broken_entries++;
10085                 }
10086
10087                 entry->count++;
10088         }
10089
10090         /* Yay all the backrefs agree, carry on good sir */
10091         if (nr_entries <= 1 && !mismatch)
10092                 goto out;
10093
10094         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10095                 "%Lu\n", rec->start);
10096
10097         /*
10098          * First we want to see if the backrefs can agree amongst themselves who
10099          * is right, so figure out which one of the entries has the highest
10100          * count.
10101          */
10102         best = find_most_right_entry(&entries);
10103
10104         /*
10105          * Ok so we may have an even split between what the backrefs think, so
10106          * this is where we use the extent ref to see what it thinks.
10107          */
10108         if (!best) {
10109                 entry = find_entry(&entries, rec->start, rec->nr);
10110                 if (!entry && (!broken_entries || !rec->found_rec)) {
10111                         fprintf(stderr, "Backrefs don't agree with each other "
10112                                 "and extent record doesn't agree with anybody,"
10113                                 " so we can't fix bytenr %Lu bytes %Lu\n",
10114                                 rec->start, rec->nr);
10115                         ret = -EINVAL;
10116                         goto out;
10117                 } else if (!entry) {
10118                         /*
10119                          * Ok our backrefs were broken, we'll assume this is the
10120                          * correct value and add an entry for this range.
10121                          */
10122                         entry = malloc(sizeof(struct extent_entry));
10123                         if (!entry) {
10124                                 ret = -ENOMEM;
10125                                 goto out;
10126                         }
10127                         memset(entry, 0, sizeof(*entry));
10128                         entry->bytenr = rec->start;
10129                         entry->bytes = rec->nr;
10130                         list_add_tail(&entry->list, &entries);
10131                         nr_entries++;
10132                 }
10133                 entry->count++;
10134                 best = find_most_right_entry(&entries);
10135                 if (!best) {
10136                         fprintf(stderr, "Backrefs and extent record evenly "
10137                                 "split on who is right, this is going to "
10138                                 "require user input to fix bytenr %Lu bytes "
10139                                 "%Lu\n", rec->start, rec->nr);
10140                         ret = -EINVAL;
10141                         goto out;
10142                 }
10143         }
10144
10145         /*
10146          * I don't think this can happen currently as we'll abort() if we catch
10147          * this case higher up, but in case somebody removes that we still can't
10148          * deal with it properly here yet, so just bail out of that's the case.
10149          */
10150         if (best->bytenr != rec->start) {
10151                 fprintf(stderr, "Extent start and backref starts don't match, "
10152                         "please use btrfs-image on this file system and send "
10153                         "it to a btrfs developer so they can make fsck fix "
10154                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
10155                         rec->start, rec->nr);
10156                 ret = -EINVAL;
10157                 goto out;
10158         }
10159
10160         /*
10161          * Ok great we all agreed on an extent record, let's go find the real
10162          * references and fix up the ones that don't match.
10163          */
10164         rbtree_postorder_for_each_entry_safe(back, tmp,
10165                                              &rec->backref_tree, node) {
10166                 if (back->full_backref || !back->is_data)
10167                         continue;
10168
10169                 dback = to_data_backref(back);
10170
10171                 /*
10172                  * Still ignoring backrefs that don't have a real ref attached
10173                  * to them.
10174                  */
10175                 if (dback->found_ref == 0)
10176                         continue;
10177
10178                 if (dback->bytes == best->bytes &&
10179                     dback->disk_bytenr == best->bytenr)
10180                         continue;
10181
10182                 ret = repair_ref(info, path, dback, best);
10183                 if (ret)
10184                         goto out;
10185         }
10186
10187         /*
10188          * Ok we messed with the actual refs, which means we need to drop our
10189          * entire cache and go back and rescan.  I know this is a huge pain and
10190          * adds a lot of extra work, but it's the only way to be safe.  Once all
10191          * the backrefs agree we may not need to do anything to the extent
10192          * record itself.
10193          */
10194         ret = -EAGAIN;
10195 out:
10196         while (!list_empty(&entries)) {
10197                 entry = list_entry(entries.next, struct extent_entry, list);
10198                 list_del_init(&entry->list);
10199                 free(entry);
10200         }
10201         return ret;
10202 }
10203
10204 static int process_duplicates(struct cache_tree *extent_cache,
10205                               struct extent_record *rec)
10206 {
10207         struct extent_record *good, *tmp;
10208         struct cache_extent *cache;
10209         int ret;
10210
10211         /*
10212          * If we found a extent record for this extent then return, or if we
10213          * have more than one duplicate we are likely going to need to delete
10214          * something.
10215          */
10216         if (rec->found_rec || rec->num_duplicates > 1)
10217                 return 0;
10218
10219         /* Shouldn't happen but just in case */
10220         BUG_ON(!rec->num_duplicates);
10221
10222         /*
10223          * So this happens if we end up with a backref that doesn't match the
10224          * actual extent entry.  So either the backref is bad or the extent
10225          * entry is bad.  Either way we want to have the extent_record actually
10226          * reflect what we found in the extent_tree, so we need to take the
10227          * duplicate out and use that as the extent_record since the only way we
10228          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10229          */
10230         remove_cache_extent(extent_cache, &rec->cache);
10231
10232         good = to_extent_record(rec->dups.next);
10233         list_del_init(&good->list);
10234         INIT_LIST_HEAD(&good->backrefs);
10235         INIT_LIST_HEAD(&good->dups);
10236         good->cache.start = good->start;
10237         good->cache.size = good->nr;
10238         good->content_checked = 0;
10239         good->owner_ref_checked = 0;
10240         good->num_duplicates = 0;
10241         good->refs = rec->refs;
10242         list_splice_init(&rec->backrefs, &good->backrefs);
10243         while (1) {
10244                 cache = lookup_cache_extent(extent_cache, good->start,
10245                                             good->nr);
10246                 if (!cache)
10247                         break;
10248                 tmp = container_of(cache, struct extent_record, cache);
10249
10250                 /*
10251                  * If we find another overlapping extent and it's found_rec is
10252                  * set then it's a duplicate and we need to try and delete
10253                  * something.
10254                  */
10255                 if (tmp->found_rec || tmp->num_duplicates > 0) {
10256                         if (list_empty(&good->list))
10257                                 list_add_tail(&good->list,
10258                                               &duplicate_extents);
10259                         good->num_duplicates += tmp->num_duplicates + 1;
10260                         list_splice_init(&tmp->dups, &good->dups);
10261                         list_del_init(&tmp->list);
10262                         list_add_tail(&tmp->list, &good->dups);
10263                         remove_cache_extent(extent_cache, &tmp->cache);
10264                         continue;
10265                 }
10266
10267                 /*
10268                  * Ok we have another non extent item backed extent rec, so lets
10269                  * just add it to this extent and carry on like we did above.
10270                  */
10271                 good->refs += tmp->refs;
10272                 list_splice_init(&tmp->backrefs, &good->backrefs);
10273                 remove_cache_extent(extent_cache, &tmp->cache);
10274                 free(tmp);
10275         }
10276         ret = insert_cache_extent(extent_cache, &good->cache);
10277         BUG_ON(ret);
10278         free(rec);
10279         return good->num_duplicates ? 0 : 1;
10280 }
10281
10282 static int delete_duplicate_records(struct btrfs_root *root,
10283                                     struct extent_record *rec)
10284 {
10285         struct btrfs_trans_handle *trans;
10286         LIST_HEAD(delete_list);
10287         struct btrfs_path path;
10288         struct extent_record *tmp, *good, *n;
10289         int nr_del = 0;
10290         int ret = 0, err;
10291         struct btrfs_key key;
10292
10293         btrfs_init_path(&path);
10294
10295         good = rec;
10296         /* Find the record that covers all of the duplicates. */
10297         list_for_each_entry(tmp, &rec->dups, list) {
10298                 if (good->start < tmp->start)
10299                         continue;
10300                 if (good->nr > tmp->nr)
10301                         continue;
10302
10303                 if (tmp->start + tmp->nr < good->start + good->nr) {
10304                         fprintf(stderr, "Ok we have overlapping extents that "
10305                                 "aren't completely covered by each other, this "
10306                                 "is going to require more careful thought.  "
10307                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10308                                 tmp->start, tmp->nr, good->start, good->nr);
10309                         abort();
10310                 }
10311                 good = tmp;
10312         }
10313
10314         if (good != rec)
10315                 list_add_tail(&rec->list, &delete_list);
10316
10317         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10318                 if (tmp == good)
10319                         continue;
10320                 list_move_tail(&tmp->list, &delete_list);
10321         }
10322
10323         root = root->fs_info->extent_root;
10324         trans = btrfs_start_transaction(root, 1);
10325         if (IS_ERR(trans)) {
10326                 ret = PTR_ERR(trans);
10327                 goto out;
10328         }
10329
10330         list_for_each_entry(tmp, &delete_list, list) {
10331                 if (tmp->found_rec == 0)
10332                         continue;
10333                 key.objectid = tmp->start;
10334                 key.type = BTRFS_EXTENT_ITEM_KEY;
10335                 key.offset = tmp->nr;
10336
10337                 /* Shouldn't happen but just in case */
10338                 if (tmp->metadata) {
10339                         fprintf(stderr, "Well this shouldn't happen, extent "
10340                                 "record overlaps but is metadata? "
10341                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10342                         abort();
10343                 }
10344
10345                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10346                 if (ret) {
10347                         if (ret > 0)
10348                                 ret = -EINVAL;
10349                         break;
10350                 }
10351                 ret = btrfs_del_item(trans, root, &path);
10352                 if (ret)
10353                         break;
10354                 btrfs_release_path(&path);
10355                 nr_del++;
10356         }
10357         err = btrfs_commit_transaction(trans, root);
10358         if (err && !ret)
10359                 ret = err;
10360 out:
10361         while (!list_empty(&delete_list)) {
10362                 tmp = to_extent_record(delete_list.next);
10363                 list_del_init(&tmp->list);
10364                 if (tmp == rec)
10365                         continue;
10366                 free(tmp);
10367         }
10368
10369         while (!list_empty(&rec->dups)) {
10370                 tmp = to_extent_record(rec->dups.next);
10371                 list_del_init(&tmp->list);
10372                 free(tmp);
10373         }
10374
10375         btrfs_release_path(&path);
10376
10377         if (!ret && !nr_del)
10378                 rec->num_duplicates = 0;
10379
10380         return ret ? ret : nr_del;
10381 }
10382
10383 static int find_possible_backrefs(struct btrfs_fs_info *info,
10384                                   struct btrfs_path *path,
10385                                   struct cache_tree *extent_cache,
10386                                   struct extent_record *rec)
10387 {
10388         struct btrfs_root *root;
10389         struct extent_backref *back, *tmp;
10390         struct data_backref *dback;
10391         struct cache_extent *cache;
10392         struct btrfs_file_extent_item *fi;
10393         struct btrfs_key key;
10394         u64 bytenr, bytes;
10395         int ret;
10396
10397         rbtree_postorder_for_each_entry_safe(back, tmp,
10398                                              &rec->backref_tree, node) {
10399                 /* Don't care about full backrefs (poor unloved backrefs) */
10400                 if (back->full_backref || !back->is_data)
10401                         continue;
10402
10403                 dback = to_data_backref(back);
10404
10405                 /* We found this one, we don't need to do a lookup */
10406                 if (dback->found_ref)
10407                         continue;
10408
10409                 key.objectid = dback->root;
10410                 key.type = BTRFS_ROOT_ITEM_KEY;
10411                 key.offset = (u64)-1;
10412
10413                 root = btrfs_read_fs_root(info, &key);
10414
10415                 /* No root, definitely a bad ref, skip */
10416                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10417                         continue;
10418                 /* Other err, exit */
10419                 if (IS_ERR(root))
10420                         return PTR_ERR(root);
10421
10422                 key.objectid = dback->owner;
10423                 key.type = BTRFS_EXTENT_DATA_KEY;
10424                 key.offset = dback->offset;
10425                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10426                 if (ret) {
10427                         btrfs_release_path(path);
10428                         if (ret < 0)
10429                                 return ret;
10430                         /* Didn't find it, we can carry on */
10431                         ret = 0;
10432                         continue;
10433                 }
10434
10435                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10436                                     struct btrfs_file_extent_item);
10437                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10438                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10439                 btrfs_release_path(path);
10440                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10441                 if (cache) {
10442                         struct extent_record *tmp;
10443                         tmp = container_of(cache, struct extent_record, cache);
10444
10445                         /*
10446                          * If we found an extent record for the bytenr for this
10447                          * particular backref then we can't add it to our
10448                          * current extent record.  We only want to add backrefs
10449                          * that don't have a corresponding extent item in the
10450                          * extent tree since they likely belong to this record
10451                          * and we need to fix it if it doesn't match bytenrs.
10452                          */
10453                         if  (tmp->found_rec)
10454                                 continue;
10455                 }
10456
10457                 dback->found_ref += 1;
10458                 dback->disk_bytenr = bytenr;
10459                 dback->bytes = bytes;
10460
10461                 /*
10462                  * Set this so the verify backref code knows not to trust the
10463                  * values in this backref.
10464                  */
10465                 back->broken = 1;
10466         }
10467
10468         return 0;
10469 }
10470
10471 /*
10472  * Record orphan data ref into corresponding root.
10473  *
10474  * Return 0 if the extent item contains data ref and recorded.
10475  * Return 1 if the extent item contains no useful data ref
10476  *   On that case, it may contains only shared_dataref or metadata backref
10477  *   or the file extent exists(this should be handled by the extent bytenr
10478  *   recovery routine)
10479  * Return <0 if something goes wrong.
10480  */
10481 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10482                                       struct extent_record *rec)
10483 {
10484         struct btrfs_key key;
10485         struct btrfs_root *dest_root;
10486         struct extent_backref *back, *tmp;
10487         struct data_backref *dback;
10488         struct orphan_data_extent *orphan;
10489         struct btrfs_path path;
10490         int recorded_data_ref = 0;
10491         int ret = 0;
10492
10493         if (rec->metadata)
10494                 return 1;
10495         btrfs_init_path(&path);
10496         rbtree_postorder_for_each_entry_safe(back, tmp,
10497                                              &rec->backref_tree, node) {
10498                 if (back->full_backref || !back->is_data ||
10499                     !back->found_extent_tree)
10500                         continue;
10501                 dback = to_data_backref(back);
10502                 if (dback->found_ref)
10503                         continue;
10504                 key.objectid = dback->root;
10505                 key.type = BTRFS_ROOT_ITEM_KEY;
10506                 key.offset = (u64)-1;
10507
10508                 dest_root = btrfs_read_fs_root(fs_info, &key);
10509
10510                 /* For non-exist root we just skip it */
10511                 if (IS_ERR(dest_root) || !dest_root)
10512                         continue;
10513
10514                 key.objectid = dback->owner;
10515                 key.type = BTRFS_EXTENT_DATA_KEY;
10516                 key.offset = dback->offset;
10517
10518                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10519                 btrfs_release_path(&path);
10520                 /*
10521                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10522                  * we need to record it for inode/file extent rebuild.
10523                  * For ret > 0, we record it only for file extent rebuild.
10524                  * For ret == 0, the file extent exists but only bytenr
10525                  * mismatch, let the original bytenr fix routine to handle,
10526                  * don't record it.
10527                  */
10528                 if (ret == 0)
10529                         continue;
10530                 ret = 0;
10531                 orphan = malloc(sizeof(*orphan));
10532                 if (!orphan) {
10533                         ret = -ENOMEM;
10534                         goto out;
10535                 }
10536                 INIT_LIST_HEAD(&orphan->list);
10537                 orphan->root = dback->root;
10538                 orphan->objectid = dback->owner;
10539                 orphan->offset = dback->offset;
10540                 orphan->disk_bytenr = rec->cache.start;
10541                 orphan->disk_len = rec->cache.size;
10542                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10543                 recorded_data_ref = 1;
10544         }
10545 out:
10546         btrfs_release_path(&path);
10547         if (!ret)
10548                 return !recorded_data_ref;
10549         else
10550                 return ret;
10551 }
10552
10553 /*
10554  * when an incorrect extent item is found, this will delete
10555  * all of the existing entries for it and recreate them
10556  * based on what the tree scan found.
10557  */
10558 static int fixup_extent_refs(struct btrfs_fs_info *info,
10559                              struct cache_tree *extent_cache,
10560                              struct extent_record *rec)
10561 {
10562         struct btrfs_trans_handle *trans = NULL;
10563         int ret;
10564         struct btrfs_path path;
10565         struct cache_extent *cache;
10566         struct extent_backref *back, *tmp;
10567         int allocated = 0;
10568         u64 flags = 0;
10569
10570         if (rec->flag_block_full_backref)
10571                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10572
10573         btrfs_init_path(&path);
10574         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10575                 /*
10576                  * Sometimes the backrefs themselves are so broken they don't
10577                  * get attached to any meaningful rec, so first go back and
10578                  * check any of our backrefs that we couldn't find and throw
10579                  * them into the list if we find the backref so that
10580                  * verify_backrefs can figure out what to do.
10581                  */
10582                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10583                 if (ret < 0)
10584                         goto out;
10585         }
10586
10587         /* step one, make sure all of the backrefs agree */
10588         ret = verify_backrefs(info, &path, rec);
10589         if (ret < 0)
10590                 goto out;
10591
10592         trans = btrfs_start_transaction(info->extent_root, 1);
10593         if (IS_ERR(trans)) {
10594                 ret = PTR_ERR(trans);
10595                 goto out;
10596         }
10597
10598         /* step two, delete all the existing records */
10599         ret = delete_extent_records(trans, info->extent_root, &path,
10600                                     rec->start);
10601
10602         if (ret < 0)
10603                 goto out;
10604
10605         /* was this block corrupt?  If so, don't add references to it */
10606         cache = lookup_cache_extent(info->corrupt_blocks,
10607                                     rec->start, rec->max_size);
10608         if (cache) {
10609                 ret = 0;
10610                 goto out;
10611         }
10612
10613         /* step three, recreate all the refs we did find */
10614         rbtree_postorder_for_each_entry_safe(back, tmp,
10615                                              &rec->backref_tree, node) {
10616                 /*
10617                  * if we didn't find any references, don't create a
10618                  * new extent record
10619                  */
10620                 if (!back->found_ref)
10621                         continue;
10622
10623                 rec->bad_full_backref = 0;
10624                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10625                 allocated = 1;
10626
10627                 if (ret)
10628                         goto out;
10629         }
10630 out:
10631         if (trans) {
10632                 int err = btrfs_commit_transaction(trans, info->extent_root);
10633                 if (!ret)
10634                         ret = err;
10635         }
10636
10637         if (!ret)
10638                 fprintf(stderr, "Repaired extent references for %llu\n",
10639                                 (unsigned long long)rec->start);
10640
10641         btrfs_release_path(&path);
10642         return ret;
10643 }
10644
10645 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10646                               struct extent_record *rec)
10647 {
10648         struct btrfs_trans_handle *trans;
10649         struct btrfs_root *root = fs_info->extent_root;
10650         struct btrfs_path path;
10651         struct btrfs_extent_item *ei;
10652         struct btrfs_key key;
10653         u64 flags;
10654         int ret = 0;
10655
10656         key.objectid = rec->start;
10657         if (rec->metadata) {
10658                 key.type = BTRFS_METADATA_ITEM_KEY;
10659                 key.offset = rec->info_level;
10660         } else {
10661                 key.type = BTRFS_EXTENT_ITEM_KEY;
10662                 key.offset = rec->max_size;
10663         }
10664
10665         trans = btrfs_start_transaction(root, 0);
10666         if (IS_ERR(trans))
10667                 return PTR_ERR(trans);
10668
10669         btrfs_init_path(&path);
10670         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10671         if (ret < 0) {
10672                 btrfs_release_path(&path);
10673                 btrfs_commit_transaction(trans, root);
10674                 return ret;
10675         } else if (ret) {
10676                 fprintf(stderr, "Didn't find extent for %llu\n",
10677                         (unsigned long long)rec->start);
10678                 btrfs_release_path(&path);
10679                 btrfs_commit_transaction(trans, root);
10680                 return -ENOENT;
10681         }
10682
10683         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10684                             struct btrfs_extent_item);
10685         flags = btrfs_extent_flags(path.nodes[0], ei);
10686         if (rec->flag_block_full_backref) {
10687                 fprintf(stderr, "setting full backref on %llu\n",
10688                         (unsigned long long)key.objectid);
10689                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10690         } else {
10691                 fprintf(stderr, "clearing full backref on %llu\n",
10692                         (unsigned long long)key.objectid);
10693                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10694         }
10695         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10696         btrfs_mark_buffer_dirty(path.nodes[0]);
10697         btrfs_release_path(&path);
10698         ret = btrfs_commit_transaction(trans, root);
10699         if (!ret)
10700                 fprintf(stderr, "Repaired extent flags for %llu\n",
10701                                 (unsigned long long)rec->start);
10702
10703         return ret;
10704 }
10705
10706 /* right now we only prune from the extent allocation tree */
10707 static int prune_one_block(struct btrfs_trans_handle *trans,
10708                            struct btrfs_fs_info *info,
10709                            struct btrfs_corrupt_block *corrupt)
10710 {
10711         int ret;
10712         struct btrfs_path path;
10713         struct extent_buffer *eb;
10714         u64 found;
10715         int slot;
10716         int nritems;
10717         int level = corrupt->level + 1;
10718
10719         btrfs_init_path(&path);
10720 again:
10721         /* we want to stop at the parent to our busted block */
10722         path.lowest_level = level;
10723
10724         ret = btrfs_search_slot(trans, info->extent_root,
10725                                 &corrupt->key, &path, -1, 1);
10726
10727         if (ret < 0)
10728                 goto out;
10729
10730         eb = path.nodes[level];
10731         if (!eb) {
10732                 ret = -ENOENT;
10733                 goto out;
10734         }
10735
10736         /*
10737          * hopefully the search gave us the block we want to prune,
10738          * lets try that first
10739          */
10740         slot = path.slots[level];
10741         found =  btrfs_node_blockptr(eb, slot);
10742         if (found == corrupt->cache.start)
10743                 goto del_ptr;
10744
10745         nritems = btrfs_header_nritems(eb);
10746
10747         /* the search failed, lets scan this node and hope we find it */
10748         for (slot = 0; slot < nritems; slot++) {
10749                 found =  btrfs_node_blockptr(eb, slot);
10750                 if (found == corrupt->cache.start)
10751                         goto del_ptr;
10752         }
10753         /*
10754          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10755          * to this block
10756          */
10757         if (eb == info->extent_root->node) {
10758                 ret = -ENOENT;
10759                 goto out;
10760         } else {
10761                 level++;
10762                 btrfs_release_path(&path);
10763                 goto again;
10764         }
10765
10766 del_ptr:
10767         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10768         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10769
10770 out:
10771         btrfs_release_path(&path);
10772         return ret;
10773 }
10774
10775 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10776 {
10777         struct btrfs_trans_handle *trans = NULL;
10778         struct cache_extent *cache;
10779         struct btrfs_corrupt_block *corrupt;
10780
10781         while (1) {
10782                 cache = search_cache_extent(info->corrupt_blocks, 0);
10783                 if (!cache)
10784                         break;
10785                 if (!trans) {
10786                         trans = btrfs_start_transaction(info->extent_root, 1);
10787                         if (IS_ERR(trans))
10788                                 return PTR_ERR(trans);
10789                 }
10790                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10791                 prune_one_block(trans, info, corrupt);
10792                 remove_cache_extent(info->corrupt_blocks, cache);
10793         }
10794         if (trans)
10795                 return btrfs_commit_transaction(trans, info->extent_root);
10796         return 0;
10797 }
10798
10799 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10800 {
10801         struct btrfs_block_group_cache *cache;
10802         u64 start, end;
10803         int ret;
10804
10805         while (1) {
10806                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10807                                             &start, &end, EXTENT_DIRTY);
10808                 if (ret)
10809                         break;
10810                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10811         }
10812
10813         start = 0;
10814         while (1) {
10815                 cache = btrfs_lookup_first_block_group(fs_info, start);
10816                 if (!cache)
10817                         break;
10818                 if (cache->cached)
10819                         cache->cached = 0;
10820                 start = cache->key.objectid + cache->key.offset;
10821         }
10822 }
10823
10824 static int check_extent_refs(struct btrfs_root *root,
10825                              struct cache_tree *extent_cache)
10826 {
10827         struct extent_record *rec;
10828         struct cache_extent *cache;
10829         int ret = 0;
10830         int had_dups = 0;
10831
10832         if (repair) {
10833                 /*
10834                  * if we're doing a repair, we have to make sure
10835                  * we don't allocate from the problem extents.
10836                  * In the worst case, this will be all the
10837                  * extents in the FS
10838                  */
10839                 cache = search_cache_extent(extent_cache, 0);
10840                 while(cache) {
10841                         rec = container_of(cache, struct extent_record, cache);
10842                         set_extent_dirty(root->fs_info->excluded_extents,
10843                                          rec->start,
10844                                          rec->start + rec->max_size - 1);
10845                         cache = next_cache_extent(cache);
10846                 }
10847
10848                 /* pin down all the corrupted blocks too */
10849                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10850                 while(cache) {
10851                         set_extent_dirty(root->fs_info->excluded_extents,
10852                                          cache->start,
10853                                          cache->start + cache->size - 1);
10854                         cache = next_cache_extent(cache);
10855                 }
10856                 prune_corrupt_blocks(root->fs_info);
10857                 reset_cached_block_groups(root->fs_info);
10858         }
10859
10860         reset_cached_block_groups(root->fs_info);
10861
10862         /*
10863          * We need to delete any duplicate entries we find first otherwise we
10864          * could mess up the extent tree when we have backrefs that actually
10865          * belong to a different extent item and not the weird duplicate one.
10866          */
10867         while (repair && !list_empty(&duplicate_extents)) {
10868                 rec = to_extent_record(duplicate_extents.next);
10869                 list_del_init(&rec->list);
10870
10871                 /* Sometimes we can find a backref before we find an actual
10872                  * extent, so we need to process it a little bit to see if there
10873                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10874                  * if this is a backref screwup.  If we need to delete stuff
10875                  * process_duplicates() will return 0, otherwise it will return
10876                  * 1 and we
10877                  */
10878                 if (process_duplicates(extent_cache, rec))
10879                         continue;
10880                 ret = delete_duplicate_records(root, rec);
10881                 if (ret < 0)
10882                         return ret;
10883                 /*
10884                  * delete_duplicate_records will return the number of entries
10885                  * deleted, so if it's greater than 0 then we know we actually
10886                  * did something and we need to remove.
10887                  */
10888                 if (ret)
10889                         had_dups = 1;
10890         }
10891
10892         if (had_dups)
10893                 return -EAGAIN;
10894
10895         while(1) {
10896                 int cur_err = 0;
10897                 int fix = 0;
10898
10899                 cache = search_cache_extent(extent_cache, 0);
10900                 if (!cache)
10901                         break;
10902                 rec = container_of(cache, struct extent_record, cache);
10903                 if (rec->num_duplicates) {
10904                         fprintf(stderr, "extent item %llu has multiple extent "
10905                                 "items\n", (unsigned long long)rec->start);
10906                         cur_err = 1;
10907                 }
10908
10909                 if (rec->refs != rec->extent_item_refs) {
10910                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10911                                 (unsigned long long)rec->start,
10912                                 (unsigned long long)rec->nr);
10913                         fprintf(stderr, "extent item %llu, found %llu\n",
10914                                 (unsigned long long)rec->extent_item_refs,
10915                                 (unsigned long long)rec->refs);
10916                         ret = record_orphan_data_extents(root->fs_info, rec);
10917                         if (ret < 0)
10918                                 goto repair_abort;
10919                         fix = ret;
10920                         cur_err = 1;
10921                 }
10922                 if (all_backpointers_checked(rec, 1)) {
10923                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10924                                 (unsigned long long)rec->start,
10925                                 (unsigned long long)rec->nr);
10926                         fix = 1;
10927                         cur_err = 1;
10928                 }
10929                 if (!rec->owner_ref_checked) {
10930                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10931                                 (unsigned long long)rec->start,
10932                                 (unsigned long long)rec->nr);
10933                         fix = 1;
10934                         cur_err = 1;
10935                 }
10936
10937                 if (repair && fix) {
10938                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10939                         if (ret)
10940                                 goto repair_abort;
10941                 }
10942
10943
10944                 if (rec->bad_full_backref) {
10945                         fprintf(stderr, "bad full backref, on [%llu]\n",
10946                                 (unsigned long long)rec->start);
10947                         if (repair) {
10948                                 ret = fixup_extent_flags(root->fs_info, rec);
10949                                 if (ret)
10950                                         goto repair_abort;
10951                                 fix = 1;
10952                         }
10953                         cur_err = 1;
10954                 }
10955                 /*
10956                  * Although it's not a extent ref's problem, we reuse this
10957                  * routine for error reporting.
10958                  * No repair function yet.
10959                  */
10960                 if (rec->crossing_stripes) {
10961                         fprintf(stderr,
10962                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10963                                 rec->start, rec->start + rec->max_size);
10964                         cur_err = 1;
10965                 }
10966
10967                 if (rec->wrong_chunk_type) {
10968                         fprintf(stderr,
10969                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10970                                 rec->start, rec->start + rec->max_size);
10971                         cur_err = 1;
10972                 }
10973
10974                 remove_cache_extent(extent_cache, cache);
10975                 free_all_extent_backrefs(rec);
10976                 if (!init_extent_tree && repair && (!cur_err || fix))
10977                         clear_extent_dirty(root->fs_info->excluded_extents,
10978                                            rec->start,
10979                                            rec->start + rec->max_size - 1);
10980                 free(rec);
10981         }
10982 repair_abort:
10983         if (repair) {
10984                 if (ret && ret != -EAGAIN) {
10985                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10986                         exit(1);
10987                 } else if (!ret) {
10988                         struct btrfs_trans_handle *trans;
10989
10990                         root = root->fs_info->extent_root;
10991                         trans = btrfs_start_transaction(root, 1);
10992                         if (IS_ERR(trans)) {
10993                                 ret = PTR_ERR(trans);
10994                                 goto repair_abort;
10995                         }
10996
10997                         ret = btrfs_fix_block_accounting(trans, root);
10998                         if (ret)
10999                                 goto repair_abort;
11000                         ret = btrfs_commit_transaction(trans, root);
11001                         if (ret)
11002                                 goto repair_abort;
11003                 }
11004                 return ret;
11005         }
11006         return 0;
11007 }
11008
11009 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11010 {
11011         u64 stripe_size;
11012
11013         if (type & BTRFS_BLOCK_GROUP_RAID0) {
11014                 stripe_size = length;
11015                 stripe_size /= num_stripes;
11016         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11017                 stripe_size = length * 2;
11018                 stripe_size /= num_stripes;
11019         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11020                 stripe_size = length;
11021                 stripe_size /= (num_stripes - 1);
11022         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11023                 stripe_size = length;
11024                 stripe_size /= (num_stripes - 2);
11025         } else {
11026                 stripe_size = length;
11027         }
11028         return stripe_size;
11029 }
11030
11031 /*
11032  * Check the chunk with its block group/dev list ref:
11033  * Return 0 if all refs seems valid.
11034  * Return 1 if part of refs seems valid, need later check for rebuild ref
11035  * like missing block group and needs to search extent tree to rebuild them.
11036  * Return -1 if essential refs are missing and unable to rebuild.
11037  */
11038 static int check_chunk_refs(struct chunk_record *chunk_rec,
11039                             struct block_group_tree *block_group_cache,
11040                             struct device_extent_tree *dev_extent_cache,
11041                             int silent)
11042 {
11043         struct cache_extent *block_group_item;
11044         struct block_group_record *block_group_rec;
11045         struct cache_extent *dev_extent_item;
11046         struct device_extent_record *dev_extent_rec;
11047         u64 devid;
11048         u64 offset;
11049         u64 length;
11050         int metadump_v2 = 0;
11051         int i;
11052         int ret = 0;
11053
11054         block_group_item = lookup_cache_extent(&block_group_cache->tree,
11055                                                chunk_rec->offset,
11056                                                chunk_rec->length);
11057         if (block_group_item) {
11058                 block_group_rec = container_of(block_group_item,
11059                                                struct block_group_record,
11060                                                cache);
11061                 if (chunk_rec->length != block_group_rec->offset ||
11062                     chunk_rec->offset != block_group_rec->objectid ||
11063                     (!metadump_v2 &&
11064                      chunk_rec->type_flags != block_group_rec->flags)) {
11065                         if (!silent)
11066                                 fprintf(stderr,
11067                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11068                                         chunk_rec->objectid,
11069                                         chunk_rec->type,
11070                                         chunk_rec->offset,
11071                                         chunk_rec->length,
11072                                         chunk_rec->offset,
11073                                         chunk_rec->type_flags,
11074                                         block_group_rec->objectid,
11075                                         block_group_rec->type,
11076                                         block_group_rec->offset,
11077                                         block_group_rec->offset,
11078                                         block_group_rec->objectid,
11079                                         block_group_rec->flags);
11080                         ret = -1;
11081                 } else {
11082                         list_del_init(&block_group_rec->list);
11083                         chunk_rec->bg_rec = block_group_rec;
11084                 }
11085         } else {
11086                 if (!silent)
11087                         fprintf(stderr,
11088                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11089                                 chunk_rec->objectid,
11090                                 chunk_rec->type,
11091                                 chunk_rec->offset,
11092                                 chunk_rec->length,
11093                                 chunk_rec->offset,
11094                                 chunk_rec->type_flags);
11095                 ret = 1;
11096         }
11097
11098         if (metadump_v2)
11099                 return ret;
11100
11101         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11102                                     chunk_rec->num_stripes);
11103         for (i = 0; i < chunk_rec->num_stripes; ++i) {
11104                 devid = chunk_rec->stripes[i].devid;
11105                 offset = chunk_rec->stripes[i].offset;
11106                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11107                                                        devid, offset, length);
11108                 if (dev_extent_item) {
11109                         dev_extent_rec = container_of(dev_extent_item,
11110                                                 struct device_extent_record,
11111                                                 cache);
11112                         if (dev_extent_rec->objectid != devid ||
11113                             dev_extent_rec->offset != offset ||
11114                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
11115                             dev_extent_rec->length != length) {
11116                                 if (!silent)
11117                                         fprintf(stderr,
11118                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11119                                                 chunk_rec->objectid,
11120                                                 chunk_rec->type,
11121                                                 chunk_rec->offset,
11122                                                 chunk_rec->stripes[i].devid,
11123                                                 chunk_rec->stripes[i].offset,
11124                                                 dev_extent_rec->objectid,
11125                                                 dev_extent_rec->offset,
11126                                                 dev_extent_rec->length);
11127                                 ret = -1;
11128                         } else {
11129                                 list_move(&dev_extent_rec->chunk_list,
11130                                           &chunk_rec->dextents);
11131                         }
11132                 } else {
11133                         if (!silent)
11134                                 fprintf(stderr,
11135                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11136                                         chunk_rec->objectid,
11137                                         chunk_rec->type,
11138                                         chunk_rec->offset,
11139                                         chunk_rec->stripes[i].devid,
11140                                         chunk_rec->stripes[i].offset);
11141                         ret = -1;
11142                 }
11143         }
11144         return ret;
11145 }
11146
11147 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
11148 int check_chunks(struct cache_tree *chunk_cache,
11149                  struct block_group_tree *block_group_cache,
11150                  struct device_extent_tree *dev_extent_cache,
11151                  struct list_head *good, struct list_head *bad,
11152                  struct list_head *rebuild, int silent)
11153 {
11154         struct cache_extent *chunk_item;
11155         struct chunk_record *chunk_rec;
11156         struct block_group_record *bg_rec;
11157         struct device_extent_record *dext_rec;
11158         int err;
11159         int ret = 0;
11160
11161         chunk_item = first_cache_extent(chunk_cache);
11162         while (chunk_item) {
11163                 chunk_rec = container_of(chunk_item, struct chunk_record,
11164                                          cache);
11165                 err = check_chunk_refs(chunk_rec, block_group_cache,
11166                                        dev_extent_cache, silent);
11167                 if (err < 0)
11168                         ret = err;
11169                 if (err == 0 && good)
11170                         list_add_tail(&chunk_rec->list, good);
11171                 if (err > 0 && rebuild)
11172                         list_add_tail(&chunk_rec->list, rebuild);
11173                 if (err < 0 && bad)
11174                         list_add_tail(&chunk_rec->list, bad);
11175                 chunk_item = next_cache_extent(chunk_item);
11176         }
11177
11178         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11179                 if (!silent)
11180                         fprintf(stderr,
11181                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11182                                 bg_rec->objectid,
11183                                 bg_rec->offset,
11184                                 bg_rec->flags);
11185                 if (!ret)
11186                         ret = 1;
11187         }
11188
11189         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11190                             chunk_list) {
11191                 if (!silent)
11192                         fprintf(stderr,
11193                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11194                                 dext_rec->objectid,
11195                                 dext_rec->offset,
11196                                 dext_rec->length);
11197                 if (!ret)
11198                         ret = 1;
11199         }
11200         return ret;
11201 }
11202
11203
11204 static int check_device_used(struct device_record *dev_rec,
11205                              struct device_extent_tree *dext_cache)
11206 {
11207         struct cache_extent *cache;
11208         struct device_extent_record *dev_extent_rec;
11209         u64 total_byte = 0;
11210
11211         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11212         while (cache) {
11213                 dev_extent_rec = container_of(cache,
11214                                               struct device_extent_record,
11215                                               cache);
11216                 if (dev_extent_rec->objectid != dev_rec->devid)
11217                         break;
11218
11219                 list_del_init(&dev_extent_rec->device_list);
11220                 total_byte += dev_extent_rec->length;
11221                 cache = next_cache_extent(cache);
11222         }
11223
11224         if (total_byte != dev_rec->byte_used) {
11225                 fprintf(stderr,
11226                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11227                         total_byte, dev_rec->byte_used, dev_rec->objectid,
11228                         dev_rec->type, dev_rec->offset);
11229                 return -1;
11230         } else {
11231                 return 0;
11232         }
11233 }
11234
11235 /* check btrfs_dev_item -> btrfs_dev_extent */
11236 static int check_devices(struct rb_root *dev_cache,
11237                          struct device_extent_tree *dev_extent_cache)
11238 {
11239         struct rb_node *dev_node;
11240         struct device_record *dev_rec;
11241         struct device_extent_record *dext_rec;
11242         int err;
11243         int ret = 0;
11244
11245         dev_node = rb_first(dev_cache);
11246         while (dev_node) {
11247                 dev_rec = container_of(dev_node, struct device_record, node);
11248                 err = check_device_used(dev_rec, dev_extent_cache);
11249                 if (err)
11250                         ret = err;
11251
11252                 dev_node = rb_next(dev_node);
11253         }
11254         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11255                             device_list) {
11256                 fprintf(stderr,
11257                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11258                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
11259                 if (!ret)
11260                         ret = 1;
11261         }
11262         return ret;
11263 }
11264
11265 static int add_root_item_to_list(struct list_head *head,
11266                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11267                                   u8 level, u8 drop_level,
11268                                   struct btrfs_key *drop_key)
11269 {
11270
11271         struct root_item_record *ri_rec;
11272         ri_rec = malloc(sizeof(*ri_rec));
11273         if (!ri_rec)
11274                 return -ENOMEM;
11275         ri_rec->bytenr = bytenr;
11276         ri_rec->objectid = objectid;
11277         ri_rec->level = level;
11278         ri_rec->drop_level = drop_level;
11279         ri_rec->last_snapshot = last_snapshot;
11280         if (drop_key)
11281                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11282         list_add_tail(&ri_rec->list, head);
11283
11284         return 0;
11285 }
11286
11287 static void free_root_item_list(struct list_head *list)
11288 {
11289         struct root_item_record *ri_rec;
11290
11291         while (!list_empty(list)) {
11292                 ri_rec = list_first_entry(list, struct root_item_record,
11293                                           list);
11294                 list_del_init(&ri_rec->list);
11295                 free(ri_rec);
11296         }
11297 }
11298
11299 static int deal_root_from_list(struct list_head *list,
11300                                struct btrfs_root *root,
11301                                struct block_info *bits,
11302                                int bits_nr,
11303                                struct cache_tree *pending,
11304                                struct cache_tree *seen,
11305                                struct cache_tree *reada,
11306                                struct cache_tree *nodes,
11307                                struct cache_tree *extent_cache,
11308                                struct cache_tree *chunk_cache,
11309                                struct rb_root *dev_cache,
11310                                struct block_group_tree *block_group_cache,
11311                                struct device_extent_tree *dev_extent_cache)
11312 {
11313         int ret = 0;
11314         u64 last;
11315
11316         while (!list_empty(list)) {
11317                 struct root_item_record *rec;
11318                 struct extent_buffer *buf;
11319                 rec = list_entry(list->next,
11320                                  struct root_item_record, list);
11321                 last = 0;
11322                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11323                 if (!extent_buffer_uptodate(buf)) {
11324                         free_extent_buffer(buf);
11325                         ret = -EIO;
11326                         break;
11327                 }
11328                 ret = add_root_to_pending(buf, extent_cache, pending,
11329                                     seen, nodes, rec->objectid);
11330                 if (ret < 0)
11331                         break;
11332                 /*
11333                  * To rebuild extent tree, we need deal with snapshot
11334                  * one by one, otherwise we deal with node firstly which
11335                  * can maximize readahead.
11336                  */
11337                 while (1) {
11338                         ret = run_next_block(root, bits, bits_nr, &last,
11339                                              pending, seen, reada, nodes,
11340                                              extent_cache, chunk_cache,
11341                                              dev_cache, block_group_cache,
11342                                              dev_extent_cache, rec);
11343                         if (ret != 0)
11344                                 break;
11345                 }
11346                 free_extent_buffer(buf);
11347                 list_del(&rec->list);
11348                 free(rec);
11349                 if (ret < 0)
11350                         break;
11351         }
11352         while (ret >= 0) {
11353                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11354                                      reada, nodes, extent_cache, chunk_cache,
11355                                      dev_cache, block_group_cache,
11356                                      dev_extent_cache, NULL);
11357                 if (ret != 0) {
11358                         if (ret > 0)
11359                                 ret = 0;
11360                         break;
11361                 }
11362         }
11363         return ret;
11364 }
11365
11366 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11367 {
11368         struct rb_root dev_cache;
11369         struct cache_tree chunk_cache;
11370         struct block_group_tree block_group_cache;
11371         struct device_extent_tree dev_extent_cache;
11372         struct cache_tree extent_cache;
11373         struct cache_tree seen;
11374         struct cache_tree pending;
11375         struct cache_tree reada;
11376         struct cache_tree nodes;
11377         struct extent_io_tree excluded_extents;
11378         struct cache_tree corrupt_blocks;
11379         struct btrfs_path path;
11380         struct btrfs_key key;
11381         struct btrfs_key found_key;
11382         int ret, err = 0;
11383         struct block_info *bits;
11384         int bits_nr;
11385         struct extent_buffer *leaf;
11386         int slot;
11387         struct btrfs_root_item ri;
11388         struct list_head dropping_trees;
11389         struct list_head normal_trees;
11390         struct btrfs_root *root1;
11391         struct btrfs_root *root;
11392         u64 objectid;
11393         u8 level;
11394
11395         root = fs_info->fs_root;
11396         dev_cache = RB_ROOT;
11397         cache_tree_init(&chunk_cache);
11398         block_group_tree_init(&block_group_cache);
11399         device_extent_tree_init(&dev_extent_cache);
11400
11401         cache_tree_init(&extent_cache);
11402         cache_tree_init(&seen);
11403         cache_tree_init(&pending);
11404         cache_tree_init(&nodes);
11405         cache_tree_init(&reada);
11406         cache_tree_init(&corrupt_blocks);
11407         extent_io_tree_init(&excluded_extents);
11408         INIT_LIST_HEAD(&dropping_trees);
11409         INIT_LIST_HEAD(&normal_trees);
11410
11411         if (repair) {
11412                 fs_info->excluded_extents = &excluded_extents;
11413                 fs_info->fsck_extent_cache = &extent_cache;
11414                 fs_info->free_extent_hook = free_extent_hook;
11415                 fs_info->corrupt_blocks = &corrupt_blocks;
11416         }
11417
11418         bits_nr = 1024;
11419         bits = malloc(bits_nr * sizeof(struct block_info));
11420         if (!bits) {
11421                 perror("malloc");
11422                 exit(1);
11423         }
11424
11425         if (ctx.progress_enabled) {
11426                 ctx.tp = TASK_EXTENTS;
11427                 task_start(ctx.info);
11428         }
11429
11430 again:
11431         root1 = fs_info->tree_root;
11432         level = btrfs_header_level(root1->node);
11433         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11434                                     root1->node->start, 0, level, 0, NULL);
11435         if (ret < 0)
11436                 goto out;
11437         root1 = fs_info->chunk_root;
11438         level = btrfs_header_level(root1->node);
11439         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11440                                     root1->node->start, 0, level, 0, NULL);
11441         if (ret < 0)
11442                 goto out;
11443         btrfs_init_path(&path);
11444         key.offset = 0;
11445         key.objectid = 0;
11446         key.type = BTRFS_ROOT_ITEM_KEY;
11447         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11448         if (ret < 0)
11449                 goto out;
11450         while(1) {
11451                 leaf = path.nodes[0];
11452                 slot = path.slots[0];
11453                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11454                         ret = btrfs_next_leaf(root, &path);
11455                         if (ret != 0)
11456                                 break;
11457                         leaf = path.nodes[0];
11458                         slot = path.slots[0];
11459                 }
11460                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11461                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11462                         unsigned long offset;
11463                         u64 last_snapshot;
11464
11465                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11466                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11467                         last_snapshot = btrfs_root_last_snapshot(&ri);
11468                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11469                                 level = btrfs_root_level(&ri);
11470                                 ret = add_root_item_to_list(&normal_trees,
11471                                                 found_key.objectid,
11472                                                 btrfs_root_bytenr(&ri),
11473                                                 last_snapshot, level,
11474                                                 0, NULL);
11475                                 if (ret < 0)
11476                                         goto out;
11477                         } else {
11478                                 level = btrfs_root_level(&ri);
11479                                 objectid = found_key.objectid;
11480                                 btrfs_disk_key_to_cpu(&found_key,
11481                                                       &ri.drop_progress);
11482                                 ret = add_root_item_to_list(&dropping_trees,
11483                                                 objectid,
11484                                                 btrfs_root_bytenr(&ri),
11485                                                 last_snapshot, level,
11486                                                 ri.drop_level, &found_key);
11487                                 if (ret < 0)
11488                                         goto out;
11489                         }
11490                 }
11491                 path.slots[0]++;
11492         }
11493         btrfs_release_path(&path);
11494
11495         /*
11496          * check_block can return -EAGAIN if it fixes something, please keep
11497          * this in mind when dealing with return values from these functions, if
11498          * we get -EAGAIN we want to fall through and restart the loop.
11499          */
11500         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11501                                   &seen, &reada, &nodes, &extent_cache,
11502                                   &chunk_cache, &dev_cache, &block_group_cache,
11503                                   &dev_extent_cache);
11504         if (ret < 0) {
11505                 if (ret == -EAGAIN)
11506                         goto loop;
11507                 goto out;
11508         }
11509         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11510                                   &pending, &seen, &reada, &nodes,
11511                                   &extent_cache, &chunk_cache, &dev_cache,
11512                                   &block_group_cache, &dev_extent_cache);
11513         if (ret < 0) {
11514                 if (ret == -EAGAIN)
11515                         goto loop;
11516                 goto out;
11517         }
11518
11519         ret = check_chunks(&chunk_cache, &block_group_cache,
11520                            &dev_extent_cache, NULL, NULL, NULL, 0);
11521         if (ret) {
11522                 if (ret == -EAGAIN)
11523                         goto loop;
11524                 err = ret;
11525         }
11526
11527         ret = check_extent_refs(root, &extent_cache);
11528         if (ret < 0) {
11529                 if (ret == -EAGAIN)
11530                         goto loop;
11531                 goto out;
11532         }
11533
11534         ret = check_devices(&dev_cache, &dev_extent_cache);
11535         if (ret && err)
11536                 ret = err;
11537
11538 out:
11539         task_stop(ctx.info);
11540         if (repair) {
11541                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11542                 extent_io_tree_cleanup(&excluded_extents);
11543                 fs_info->fsck_extent_cache = NULL;
11544                 fs_info->free_extent_hook = NULL;
11545                 fs_info->corrupt_blocks = NULL;
11546                 fs_info->excluded_extents = NULL;
11547         }
11548         free(bits);
11549         free_chunk_cache_tree(&chunk_cache);
11550         free_device_cache_tree(&dev_cache);
11551         free_block_group_tree(&block_group_cache);
11552         free_device_extent_tree(&dev_extent_cache);
11553         free_extent_cache_tree(&seen);
11554         free_extent_cache_tree(&pending);
11555         free_extent_cache_tree(&reada);
11556         free_extent_cache_tree(&nodes);
11557         free_root_item_list(&normal_trees);
11558         free_root_item_list(&dropping_trees);
11559         return ret;
11560 loop:
11561         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11562         free_extent_cache_tree(&seen);
11563         free_extent_cache_tree(&pending);
11564         free_extent_cache_tree(&reada);
11565         free_extent_cache_tree(&nodes);
11566         free_chunk_cache_tree(&chunk_cache);
11567         free_block_group_tree(&block_group_cache);
11568         free_device_cache_tree(&dev_cache);
11569         free_device_extent_tree(&dev_extent_cache);
11570         free_extent_record_cache(&extent_cache);
11571         free_root_item_list(&normal_trees);
11572         free_root_item_list(&dropping_trees);
11573         extent_io_tree_cleanup(&excluded_extents);
11574         goto again;
11575 }
11576
11577 /*
11578  * Check backrefs of a tree block given by @bytenr or @eb.
11579  *
11580  * @root:       the root containing the @bytenr or @eb
11581  * @eb:         tree block extent buffer, can be NULL
11582  * @bytenr:     bytenr of the tree block to search
11583  * @level:      tree level of the tree block
11584  * @owner:      owner of the tree block
11585  *
11586  * Return >0 for any error found and output error message
11587  * Return 0 for no error found
11588  */
11589 static int check_tree_block_ref(struct btrfs_root *root,
11590                                 struct extent_buffer *eb, u64 bytenr,
11591                                 int level, u64 owner, struct node_refs *nrefs)
11592 {
11593         struct btrfs_key key;
11594         struct btrfs_root *extent_root = root->fs_info->extent_root;
11595         struct btrfs_path path;
11596         struct btrfs_extent_item *ei;
11597         struct btrfs_extent_inline_ref *iref;
11598         struct extent_buffer *leaf;
11599         unsigned long end;
11600         unsigned long ptr;
11601         int slot;
11602         int skinny_level;
11603         int root_level = btrfs_header_level(root->node);
11604         int type;
11605         u32 nodesize = root->fs_info->nodesize;
11606         u32 item_size;
11607         u64 offset;
11608         int tree_reloc_root = 0;
11609         int found_ref = 0;
11610         int err = 0;
11611         int ret;
11612         int strict = 1;
11613         int parent = 0;
11614
11615         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11616             btrfs_header_bytenr(root->node) == bytenr)
11617                 tree_reloc_root = 1;
11618         btrfs_init_path(&path);
11619         key.objectid = bytenr;
11620         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11621                 key.type = BTRFS_METADATA_ITEM_KEY;
11622         else
11623                 key.type = BTRFS_EXTENT_ITEM_KEY;
11624         key.offset = (u64)-1;
11625
11626         /* Search for the backref in extent tree */
11627         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11628         if (ret < 0) {
11629                 err |= BACKREF_MISSING;
11630                 goto out;
11631         }
11632         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11633         if (ret) {
11634                 err |= BACKREF_MISSING;
11635                 goto out;
11636         }
11637
11638         leaf = path.nodes[0];
11639         slot = path.slots[0];
11640         btrfs_item_key_to_cpu(leaf, &key, slot);
11641
11642         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11643
11644         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11645                 skinny_level = (int)key.offset;
11646                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11647         } else {
11648                 struct btrfs_tree_block_info *info;
11649
11650                 info = (struct btrfs_tree_block_info *)(ei + 1);
11651                 skinny_level = btrfs_tree_block_level(leaf, info);
11652                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11653         }
11654
11655
11656         if (eb) {
11657                 u64 header_gen;
11658                 u64 extent_gen;
11659
11660                 /*
11661                  * Due to the feature of shared tree blocks, if the upper node
11662                  * is a fs root or shared node, the extent of checked node may
11663                  * not be updated until the next CoW.
11664                  */
11665                 if (nrefs)
11666                         strict = should_check_extent_strictly(root, nrefs,
11667                                         level);
11668                 if (!(btrfs_extent_flags(leaf, ei) &
11669                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11670                         error(
11671                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11672                                 key.objectid, nodesize,
11673                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11674                         err = BACKREF_MISMATCH;
11675                 }
11676                 header_gen = btrfs_header_generation(eb);
11677                 extent_gen = btrfs_extent_generation(leaf, ei);
11678                 if (header_gen != extent_gen) {
11679                         error(
11680         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11681                                 key.objectid, nodesize, header_gen,
11682                                 extent_gen);
11683                         err = BACKREF_MISMATCH;
11684                 }
11685                 if (level != skinny_level) {
11686                         error(
11687                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11688                                 key.objectid, nodesize, level, skinny_level);
11689                         err = BACKREF_MISMATCH;
11690                 }
11691                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11692                         error(
11693                         "extent[%llu %u] is referred by other roots than %llu",
11694                                 key.objectid, nodesize, root->objectid);
11695                         err = BACKREF_MISMATCH;
11696                 }
11697         }
11698
11699         /*
11700          * Iterate the extent/metadata item to find the exact backref
11701          */
11702         item_size = btrfs_item_size_nr(leaf, slot);
11703         ptr = (unsigned long)iref;
11704         end = (unsigned long)ei + item_size;
11705
11706         while (ptr < end) {
11707                 iref = (struct btrfs_extent_inline_ref *)ptr;
11708                 type = btrfs_extent_inline_ref_type(leaf, iref);
11709                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11710
11711                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11712                         if (offset == root->objectid)
11713                                 found_ref = 1;
11714                         if (!strict && owner == offset)
11715                                 found_ref = 1;
11716                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11717                         /*
11718                          * Backref of tree reloc root points to itself, no need
11719                          * to check backref any more.
11720                          */
11721                         if (tree_reloc_root) {
11722                                 found_ref = 1;
11723                         } else {
11724                                 /*
11725                                  * Check if the backref points to valid
11726                                  * referencer
11727                                  */
11728                                 found_ref = !check_tree_block_ref( root, NULL,
11729                                                 offset, level + 1, owner,
11730                                                 NULL);
11731                         }
11732                 }
11733
11734                 if (found_ref)
11735                         break;
11736                 ptr += btrfs_extent_inline_ref_size(type);
11737         }
11738
11739         /*
11740          * Inlined extent item doesn't have what we need, check
11741          * TREE_BLOCK_REF_KEY
11742          */
11743         if (!found_ref) {
11744                 btrfs_release_path(&path);
11745                 key.objectid = bytenr;
11746                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11747                 key.offset = root->objectid;
11748
11749                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11750                 if (!ret)
11751                         found_ref = 1;
11752         }
11753         if (!found_ref)
11754                 err |= BACKREF_MISSING;
11755 out:
11756         btrfs_release_path(&path);
11757         if (nrefs && strict &&
11758             level < root_level && nrefs->full_backref[level + 1])
11759                 parent = nrefs->bytenr[level + 1];
11760         if (eb && (err & BACKREF_MISSING))
11761                 error(
11762         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11763                       bytenr, nodesize, owner, level,
11764                       parent ? "parent" : "root",
11765                       parent ? parent : root->objectid);
11766         return err;
11767 }
11768
11769 /*
11770  * If @err contains BACKREF_MISSING then add extent of the
11771  * file_extent_data_item.
11772  *
11773  * Returns error bits after reapir.
11774  */
11775 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11776                                    struct btrfs_root *root,
11777                                    struct btrfs_path *pathp,
11778                                    struct node_refs *nrefs,
11779                                    int err)
11780 {
11781         struct btrfs_file_extent_item *fi;
11782         struct btrfs_key fi_key;
11783         struct btrfs_key key;
11784         struct btrfs_extent_item *ei;
11785         struct btrfs_path path;
11786         struct btrfs_root *extent_root = root->fs_info->extent_root;
11787         struct extent_buffer *eb;
11788         u64 size;
11789         u64 disk_bytenr;
11790         u64 num_bytes;
11791         u64 parent;
11792         u64 offset;
11793         u64 extent_offset;
11794         u64 file_offset;
11795         int generation;
11796         int slot;
11797         int ret = 0;
11798
11799         eb = pathp->nodes[0];
11800         slot = pathp->slots[0];
11801         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11802         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11803
11804         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11805             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11806                 return err;
11807
11808         file_offset = fi_key.offset;
11809         generation = btrfs_file_extent_generation(eb, fi);
11810         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11811         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11812         extent_offset = btrfs_file_extent_offset(eb, fi);
11813         offset = file_offset - extent_offset;
11814
11815         /* now repair only adds backref */
11816         if ((err & BACKREF_MISSING) == 0)
11817                 return err;
11818
11819         /* search extent item */
11820         key.objectid = disk_bytenr;
11821         key.type = BTRFS_EXTENT_ITEM_KEY;
11822         key.offset = num_bytes;
11823
11824         btrfs_init_path(&path);
11825         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11826         if (ret < 0) {
11827                 ret = -EIO;
11828                 goto out;
11829         }
11830
11831         /* insert an extent item */
11832         if (ret > 0) {
11833                 key.objectid = disk_bytenr;
11834                 key.type = BTRFS_EXTENT_ITEM_KEY;
11835                 key.offset = num_bytes;
11836                 size = sizeof(*ei);
11837
11838                 btrfs_release_path(&path);
11839                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11840                                               size);
11841                 if (ret)
11842                         goto out;
11843                 eb = path.nodes[0];
11844                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11845
11846                 btrfs_set_extent_refs(eb, ei, 0);
11847                 btrfs_set_extent_generation(eb, ei, generation);
11848                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11849
11850                 btrfs_mark_buffer_dirty(eb);
11851                 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11852                                                num_bytes, 1, 0);
11853                 btrfs_release_path(&path);
11854         }
11855
11856         if (nrefs->full_backref[0])
11857                 parent = btrfs_header_bytenr(eb);
11858         else
11859                 parent = 0;
11860
11861         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11862                                    root->objectid,
11863                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11864                                    offset);
11865         if (ret) {
11866                 error(
11867                 "failed to increase extent data backref[%llu %llu] root %llu",
11868                       disk_bytenr, num_bytes, root->objectid);
11869                 goto out;
11870         } else {
11871                 printf("Add one extent data backref [%llu %llu]\n",
11872                        disk_bytenr, num_bytes);
11873         }
11874
11875         err &= ~BACKREF_MISSING;
11876 out:
11877         if (ret)
11878                 error("can't repair root %llu extent data item[%llu %llu]",
11879                       root->objectid, disk_bytenr, num_bytes);
11880         return err;
11881 }
11882
11883 /*
11884  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11885  *
11886  * Return >0 any error found and output error message
11887  * Return 0 for no error found
11888  */
11889 static int check_extent_data_item(struct btrfs_root *root,
11890                                   struct btrfs_path *pathp,
11891                                   struct node_refs *nrefs,  int account_bytes)
11892 {
11893         struct btrfs_file_extent_item *fi;
11894         struct extent_buffer *eb = pathp->nodes[0];
11895         struct btrfs_path path;
11896         struct btrfs_root *extent_root = root->fs_info->extent_root;
11897         struct btrfs_key fi_key;
11898         struct btrfs_key dbref_key;
11899         struct extent_buffer *leaf;
11900         struct btrfs_extent_item *ei;
11901         struct btrfs_extent_inline_ref *iref;
11902         struct btrfs_extent_data_ref *dref;
11903         u64 owner;
11904         u64 disk_bytenr;
11905         u64 disk_num_bytes;
11906         u64 extent_num_bytes;
11907         u64 extent_flags;
11908         u32 item_size;
11909         unsigned long end;
11910         unsigned long ptr;
11911         int type;
11912         u64 ref_root;
11913         int found_dbackref = 0;
11914         int slot = pathp->slots[0];
11915         int err = 0;
11916         int ret;
11917         int strict;
11918
11919         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11920         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11921
11922         /* Nothing to check for hole and inline data extents */
11923         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11924             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11925                 return 0;
11926
11927         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11928         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11929         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11930
11931         /* Check unaligned disk_num_bytes and num_bytes */
11932         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11933                 error(
11934 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11935                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11936                         root->fs_info->sectorsize);
11937                 err |= BYTES_UNALIGNED;
11938         } else if (account_bytes) {
11939                 data_bytes_allocated += disk_num_bytes;
11940         }
11941         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11942                 error(
11943 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11944                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11945                         root->fs_info->sectorsize);
11946                 err |= BYTES_UNALIGNED;
11947         } else if (account_bytes) {
11948                 data_bytes_referenced += extent_num_bytes;
11949         }
11950         owner = btrfs_header_owner(eb);
11951
11952         /* Check the extent item of the file extent in extent tree */
11953         btrfs_init_path(&path);
11954         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11955         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11956         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11957
11958         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11959         if (ret)
11960                 goto out;
11961
11962         leaf = path.nodes[0];
11963         slot = path.slots[0];
11964         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11965
11966         extent_flags = btrfs_extent_flags(leaf, ei);
11967
11968         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11969                 error(
11970                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11971                     disk_bytenr, disk_num_bytes,
11972                     BTRFS_EXTENT_FLAG_DATA);
11973                 err |= BACKREF_MISMATCH;
11974         }
11975
11976         /* Check data backref inside that extent item */
11977         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11978         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11979         ptr = (unsigned long)iref;
11980         end = (unsigned long)ei + item_size;
11981         strict = should_check_extent_strictly(root, nrefs, -1);
11982
11983         while (ptr < end) {
11984                 iref = (struct btrfs_extent_inline_ref *)ptr;
11985                 type = btrfs_extent_inline_ref_type(leaf, iref);
11986                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11987
11988                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11989                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11990                         if (ref_root == root->objectid)
11991                                 found_dbackref = 1;
11992                         else if (!strict && owner == ref_root)
11993                                 found_dbackref = 1;
11994                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11995                         found_dbackref = !check_tree_block_ref(root, NULL,
11996                                 btrfs_extent_inline_ref_offset(leaf, iref),
11997                                 0, owner, NULL);
11998                 }
11999
12000                 if (found_dbackref)
12001                         break;
12002                 ptr += btrfs_extent_inline_ref_size(type);
12003         }
12004
12005         if (!found_dbackref) {
12006                 btrfs_release_path(&path);
12007
12008                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12009                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12010                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12011                 dbref_key.offset = hash_extent_data_ref(root->objectid,
12012                                 fi_key.objectid, fi_key.offset);
12013
12014                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12015                                         &dbref_key, &path, 0, 0);
12016                 if (!ret) {
12017                         found_dbackref = 1;
12018                         goto out;
12019                 }
12020
12021                 btrfs_release_path(&path);
12022
12023                 /*
12024                  * Neither inlined nor EXTENT_DATA_REF found, try
12025                  * SHARED_DATA_REF as last chance.
12026                  */
12027                 dbref_key.objectid = disk_bytenr;
12028                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12029                 dbref_key.offset = eb->start;
12030
12031                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12032                                         &dbref_key, &path, 0, 0);
12033                 if (!ret) {
12034                         found_dbackref = 1;
12035                         goto out;
12036                 }
12037         }
12038
12039 out:
12040         if (!found_dbackref)
12041                 err |= BACKREF_MISSING;
12042         btrfs_release_path(&path);
12043         if (err & BACKREF_MISSING) {
12044                 error("data extent[%llu %llu] backref lost",
12045                       disk_bytenr, disk_num_bytes);
12046         }
12047         return err;
12048 }
12049
12050 /*
12051  * Get real tree block level for the case like shared block
12052  * Return >= 0 as tree level
12053  * Return <0 for error
12054  */
12055 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12056 {
12057         struct extent_buffer *eb;
12058         struct btrfs_path path;
12059         struct btrfs_key key;
12060         struct btrfs_extent_item *ei;
12061         u64 flags;
12062         u64 transid;
12063         u8 backref_level;
12064         u8 header_level;
12065         int ret;
12066
12067         /* Search extent tree for extent generation and level */
12068         key.objectid = bytenr;
12069         key.type = BTRFS_METADATA_ITEM_KEY;
12070         key.offset = (u64)-1;
12071
12072         btrfs_init_path(&path);
12073         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12074         if (ret < 0)
12075                 goto release_out;
12076         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12077         if (ret < 0)
12078                 goto release_out;
12079         if (ret > 0) {
12080                 ret = -ENOENT;
12081                 goto release_out;
12082         }
12083
12084         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12085         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12086                             struct btrfs_extent_item);
12087         flags = btrfs_extent_flags(path.nodes[0], ei);
12088         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12089                 ret = -ENOENT;
12090                 goto release_out;
12091         }
12092
12093         /* Get transid for later read_tree_block() check */
12094         transid = btrfs_extent_generation(path.nodes[0], ei);
12095
12096         /* Get backref level as one source */
12097         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12098                 backref_level = key.offset;
12099         } else {
12100                 struct btrfs_tree_block_info *info;
12101
12102                 info = (struct btrfs_tree_block_info *)(ei + 1);
12103                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12104         }
12105         btrfs_release_path(&path);
12106
12107         /* Get level from tree block as an alternative source */
12108         eb = read_tree_block(fs_info, bytenr, transid);
12109         if (!extent_buffer_uptodate(eb)) {
12110                 free_extent_buffer(eb);
12111                 return -EIO;
12112         }
12113         header_level = btrfs_header_level(eb);
12114         free_extent_buffer(eb);
12115
12116         if (header_level != backref_level)
12117                 return -EIO;
12118         return header_level;
12119
12120 release_out:
12121         btrfs_release_path(&path);
12122         return ret;
12123 }
12124
12125 /*
12126  * Check if a tree block backref is valid (points to a valid tree block)
12127  * if level == -1, level will be resolved
12128  * Return >0 for any error found and print error message
12129  */
12130 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12131                                     u64 bytenr, int level)
12132 {
12133         struct btrfs_root *root;
12134         struct btrfs_key key;
12135         struct btrfs_path path;
12136         struct extent_buffer *eb;
12137         struct extent_buffer *node;
12138         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12139         int err = 0;
12140         int ret;
12141
12142         /* Query level for level == -1 special case */
12143         if (level == -1)
12144                 level = query_tree_block_level(fs_info, bytenr);
12145         if (level < 0) {
12146                 err |= REFERENCER_MISSING;
12147                 goto out;
12148         }
12149
12150         key.objectid = root_id;
12151         key.type = BTRFS_ROOT_ITEM_KEY;
12152         key.offset = (u64)-1;
12153
12154         root = btrfs_read_fs_root(fs_info, &key);
12155         if (IS_ERR(root)) {
12156                 err |= REFERENCER_MISSING;
12157                 goto out;
12158         }
12159
12160         /* Read out the tree block to get item/node key */
12161         eb = read_tree_block(fs_info, bytenr, 0);
12162         if (!extent_buffer_uptodate(eb)) {
12163                 err |= REFERENCER_MISSING;
12164                 free_extent_buffer(eb);
12165                 goto out;
12166         }
12167
12168         /* Empty tree, no need to check key */
12169         if (!btrfs_header_nritems(eb) && !level) {
12170                 free_extent_buffer(eb);
12171                 goto out;
12172         }
12173
12174         if (level)
12175                 btrfs_node_key_to_cpu(eb, &key, 0);
12176         else
12177                 btrfs_item_key_to_cpu(eb, &key, 0);
12178
12179         free_extent_buffer(eb);
12180
12181         btrfs_init_path(&path);
12182         path.lowest_level = level;
12183         /* Search with the first key, to ensure we can reach it */
12184         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12185         if (ret < 0) {
12186                 err |= REFERENCER_MISSING;
12187                 goto release_out;
12188         }
12189
12190         node = path.nodes[level];
12191         if (btrfs_header_bytenr(node) != bytenr) {
12192                 error(
12193         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12194                         bytenr, nodesize, bytenr,
12195                         btrfs_header_bytenr(node));
12196                 err |= REFERENCER_MISMATCH;
12197         }
12198         if (btrfs_header_level(node) != level) {
12199                 error(
12200         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12201                         bytenr, nodesize, level,
12202                         btrfs_header_level(node));
12203                 err |= REFERENCER_MISMATCH;
12204         }
12205
12206 release_out:
12207         btrfs_release_path(&path);
12208 out:
12209         if (err & REFERENCER_MISSING) {
12210                 if (level < 0)
12211                         error("extent [%llu %d] lost referencer (owner: %llu)",
12212                                 bytenr, nodesize, root_id);
12213                 else
12214                         error(
12215                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12216                                 bytenr, nodesize, root_id, level);
12217         }
12218
12219         return err;
12220 }
12221
12222 /*
12223  * Check if tree block @eb is tree reloc root.
12224  * Return 0 if it's not or any problem happens
12225  * Return 1 if it's a tree reloc root
12226  */
12227 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12228                                  struct extent_buffer *eb)
12229 {
12230         struct btrfs_root *tree_reloc_root;
12231         struct btrfs_key key;
12232         u64 bytenr = btrfs_header_bytenr(eb);
12233         u64 owner = btrfs_header_owner(eb);
12234         int ret = 0;
12235
12236         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12237         key.offset = owner;
12238         key.type = BTRFS_ROOT_ITEM_KEY;
12239
12240         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12241         if (IS_ERR(tree_reloc_root))
12242                 return 0;
12243
12244         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12245                 ret = 1;
12246         btrfs_free_fs_root(tree_reloc_root);
12247         return ret;
12248 }
12249
12250 /*
12251  * Check referencer for shared block backref
12252  * If level == -1, this function will resolve the level.
12253  */
12254 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12255                                      u64 parent, u64 bytenr, int level)
12256 {
12257         struct extent_buffer *eb;
12258         u32 nr;
12259         int found_parent = 0;
12260         int i;
12261
12262         eb = read_tree_block(fs_info, parent, 0);
12263         if (!extent_buffer_uptodate(eb))
12264                 goto out;
12265
12266         if (level == -1)
12267                 level = query_tree_block_level(fs_info, bytenr);
12268         if (level < 0)
12269                 goto out;
12270
12271         /* It's possible it's a tree reloc root */
12272         if (parent == bytenr) {
12273                 if (is_tree_reloc_root(fs_info, eb))
12274                         found_parent = 1;
12275                 goto out;
12276         }
12277
12278         if (level + 1 != btrfs_header_level(eb))
12279                 goto out;
12280
12281         nr = btrfs_header_nritems(eb);
12282         for (i = 0; i < nr; i++) {
12283                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12284                         found_parent = 1;
12285                         break;
12286                 }
12287         }
12288 out:
12289         free_extent_buffer(eb);
12290         if (!found_parent) {
12291                 error(
12292         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12293                         bytenr, fs_info->nodesize, parent, level);
12294                 return REFERENCER_MISSING;
12295         }
12296         return 0;
12297 }
12298
12299 /*
12300  * Check referencer for normal (inlined) data ref
12301  * If len == 0, it will be resolved by searching in extent tree
12302  */
12303 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12304                                      u64 root_id, u64 objectid, u64 offset,
12305                                      u64 bytenr, u64 len, u32 count)
12306 {
12307         struct btrfs_root *root;
12308         struct btrfs_root *extent_root = fs_info->extent_root;
12309         struct btrfs_key key;
12310         struct btrfs_path path;
12311         struct extent_buffer *leaf;
12312         struct btrfs_file_extent_item *fi;
12313         u32 found_count = 0;
12314         int slot;
12315         int ret = 0;
12316
12317         if (!len) {
12318                 key.objectid = bytenr;
12319                 key.type = BTRFS_EXTENT_ITEM_KEY;
12320                 key.offset = (u64)-1;
12321
12322                 btrfs_init_path(&path);
12323                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12324                 if (ret < 0)
12325                         goto out;
12326                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12327                 if (ret)
12328                         goto out;
12329                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12330                 if (key.objectid != bytenr ||
12331                     key.type != BTRFS_EXTENT_ITEM_KEY)
12332                         goto out;
12333                 len = key.offset;
12334                 btrfs_release_path(&path);
12335         }
12336         key.objectid = root_id;
12337         key.type = BTRFS_ROOT_ITEM_KEY;
12338         key.offset = (u64)-1;
12339         btrfs_init_path(&path);
12340
12341         root = btrfs_read_fs_root(fs_info, &key);
12342         if (IS_ERR(root))
12343                 goto out;
12344
12345         key.objectid = objectid;
12346         key.type = BTRFS_EXTENT_DATA_KEY;
12347         /*
12348          * It can be nasty as data backref offset is
12349          * file offset - file extent offset, which is smaller or
12350          * equal to original backref offset.  The only special case is
12351          * overflow.  So we need to special check and do further search.
12352          */
12353         key.offset = offset & (1ULL << 63) ? 0 : offset;
12354
12355         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12356         if (ret < 0)
12357                 goto out;
12358
12359         /*
12360          * Search afterwards to get correct one
12361          * NOTE: As we must do a comprehensive check on the data backref to
12362          * make sure the dref count also matches, we must iterate all file
12363          * extents for that inode.
12364          */
12365         while (1) {
12366                 leaf = path.nodes[0];
12367                 slot = path.slots[0];
12368
12369                 if (slot >= btrfs_header_nritems(leaf))
12370                         goto next;
12371                 btrfs_item_key_to_cpu(leaf, &key, slot);
12372                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12373                         break;
12374                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12375                 /*
12376                  * Except normal disk bytenr and disk num bytes, we still
12377                  * need to do extra check on dbackref offset as
12378                  * dbackref offset = file_offset - file_extent_offset
12379                  */
12380                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12381                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12382                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12383                     offset)
12384                         found_count++;
12385
12386 next:
12387                 ret = btrfs_next_item(root, &path);
12388                 if (ret)
12389                         break;
12390         }
12391 out:
12392         btrfs_release_path(&path);
12393         if (found_count != count) {
12394                 error(
12395 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12396                         bytenr, len, root_id, objectid, offset, count, found_count);
12397                 return REFERENCER_MISSING;
12398         }
12399         return 0;
12400 }
12401
12402 /*
12403  * Check if the referencer of a shared data backref exists
12404  */
12405 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12406                                      u64 parent, u64 bytenr)
12407 {
12408         struct extent_buffer *eb;
12409         struct btrfs_key key;
12410         struct btrfs_file_extent_item *fi;
12411         u32 nr;
12412         int found_parent = 0;
12413         int i;
12414
12415         eb = read_tree_block(fs_info, parent, 0);
12416         if (!extent_buffer_uptodate(eb))
12417                 goto out;
12418
12419         nr = btrfs_header_nritems(eb);
12420         for (i = 0; i < nr; i++) {
12421                 btrfs_item_key_to_cpu(eb, &key, i);
12422                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12423                         continue;
12424
12425                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12426                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12427                         continue;
12428
12429                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12430                         found_parent = 1;
12431                         break;
12432                 }
12433         }
12434
12435 out:
12436         free_extent_buffer(eb);
12437         if (!found_parent) {
12438                 error("shared extent %llu referencer lost (parent: %llu)",
12439                         bytenr, parent);
12440                 return REFERENCER_MISSING;
12441         }
12442         return 0;
12443 }
12444
12445 /*
12446  * Only delete backref if REFERENCER_MISSING now
12447  *
12448  * Returns <0   the extent was deleted
12449  * Returns >0   the backref was deleted but extent still exists, returned value
12450  *               means error after repair
12451  * Returns  0   nothing happened
12452  */
12453 static int repair_extent_item(struct btrfs_trans_handle *trans,
12454                       struct btrfs_root *root, struct btrfs_path *path,
12455                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12456                       u64 owner, u64 offset, int err)
12457 {
12458         struct btrfs_key old_key;
12459         int freed = 0;
12460         int ret;
12461
12462         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12463
12464         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12465                 /* delete the backref */
12466                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12467                           num_bytes, parent, root_objectid, owner, offset);
12468                 if (!ret) {
12469                         freed = 1;
12470                         err &= ~REFERENCER_MISSING;
12471                         printf("Delete backref in extent [%llu %llu]\n",
12472                                bytenr, num_bytes);
12473                 } else {
12474                         error("fail to delete backref in extent [%llu %llu]",
12475                                bytenr, num_bytes);
12476                 }
12477         }
12478
12479         /* btrfs_free_extent may delete the extent */
12480         btrfs_release_path(path);
12481         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12482
12483         if (ret)
12484                 ret = -ENOENT;
12485         else if (freed)
12486                 ret = err;
12487         return ret;
12488 }
12489
12490 /*
12491  * This function will check a given extent item, including its backref and
12492  * itself (like crossing stripe boundary and type)
12493  *
12494  * Since we don't use extent_record anymore, introduce new error bit
12495  */
12496 static int check_extent_item(struct btrfs_trans_handle *trans,
12497                              struct btrfs_fs_info *fs_info,
12498                              struct btrfs_path *path)
12499 {
12500         struct btrfs_extent_item *ei;
12501         struct btrfs_extent_inline_ref *iref;
12502         struct btrfs_extent_data_ref *dref;
12503         struct extent_buffer *eb = path->nodes[0];
12504         unsigned long end;
12505         unsigned long ptr;
12506         int slot = path->slots[0];
12507         int type;
12508         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12509         u32 item_size = btrfs_item_size_nr(eb, slot);
12510         u64 flags;
12511         u64 offset;
12512         u64 parent;
12513         u64 num_bytes;
12514         u64 root_objectid;
12515         u64 owner;
12516         u64 owner_offset;
12517         int metadata = 0;
12518         int level;
12519         struct btrfs_key key;
12520         int ret;
12521         int err = 0;
12522
12523         btrfs_item_key_to_cpu(eb, &key, slot);
12524         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12525                 bytes_used += key.offset;
12526                 num_bytes = key.offset;
12527         } else {
12528                 bytes_used += nodesize;
12529                 num_bytes = nodesize;
12530         }
12531
12532         if (item_size < sizeof(*ei)) {
12533                 /*
12534                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12535                  * old thing when on disk format is still un-determined.
12536                  * No need to care about it anymore
12537                  */
12538                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12539                 return -ENOTTY;
12540         }
12541
12542         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12543         flags = btrfs_extent_flags(eb, ei);
12544
12545         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12546                 metadata = 1;
12547         if (metadata && check_crossing_stripes(global_info, key.objectid,
12548                                                eb->len)) {
12549                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12550                       key.objectid, key.objectid + nodesize);
12551                 err |= CROSSING_STRIPE_BOUNDARY;
12552         }
12553
12554         ptr = (unsigned long)(ei + 1);
12555
12556         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12557                 /* Old EXTENT_ITEM metadata */
12558                 struct btrfs_tree_block_info *info;
12559
12560                 info = (struct btrfs_tree_block_info *)ptr;
12561                 level = btrfs_tree_block_level(eb, info);
12562                 ptr += sizeof(struct btrfs_tree_block_info);
12563         } else {
12564                 /* New METADATA_ITEM */
12565                 level = key.offset;
12566         }
12567         end = (unsigned long)ei + item_size;
12568
12569 next:
12570         /* Reached extent item end normally */
12571         if (ptr == end)
12572                 goto out;
12573
12574         /* Beyond extent item end, wrong item size */
12575         if (ptr > end) {
12576                 err |= ITEM_SIZE_MISMATCH;
12577                 error("extent item at bytenr %llu slot %d has wrong size",
12578                         eb->start, slot);
12579                 goto out;
12580         }
12581
12582         parent = 0;
12583         root_objectid = 0;
12584         owner = 0;
12585         owner_offset = 0;
12586         /* Now check every backref in this extent item */
12587         iref = (struct btrfs_extent_inline_ref *)ptr;
12588         type = btrfs_extent_inline_ref_type(eb, iref);
12589         offset = btrfs_extent_inline_ref_offset(eb, iref);
12590         switch (type) {
12591         case BTRFS_TREE_BLOCK_REF_KEY:
12592                 root_objectid = offset;
12593                 owner = level;
12594                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12595                                                level);
12596                 err |= ret;
12597                 break;
12598         case BTRFS_SHARED_BLOCK_REF_KEY:
12599                 parent = offset;
12600                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12601                                                  level);
12602                 err |= ret;
12603                 break;
12604         case BTRFS_EXTENT_DATA_REF_KEY:
12605                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12606                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12607                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12608                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12609                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12610                                         owner_offset, key.objectid, key.offset,
12611                                         btrfs_extent_data_ref_count(eb, dref));
12612                 err |= ret;
12613                 break;
12614         case BTRFS_SHARED_DATA_REF_KEY:
12615                 parent = offset;
12616                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12617                 err |= ret;
12618                 break;
12619         default:
12620                 error("extent[%llu %d %llu] has unknown ref type: %d",
12621                         key.objectid, key.type, key.offset, type);
12622                 ret = UNKNOWN_TYPE;
12623                 err |= ret;
12624                 goto out;
12625         }
12626
12627         if (err && repair) {
12628                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12629                          key.objectid, num_bytes, parent, root_objectid,
12630                          owner, owner_offset, ret);
12631                 if (ret < 0)
12632                         goto out;
12633                 if (ret) {
12634                         goto next;
12635                         err = ret;
12636                 }
12637         }
12638
12639         ptr += btrfs_extent_inline_ref_size(type);
12640         goto next;
12641
12642 out:
12643         return err;
12644 }
12645
12646 /*
12647  * Check if a dev extent item is referred correctly by its chunk
12648  */
12649 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12650                                  struct extent_buffer *eb, int slot)
12651 {
12652         struct btrfs_root *chunk_root = fs_info->chunk_root;
12653         struct btrfs_dev_extent *ptr;
12654         struct btrfs_path path;
12655         struct btrfs_key chunk_key;
12656         struct btrfs_key devext_key;
12657         struct btrfs_chunk *chunk;
12658         struct extent_buffer *l;
12659         int num_stripes;
12660         u64 length;
12661         int i;
12662         int found_chunk = 0;
12663         int ret;
12664
12665         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12666         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12667         length = btrfs_dev_extent_length(eb, ptr);
12668
12669         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12670         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12671         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12672
12673         btrfs_init_path(&path);
12674         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12675         if (ret)
12676                 goto out;
12677
12678         l = path.nodes[0];
12679         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12680         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12681                                       chunk_key.offset);
12682         if (ret < 0)
12683                 goto out;
12684
12685         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12686                 goto out;
12687
12688         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12689         for (i = 0; i < num_stripes; i++) {
12690                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12691                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12692
12693                 if (devid == devext_key.objectid &&
12694                     offset == devext_key.offset) {
12695                         found_chunk = 1;
12696                         break;
12697                 }
12698         }
12699 out:
12700         btrfs_release_path(&path);
12701         if (!found_chunk) {
12702                 error(
12703                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12704                         devext_key.objectid, devext_key.offset, length);
12705                 return REFERENCER_MISSING;
12706         }
12707         return 0;
12708 }
12709
12710 /*
12711  * Check if the used space is correct with the dev item
12712  */
12713 static int check_dev_item(struct btrfs_fs_info *fs_info,
12714                           struct extent_buffer *eb, int slot)
12715 {
12716         struct btrfs_root *dev_root = fs_info->dev_root;
12717         struct btrfs_dev_item *dev_item;
12718         struct btrfs_path path;
12719         struct btrfs_key key;
12720         struct btrfs_dev_extent *ptr;
12721         u64 dev_id;
12722         u64 used;
12723         u64 total = 0;
12724         int ret;
12725
12726         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12727         dev_id = btrfs_device_id(eb, dev_item);
12728         used = btrfs_device_bytes_used(eb, dev_item);
12729
12730         key.objectid = dev_id;
12731         key.type = BTRFS_DEV_EXTENT_KEY;
12732         key.offset = 0;
12733
12734         btrfs_init_path(&path);
12735         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12736         if (ret < 0) {
12737                 btrfs_item_key_to_cpu(eb, &key, slot);
12738                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12739                         key.objectid, key.type, key.offset);
12740                 btrfs_release_path(&path);
12741                 return REFERENCER_MISSING;
12742         }
12743
12744         /* Iterate dev_extents to calculate the used space of a device */
12745         while (1) {
12746                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12747                         goto next;
12748
12749                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12750                 if (key.objectid > dev_id)
12751                         break;
12752                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12753                         goto next;
12754
12755                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12756                                      struct btrfs_dev_extent);
12757                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12758 next:
12759                 ret = btrfs_next_item(dev_root, &path);
12760                 if (ret)
12761                         break;
12762         }
12763         btrfs_release_path(&path);
12764
12765         if (used != total) {
12766                 btrfs_item_key_to_cpu(eb, &key, slot);
12767                 error(
12768 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12769                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12770                         BTRFS_DEV_EXTENT_KEY, dev_id);
12771                 return ACCOUNTING_MISMATCH;
12772         }
12773         return 0;
12774 }
12775
12776 /*
12777  * Check a block group item with its referencer (chunk) and its used space
12778  * with extent/metadata item
12779  */
12780 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12781                                   struct extent_buffer *eb, int slot)
12782 {
12783         struct btrfs_root *extent_root = fs_info->extent_root;
12784         struct btrfs_root *chunk_root = fs_info->chunk_root;
12785         struct btrfs_block_group_item *bi;
12786         struct btrfs_block_group_item bg_item;
12787         struct btrfs_path path;
12788         struct btrfs_key bg_key;
12789         struct btrfs_key chunk_key;
12790         struct btrfs_key extent_key;
12791         struct btrfs_chunk *chunk;
12792         struct extent_buffer *leaf;
12793         struct btrfs_extent_item *ei;
12794         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12795         u64 flags;
12796         u64 bg_flags;
12797         u64 used;
12798         u64 total = 0;
12799         int ret;
12800         int err = 0;
12801
12802         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12803         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12804         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12805         used = btrfs_block_group_used(&bg_item);
12806         bg_flags = btrfs_block_group_flags(&bg_item);
12807
12808         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12809         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12810         chunk_key.offset = bg_key.objectid;
12811
12812         btrfs_init_path(&path);
12813         /* Search for the referencer chunk */
12814         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12815         if (ret) {
12816                 error(
12817                 "block group[%llu %llu] did not find the related chunk item",
12818                         bg_key.objectid, bg_key.offset);
12819                 err |= REFERENCER_MISSING;
12820         } else {
12821                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12822                                         struct btrfs_chunk);
12823                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12824                                                 bg_key.offset) {
12825                         error(
12826         "block group[%llu %llu] related chunk item length does not match",
12827                                 bg_key.objectid, bg_key.offset);
12828                         err |= REFERENCER_MISMATCH;
12829                 }
12830         }
12831         btrfs_release_path(&path);
12832
12833         /* Search from the block group bytenr */
12834         extent_key.objectid = bg_key.objectid;
12835         extent_key.type = 0;
12836         extent_key.offset = 0;
12837
12838         btrfs_init_path(&path);
12839         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12840         if (ret < 0)
12841                 goto out;
12842
12843         /* Iterate extent tree to account used space */
12844         while (1) {
12845                 leaf = path.nodes[0];
12846
12847                 /* Search slot can point to the last item beyond leaf nritems */
12848                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12849                         goto next;
12850
12851                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12852                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12853                         break;
12854
12855                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12856                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12857                         goto next;
12858                 if (extent_key.objectid < bg_key.objectid)
12859                         goto next;
12860
12861                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12862                         total += nodesize;
12863                 else
12864                         total += extent_key.offset;
12865
12866                 ei = btrfs_item_ptr(leaf, path.slots[0],
12867                                     struct btrfs_extent_item);
12868                 flags = btrfs_extent_flags(leaf, ei);
12869                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12870                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12871                                 error(
12872                         "bad extent[%llu, %llu) type mismatch with chunk",
12873                                         extent_key.objectid,
12874                                         extent_key.objectid + extent_key.offset);
12875                                 err |= CHUNK_TYPE_MISMATCH;
12876                         }
12877                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12878                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12879                                     BTRFS_BLOCK_GROUP_METADATA))) {
12880                                 error(
12881                         "bad extent[%llu, %llu) type mismatch with chunk",
12882                                         extent_key.objectid,
12883                                         extent_key.objectid + nodesize);
12884                                 err |= CHUNK_TYPE_MISMATCH;
12885                         }
12886                 }
12887 next:
12888                 ret = btrfs_next_item(extent_root, &path);
12889                 if (ret)
12890                         break;
12891         }
12892
12893 out:
12894         btrfs_release_path(&path);
12895
12896         if (total != used) {
12897                 error(
12898                 "block group[%llu %llu] used %llu but extent items used %llu",
12899                         bg_key.objectid, bg_key.offset, used, total);
12900                 err |= ACCOUNTING_MISMATCH;
12901         }
12902         return err;
12903 }
12904
12905 /*
12906  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12907  * FIXME: We still need to repair error of dev_item.
12908  *
12909  * Returns error after repair.
12910  */
12911 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12912                              struct btrfs_root *chunk_root,
12913                              struct btrfs_path *path, int err)
12914 {
12915         struct btrfs_chunk *chunk;
12916         struct btrfs_key chunk_key;
12917         struct extent_buffer *eb = path->nodes[0];
12918         u64 length;
12919         int slot = path->slots[0];
12920         u64 type;
12921         int ret = 0;
12922
12923         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12924         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12925                 return err;
12926         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12927         type = btrfs_chunk_type(path->nodes[0], chunk);
12928         length = btrfs_chunk_length(eb, chunk);
12929
12930         if (err & REFERENCER_MISSING) {
12931                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12932                      type, chunk_key.objectid, chunk_key.offset, length);
12933                 if (ret) {
12934                         error("fail to add block group item[%llu %llu]",
12935                               chunk_key.offset, length);
12936                         goto out;
12937                 } else {
12938                         err &= ~REFERENCER_MISSING;
12939                         printf("Added block group item[%llu %llu]\n",
12940                                chunk_key.offset, length);
12941                 }
12942         }
12943
12944 out:
12945         return err;
12946 }
12947
12948 /*
12949  * Check a chunk item.
12950  * Including checking all referred dev_extents and block group
12951  */
12952 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12953                             struct extent_buffer *eb, int slot)
12954 {
12955         struct btrfs_root *extent_root = fs_info->extent_root;
12956         struct btrfs_root *dev_root = fs_info->dev_root;
12957         struct btrfs_path path;
12958         struct btrfs_key chunk_key;
12959         struct btrfs_key bg_key;
12960         struct btrfs_key devext_key;
12961         struct btrfs_chunk *chunk;
12962         struct extent_buffer *leaf;
12963         struct btrfs_block_group_item *bi;
12964         struct btrfs_block_group_item bg_item;
12965         struct btrfs_dev_extent *ptr;
12966         u64 length;
12967         u64 chunk_end;
12968         u64 stripe_len;
12969         u64 type;
12970         int num_stripes;
12971         u64 offset;
12972         u64 objectid;
12973         int i;
12974         int ret;
12975         int err = 0;
12976
12977         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12978         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12979         length = btrfs_chunk_length(eb, chunk);
12980         chunk_end = chunk_key.offset + length;
12981         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12982                                       chunk_key.offset);
12983         if (ret < 0) {
12984                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12985                         chunk_end);
12986                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12987                 goto out;
12988         }
12989         type = btrfs_chunk_type(eb, chunk);
12990
12991         bg_key.objectid = chunk_key.offset;
12992         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12993         bg_key.offset = length;
12994
12995         btrfs_init_path(&path);
12996         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12997         if (ret) {
12998                 error(
12999                 "chunk[%llu %llu) did not find the related block group item",
13000                         chunk_key.offset, chunk_end);
13001                 err |= REFERENCER_MISSING;
13002         } else{
13003                 leaf = path.nodes[0];
13004                 bi = btrfs_item_ptr(leaf, path.slots[0],
13005                                     struct btrfs_block_group_item);
13006                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13007                                    sizeof(bg_item));
13008                 if (btrfs_block_group_flags(&bg_item) != type) {
13009                         error(
13010 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13011                                 chunk_key.offset, chunk_end, type,
13012                                 btrfs_block_group_flags(&bg_item));
13013                         err |= REFERENCER_MISSING;
13014                 }
13015         }
13016
13017         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13018         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13019         for (i = 0; i < num_stripes; i++) {
13020                 btrfs_release_path(&path);
13021                 btrfs_init_path(&path);
13022                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13023                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13024                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13025
13026                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13027                                         0, 0);
13028                 if (ret)
13029                         goto not_match_dev;
13030
13031                 leaf = path.nodes[0];
13032                 ptr = btrfs_item_ptr(leaf, path.slots[0],
13033                                      struct btrfs_dev_extent);
13034                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13035                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13036                 if (objectid != chunk_key.objectid ||
13037                     offset != chunk_key.offset ||
13038                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13039                         goto not_match_dev;
13040                 continue;
13041 not_match_dev:
13042                 err |= BACKREF_MISSING;
13043                 error(
13044                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13045                         chunk_key.objectid, chunk_end, i);
13046                 continue;
13047         }
13048         btrfs_release_path(&path);
13049 out:
13050         return err;
13051 }
13052
13053 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13054                                    struct btrfs_root *root,
13055                                    struct btrfs_path *path)
13056 {
13057         struct btrfs_key key;
13058         int ret = 0;
13059
13060         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13061         btrfs_release_path(path);
13062         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13063         if (ret) {
13064                 ret = -ENOENT;
13065                 goto out;
13066         }
13067
13068         ret = btrfs_del_item(trans, root, path);
13069         if (ret)
13070                 goto out;
13071
13072         if (path->slots[0] == 0)
13073                 btrfs_prev_leaf(root, path);
13074         else
13075                 path->slots[0]--;
13076 out:
13077         if (ret)
13078                 error("failed to delete root %llu item[%llu, %u, %llu]",
13079                       root->objectid, key.objectid, key.type, key.offset);
13080         else
13081                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13082                        root->objectid, key.objectid, key.type, key.offset);
13083         return ret;
13084 }
13085
13086 /*
13087  * Main entry function to check known items and update related accounting info
13088  */
13089 static int check_leaf_items(struct btrfs_trans_handle *trans,
13090                             struct btrfs_root *root, struct btrfs_path *path,
13091                             struct node_refs *nrefs, int account_bytes)
13092 {
13093         struct btrfs_fs_info *fs_info = root->fs_info;
13094         struct btrfs_key key;
13095         struct extent_buffer *eb;
13096         int slot;
13097         int type;
13098         struct btrfs_extent_data_ref *dref;
13099         int ret = 0;
13100         int err = 0;
13101
13102 again:
13103         eb = path->nodes[0];
13104         slot = path->slots[0];
13105         if (slot >= btrfs_header_nritems(eb)) {
13106                 if (slot == 0) {
13107                         error("empty leaf [%llu %u] root %llu", eb->start,
13108                                 root->fs_info->nodesize, root->objectid);
13109                         err |= EIO;
13110                 }
13111                 goto out;
13112         }
13113
13114         btrfs_item_key_to_cpu(eb, &key, slot);
13115         type = key.type;
13116
13117         switch (type) {
13118         case BTRFS_EXTENT_DATA_KEY:
13119                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13120                 if (repair && ret)
13121                         ret = repair_extent_data_item(trans, root, path, nrefs,
13122                                                       ret);
13123                 err |= ret;
13124                 break;
13125         case BTRFS_BLOCK_GROUP_ITEM_KEY:
13126                 ret = check_block_group_item(fs_info, eb, slot);
13127                 if (repair &&
13128                     ret & REFERENCER_MISSING)
13129                         ret = delete_extent_tree_item(trans, root, path);
13130                 err |= ret;
13131                 break;
13132         case BTRFS_DEV_ITEM_KEY:
13133                 ret = check_dev_item(fs_info, eb, slot);
13134                 err |= ret;
13135                 break;
13136         case BTRFS_CHUNK_ITEM_KEY:
13137                 ret = check_chunk_item(fs_info, eb, slot);
13138                 if (repair && ret)
13139                         ret = repair_chunk_item(trans, root, path, ret);
13140                 err |= ret;
13141                 break;
13142         case BTRFS_DEV_EXTENT_KEY:
13143                 ret = check_dev_extent_item(fs_info, eb, slot);
13144                 err |= ret;
13145                 break;
13146         case BTRFS_EXTENT_ITEM_KEY:
13147         case BTRFS_METADATA_ITEM_KEY:
13148                 ret = check_extent_item(trans, fs_info, path);
13149                 err |= ret;
13150                 break;
13151         case BTRFS_EXTENT_CSUM_KEY:
13152                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
13153                 err |= ret;
13154                 break;
13155         case BTRFS_TREE_BLOCK_REF_KEY:
13156                 ret = check_tree_block_backref(fs_info, key.offset,
13157                                                key.objectid, -1);
13158                 if (repair &&
13159                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13160                         ret = delete_extent_tree_item(trans, root, path);
13161                 err |= ret;
13162                 break;
13163         case BTRFS_EXTENT_DATA_REF_KEY:
13164                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13165                 ret = check_extent_data_backref(fs_info,
13166                                 btrfs_extent_data_ref_root(eb, dref),
13167                                 btrfs_extent_data_ref_objectid(eb, dref),
13168                                 btrfs_extent_data_ref_offset(eb, dref),
13169                                 key.objectid, 0,
13170                                 btrfs_extent_data_ref_count(eb, dref));
13171                 if (repair &&
13172                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13173                         ret = delete_extent_tree_item(trans, root, path);
13174                 err |= ret;
13175                 break;
13176         case BTRFS_SHARED_BLOCK_REF_KEY:
13177                 ret = check_shared_block_backref(fs_info, key.offset,
13178                                                  key.objectid, -1);
13179                 if (repair &&
13180                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13181                         ret = delete_extent_tree_item(trans, root, path);
13182                 err |= ret;
13183                 break;
13184         case BTRFS_SHARED_DATA_REF_KEY:
13185                 ret = check_shared_data_backref(fs_info, key.offset,
13186                                                 key.objectid);
13187                 if (repair &&
13188                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13189                         ret = delete_extent_tree_item(trans, root, path);
13190                 err |= ret;
13191                 break;
13192         default:
13193                 break;
13194         }
13195
13196         ++path->slots[0];
13197         goto again;
13198 out:
13199         return err;
13200 }
13201
13202 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
13203
13204 /*
13205  * Low memory usage version check_chunks_and_extents.
13206  */
13207 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13208 {
13209         struct btrfs_trans_handle *trans = NULL;
13210         struct btrfs_path path;
13211         struct btrfs_key old_key;
13212         struct btrfs_key key;
13213         struct btrfs_root *root1;
13214         struct btrfs_root *root;
13215         struct btrfs_root *cur_root;
13216         int err = 0;
13217         int ret;
13218
13219         root = fs_info->fs_root;
13220
13221         if (repair) {
13222                 /* pin every tree block to avoid extent overwrite */
13223                 ret = pin_metadata_blocks(fs_info);
13224                 if (ret) {
13225                         error("failed to pin metadata blocks");
13226                         return ret;
13227                 }
13228                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13229                 if (IS_ERR(trans)) {
13230                         error("failed to start transaction before check");
13231                         return PTR_ERR(trans);
13232                 }
13233         }
13234
13235         root1 = root->fs_info->chunk_root;
13236         ret = check_btrfs_root(trans, root1, 0, 1);
13237         err |= ret;
13238
13239         root1 = root->fs_info->tree_root;
13240         ret = check_btrfs_root(trans, root1, 0, 1);
13241         err |= ret;
13242
13243         btrfs_init_path(&path);
13244         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13245         key.offset = 0;
13246         key.type = BTRFS_ROOT_ITEM_KEY;
13247
13248         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13249         if (ret) {
13250                 error("cannot find extent tree in tree_root");
13251                 goto out;
13252         }
13253
13254         while (1) {
13255                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13256                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13257                         goto next;
13258                 old_key = key;
13259                 key.offset = (u64)-1;
13260
13261                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13262                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13263                                         &key);
13264                 else
13265                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13266                 if (IS_ERR(cur_root) || !cur_root) {
13267                         error("failed to read tree: %lld", key.objectid);
13268                         goto next;
13269                 }
13270
13271                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13272                 err |= ret;
13273
13274                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13275                         btrfs_free_fs_root(cur_root);
13276
13277                 btrfs_release_path(&path);
13278                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13279                                         &old_key, &path, 0, 0);
13280                 if (ret)
13281                         goto out;
13282 next:
13283                 ret = btrfs_next_item(root1, &path);
13284                 if (ret)
13285                         goto out;
13286         }
13287 out:
13288
13289         /* if repair, update block accounting */
13290         if (repair) {
13291                 ret = btrfs_fix_block_accounting(trans, root);
13292                 if (ret)
13293                         err |= ret;
13294         }
13295
13296         if (trans)
13297                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13298
13299         btrfs_release_path(&path);
13300
13301         return err;
13302 }
13303
13304 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13305 {
13306         int ret;
13307
13308         if (!ctx.progress_enabled)
13309                 fprintf(stderr, "checking extents\n");
13310         if (check_mode == CHECK_MODE_LOWMEM)
13311                 ret = check_chunks_and_extents_v2(fs_info);
13312         else
13313                 ret = check_chunks_and_extents(fs_info);
13314
13315         return ret;
13316 }
13317
13318 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13319                            struct btrfs_root *root, int overwrite)
13320 {
13321         struct extent_buffer *c;
13322         struct extent_buffer *old = root->node;
13323         int level;
13324         int ret;
13325         struct btrfs_disk_key disk_key = {0,0,0};
13326
13327         level = 0;
13328
13329         if (overwrite) {
13330                 c = old;
13331                 extent_buffer_get(c);
13332                 goto init;
13333         }
13334         c = btrfs_alloc_free_block(trans, root,
13335                                    root->fs_info->nodesize,
13336                                    root->root_key.objectid,
13337                                    &disk_key, level, 0, 0);
13338         if (IS_ERR(c)) {
13339                 c = old;
13340                 extent_buffer_get(c);
13341                 overwrite = 1;
13342         }
13343 init:
13344         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13345         btrfs_set_header_level(c, level);
13346         btrfs_set_header_bytenr(c, c->start);
13347         btrfs_set_header_generation(c, trans->transid);
13348         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13349         btrfs_set_header_owner(c, root->root_key.objectid);
13350
13351         write_extent_buffer(c, root->fs_info->fsid,
13352                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13353
13354         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13355                             btrfs_header_chunk_tree_uuid(c),
13356                             BTRFS_UUID_SIZE);
13357
13358         btrfs_mark_buffer_dirty(c);
13359         /*
13360          * this case can happen in the following case:
13361          *
13362          * 1.overwrite previous root.
13363          *
13364          * 2.reinit reloc data root, this is because we skip pin
13365          * down reloc data tree before which means we can allocate
13366          * same block bytenr here.
13367          */
13368         if (old->start == c->start) {
13369                 btrfs_set_root_generation(&root->root_item,
13370                                           trans->transid);
13371                 root->root_item.level = btrfs_header_level(root->node);
13372                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13373                                         &root->root_key, &root->root_item);
13374                 if (ret) {
13375                         free_extent_buffer(c);
13376                         return ret;
13377                 }
13378         }
13379         free_extent_buffer(old);
13380         root->node = c;
13381         add_root_to_dirty_list(root);
13382         return 0;
13383 }
13384
13385 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13386                                 struct extent_buffer *eb, int tree_root)
13387 {
13388         struct extent_buffer *tmp;
13389         struct btrfs_root_item *ri;
13390         struct btrfs_key key;
13391         u64 bytenr;
13392         int level = btrfs_header_level(eb);
13393         int nritems;
13394         int ret;
13395         int i;
13396
13397         /*
13398          * If we have pinned this block before, don't pin it again.
13399          * This can not only avoid forever loop with broken filesystem
13400          * but also give us some speedups.
13401          */
13402         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13403                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13404                 return 0;
13405
13406         btrfs_pin_extent(fs_info, eb->start, eb->len);
13407
13408         nritems = btrfs_header_nritems(eb);
13409         for (i = 0; i < nritems; i++) {
13410                 if (level == 0) {
13411                         btrfs_item_key_to_cpu(eb, &key, i);
13412                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13413                                 continue;
13414                         /* Skip the extent root and reloc roots */
13415                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13416                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13417                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13418                                 continue;
13419                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13420                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13421
13422                         /*
13423                          * If at any point we start needing the real root we
13424                          * will have to build a stump root for the root we are
13425                          * in, but for now this doesn't actually use the root so
13426                          * just pass in extent_root.
13427                          */
13428                         tmp = read_tree_block(fs_info, bytenr, 0);
13429                         if (!extent_buffer_uptodate(tmp)) {
13430                                 fprintf(stderr, "Error reading root block\n");
13431                                 return -EIO;
13432                         }
13433                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13434                         free_extent_buffer(tmp);
13435                         if (ret)
13436                                 return ret;
13437                 } else {
13438                         bytenr = btrfs_node_blockptr(eb, i);
13439
13440                         /* If we aren't the tree root don't read the block */
13441                         if (level == 1 && !tree_root) {
13442                                 btrfs_pin_extent(fs_info, bytenr,
13443                                                 fs_info->nodesize);
13444                                 continue;
13445                         }
13446
13447                         tmp = read_tree_block(fs_info, bytenr, 0);
13448                         if (!extent_buffer_uptodate(tmp)) {
13449                                 fprintf(stderr, "Error reading tree block\n");
13450                                 return -EIO;
13451                         }
13452                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13453                         free_extent_buffer(tmp);
13454                         if (ret)
13455                                 return ret;
13456                 }
13457         }
13458
13459         return 0;
13460 }
13461
13462 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13463 {
13464         int ret;
13465
13466         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13467         if (ret)
13468                 return ret;
13469
13470         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13471 }
13472
13473 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13474 {
13475         struct btrfs_block_group_cache *cache;
13476         struct btrfs_path path;
13477         struct extent_buffer *leaf;
13478         struct btrfs_chunk *chunk;
13479         struct btrfs_key key;
13480         int ret;
13481         u64 start;
13482
13483         btrfs_init_path(&path);
13484         key.objectid = 0;
13485         key.type = BTRFS_CHUNK_ITEM_KEY;
13486         key.offset = 0;
13487         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13488         if (ret < 0) {
13489                 btrfs_release_path(&path);
13490                 return ret;
13491         }
13492
13493         /*
13494          * We do this in case the block groups were screwed up and had alloc
13495          * bits that aren't actually set on the chunks.  This happens with
13496          * restored images every time and could happen in real life I guess.
13497          */
13498         fs_info->avail_data_alloc_bits = 0;
13499         fs_info->avail_metadata_alloc_bits = 0;
13500         fs_info->avail_system_alloc_bits = 0;
13501
13502         /* First we need to create the in-memory block groups */
13503         while (1) {
13504                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13505                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13506                         if (ret < 0) {
13507                                 btrfs_release_path(&path);
13508                                 return ret;
13509                         }
13510                         if (ret) {
13511                                 ret = 0;
13512                                 break;
13513                         }
13514                 }
13515                 leaf = path.nodes[0];
13516                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13517                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13518                         path.slots[0]++;
13519                         continue;
13520                 }
13521
13522                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13523                 btrfs_add_block_group(fs_info, 0,
13524                                       btrfs_chunk_type(leaf, chunk),
13525                                       key.objectid, key.offset,
13526                                       btrfs_chunk_length(leaf, chunk));
13527                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13528                                  key.offset + btrfs_chunk_length(leaf, chunk));
13529                 path.slots[0]++;
13530         }
13531         start = 0;
13532         while (1) {
13533                 cache = btrfs_lookup_first_block_group(fs_info, start);
13534                 if (!cache)
13535                         break;
13536                 cache->cached = 1;
13537                 start = cache->key.objectid + cache->key.offset;
13538         }
13539
13540         btrfs_release_path(&path);
13541         return 0;
13542 }
13543
13544 static int reset_balance(struct btrfs_trans_handle *trans,
13545                          struct btrfs_fs_info *fs_info)
13546 {
13547         struct btrfs_root *root = fs_info->tree_root;
13548         struct btrfs_path path;
13549         struct extent_buffer *leaf;
13550         struct btrfs_key key;
13551         int del_slot, del_nr = 0;
13552         int ret;
13553         int found = 0;
13554
13555         btrfs_init_path(&path);
13556         key.objectid = BTRFS_BALANCE_OBJECTID;
13557         key.type = BTRFS_BALANCE_ITEM_KEY;
13558         key.offset = 0;
13559         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13560         if (ret) {
13561                 if (ret > 0)
13562                         ret = 0;
13563                 if (!ret)
13564                         goto reinit_data_reloc;
13565                 else
13566                         goto out;
13567         }
13568
13569         ret = btrfs_del_item(trans, root, &path);
13570         if (ret)
13571                 goto out;
13572         btrfs_release_path(&path);
13573
13574         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13575         key.type = BTRFS_ROOT_ITEM_KEY;
13576         key.offset = 0;
13577         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13578         if (ret < 0)
13579                 goto out;
13580         while (1) {
13581                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13582                         if (!found)
13583                                 break;
13584
13585                         if (del_nr) {
13586                                 ret = btrfs_del_items(trans, root, &path,
13587                                                       del_slot, del_nr);
13588                                 del_nr = 0;
13589                                 if (ret)
13590                                         goto out;
13591                         }
13592                         key.offset++;
13593                         btrfs_release_path(&path);
13594
13595                         found = 0;
13596                         ret = btrfs_search_slot(trans, root, &key, &path,
13597                                                 -1, 1);
13598                         if (ret < 0)
13599                                 goto out;
13600                         continue;
13601                 }
13602                 found = 1;
13603                 leaf = path.nodes[0];
13604                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13605                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13606                         break;
13607                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13608                         path.slots[0]++;
13609                         continue;
13610                 }
13611                 if (!del_nr) {
13612                         del_slot = path.slots[0];
13613                         del_nr = 1;
13614                 } else {
13615                         del_nr++;
13616                 }
13617                 path.slots[0]++;
13618         }
13619
13620         if (del_nr) {
13621                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13622                 if (ret)
13623                         goto out;
13624         }
13625         btrfs_release_path(&path);
13626
13627 reinit_data_reloc:
13628         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13629         key.type = BTRFS_ROOT_ITEM_KEY;
13630         key.offset = (u64)-1;
13631         root = btrfs_read_fs_root(fs_info, &key);
13632         if (IS_ERR(root)) {
13633                 fprintf(stderr, "Error reading data reloc tree\n");
13634                 ret = PTR_ERR(root);
13635                 goto out;
13636         }
13637         record_root_in_trans(trans, root);
13638         ret = btrfs_fsck_reinit_root(trans, root, 0);
13639         if (ret)
13640                 goto out;
13641         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13642 out:
13643         btrfs_release_path(&path);
13644         return ret;
13645 }
13646
13647 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13648                               struct btrfs_fs_info *fs_info)
13649 {
13650         u64 start = 0;
13651         int ret;
13652
13653         /*
13654          * The only reason we don't do this is because right now we're just
13655          * walking the trees we find and pinning down their bytes, we don't look
13656          * at any of the leaves.  In order to do mixed groups we'd have to check
13657          * the leaves of any fs roots and pin down the bytes for any file
13658          * extents we find.  Not hard but why do it if we don't have to?
13659          */
13660         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13661                 fprintf(stderr, "We don't support re-initing the extent tree "
13662                         "for mixed block groups yet, please notify a btrfs "
13663                         "developer you want to do this so they can add this "
13664                         "functionality.\n");
13665                 return -EINVAL;
13666         }
13667
13668         /*
13669          * first we need to walk all of the trees except the extent tree and pin
13670          * down the bytes that are in use so we don't overwrite any existing
13671          * metadata.
13672          */
13673         ret = pin_metadata_blocks(fs_info);
13674         if (ret) {
13675                 fprintf(stderr, "error pinning down used bytes\n");
13676                 return ret;
13677         }
13678
13679         /*
13680          * Need to drop all the block groups since we're going to recreate all
13681          * of them again.
13682          */
13683         btrfs_free_block_groups(fs_info);
13684         ret = reset_block_groups(fs_info);
13685         if (ret) {
13686                 fprintf(stderr, "error resetting the block groups\n");
13687                 return ret;
13688         }
13689
13690         /* Ok we can allocate now, reinit the extent root */
13691         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13692         if (ret) {
13693                 fprintf(stderr, "extent root initialization failed\n");
13694                 /*
13695                  * When the transaction code is updated we should end the
13696                  * transaction, but for now progs only knows about commit so
13697                  * just return an error.
13698                  */
13699                 return ret;
13700         }
13701
13702         /*
13703          * Now we have all the in-memory block groups setup so we can make
13704          * allocations properly, and the metadata we care about is safe since we
13705          * pinned all of it above.
13706          */
13707         while (1) {
13708                 struct btrfs_block_group_cache *cache;
13709
13710                 cache = btrfs_lookup_first_block_group(fs_info, start);
13711                 if (!cache)
13712                         break;
13713                 start = cache->key.objectid + cache->key.offset;
13714                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13715                                         &cache->key, &cache->item,
13716                                         sizeof(cache->item));
13717                 if (ret) {
13718                         fprintf(stderr, "Error adding block group\n");
13719                         return ret;
13720                 }
13721                 btrfs_extent_post_op(trans, fs_info->extent_root);
13722         }
13723
13724         ret = reset_balance(trans, fs_info);
13725         if (ret)
13726                 fprintf(stderr, "error resetting the pending balance\n");
13727
13728         return ret;
13729 }
13730
13731 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13732 {
13733         struct btrfs_path path;
13734         struct btrfs_trans_handle *trans;
13735         struct btrfs_key key;
13736         int ret;
13737
13738         printf("Recowing metadata block %llu\n", eb->start);
13739         key.objectid = btrfs_header_owner(eb);
13740         key.type = BTRFS_ROOT_ITEM_KEY;
13741         key.offset = (u64)-1;
13742
13743         root = btrfs_read_fs_root(root->fs_info, &key);
13744         if (IS_ERR(root)) {
13745                 fprintf(stderr, "Couldn't find owner root %llu\n",
13746                         key.objectid);
13747                 return PTR_ERR(root);
13748         }
13749
13750         trans = btrfs_start_transaction(root, 1);
13751         if (IS_ERR(trans))
13752                 return PTR_ERR(trans);
13753
13754         btrfs_init_path(&path);
13755         path.lowest_level = btrfs_header_level(eb);
13756         if (path.lowest_level)
13757                 btrfs_node_key_to_cpu(eb, &key, 0);
13758         else
13759                 btrfs_item_key_to_cpu(eb, &key, 0);
13760
13761         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13762         btrfs_commit_transaction(trans, root);
13763         btrfs_release_path(&path);
13764         return ret;
13765 }
13766
13767 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13768 {
13769         struct btrfs_path path;
13770         struct btrfs_trans_handle *trans;
13771         struct btrfs_key key;
13772         int ret;
13773
13774         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13775                bad->key.type, bad->key.offset);
13776         key.objectid = bad->root_id;
13777         key.type = BTRFS_ROOT_ITEM_KEY;
13778         key.offset = (u64)-1;
13779
13780         root = btrfs_read_fs_root(root->fs_info, &key);
13781         if (IS_ERR(root)) {
13782                 fprintf(stderr, "Couldn't find owner root %llu\n",
13783                         key.objectid);
13784                 return PTR_ERR(root);
13785         }
13786
13787         trans = btrfs_start_transaction(root, 1);
13788         if (IS_ERR(trans))
13789                 return PTR_ERR(trans);
13790
13791         btrfs_init_path(&path);
13792         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13793         if (ret) {
13794                 if (ret > 0)
13795                         ret = 0;
13796                 goto out;
13797         }
13798         ret = btrfs_del_item(trans, root, &path);
13799 out:
13800         btrfs_commit_transaction(trans, root);
13801         btrfs_release_path(&path);
13802         return ret;
13803 }
13804
13805 static int zero_log_tree(struct btrfs_root *root)
13806 {
13807         struct btrfs_trans_handle *trans;
13808         int ret;
13809
13810         trans = btrfs_start_transaction(root, 1);
13811         if (IS_ERR(trans)) {
13812                 ret = PTR_ERR(trans);
13813                 return ret;
13814         }
13815         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13816         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13817         ret = btrfs_commit_transaction(trans, root);
13818         return ret;
13819 }
13820
13821 static int populate_csum(struct btrfs_trans_handle *trans,
13822                          struct btrfs_root *csum_root, char *buf, u64 start,
13823                          u64 len)
13824 {
13825         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13826         u64 offset = 0;
13827         u64 sectorsize;
13828         int ret = 0;
13829
13830         while (offset < len) {
13831                 sectorsize = fs_info->sectorsize;
13832                 ret = read_extent_data(fs_info, buf, start + offset,
13833                                        &sectorsize, 0);
13834                 if (ret)
13835                         break;
13836                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13837                                             start + offset, buf, sectorsize);
13838                 if (ret)
13839                         break;
13840                 offset += sectorsize;
13841         }
13842         return ret;
13843 }
13844
13845 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13846                                       struct btrfs_root *csum_root,
13847                                       struct btrfs_root *cur_root)
13848 {
13849         struct btrfs_path path;
13850         struct btrfs_key key;
13851         struct extent_buffer *node;
13852         struct btrfs_file_extent_item *fi;
13853         char *buf = NULL;
13854         u64 start = 0;
13855         u64 len = 0;
13856         int slot = 0;
13857         int ret = 0;
13858
13859         buf = malloc(cur_root->fs_info->sectorsize);
13860         if (!buf)
13861                 return -ENOMEM;
13862
13863         btrfs_init_path(&path);
13864         key.objectid = 0;
13865         key.offset = 0;
13866         key.type = 0;
13867         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13868         if (ret < 0)
13869                 goto out;
13870         /* Iterate all regular file extents and fill its csum */
13871         while (1) {
13872                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13873
13874                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13875                         goto next;
13876                 node = path.nodes[0];
13877                 slot = path.slots[0];
13878                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13879                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13880                         goto next;
13881                 start = btrfs_file_extent_disk_bytenr(node, fi);
13882                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13883
13884                 ret = populate_csum(trans, csum_root, buf, start, len);
13885                 if (ret == -EEXIST)
13886                         ret = 0;
13887                 if (ret < 0)
13888                         goto out;
13889 next:
13890                 /*
13891                  * TODO: if next leaf is corrupted, jump to nearest next valid
13892                  * leaf.
13893                  */
13894                 ret = btrfs_next_item(cur_root, &path);
13895                 if (ret < 0)
13896                         goto out;
13897                 if (ret > 0) {
13898                         ret = 0;
13899                         goto out;
13900                 }
13901         }
13902
13903 out:
13904         btrfs_release_path(&path);
13905         free(buf);
13906         return ret;
13907 }
13908
13909 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13910                                   struct btrfs_root *csum_root)
13911 {
13912         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13913         struct btrfs_path path;
13914         struct btrfs_root *tree_root = fs_info->tree_root;
13915         struct btrfs_root *cur_root;
13916         struct extent_buffer *node;
13917         struct btrfs_key key;
13918         int slot = 0;
13919         int ret = 0;
13920
13921         btrfs_init_path(&path);
13922         key.objectid = BTRFS_FS_TREE_OBJECTID;
13923         key.offset = 0;
13924         key.type = BTRFS_ROOT_ITEM_KEY;
13925         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13926         if (ret < 0)
13927                 goto out;
13928         if (ret > 0) {
13929                 ret = -ENOENT;
13930                 goto out;
13931         }
13932
13933         while (1) {
13934                 node = path.nodes[0];
13935                 slot = path.slots[0];
13936                 btrfs_item_key_to_cpu(node, &key, slot);
13937                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13938                         goto out;
13939                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13940                         goto next;
13941                 if (!is_fstree(key.objectid))
13942                         goto next;
13943                 key.offset = (u64)-1;
13944
13945                 cur_root = btrfs_read_fs_root(fs_info, &key);
13946                 if (IS_ERR(cur_root) || !cur_root) {
13947                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13948                                 key.objectid);
13949                         goto out;
13950                 }
13951                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13952                                 cur_root);
13953                 if (ret < 0)
13954                         goto out;
13955 next:
13956                 ret = btrfs_next_item(tree_root, &path);
13957                 if (ret > 0) {
13958                         ret = 0;
13959                         goto out;
13960                 }
13961                 if (ret < 0)
13962                         goto out;
13963         }
13964
13965 out:
13966         btrfs_release_path(&path);
13967         return ret;
13968 }
13969
13970 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13971                                       struct btrfs_root *csum_root)
13972 {
13973         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13974         struct btrfs_path path;
13975         struct btrfs_extent_item *ei;
13976         struct extent_buffer *leaf;
13977         char *buf;
13978         struct btrfs_key key;
13979         int ret;
13980
13981         btrfs_init_path(&path);
13982         key.objectid = 0;
13983         key.type = BTRFS_EXTENT_ITEM_KEY;
13984         key.offset = 0;
13985         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13986         if (ret < 0) {
13987                 btrfs_release_path(&path);
13988                 return ret;
13989         }
13990
13991         buf = malloc(csum_root->fs_info->sectorsize);
13992         if (!buf) {
13993                 btrfs_release_path(&path);
13994                 return -ENOMEM;
13995         }
13996
13997         while (1) {
13998                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13999                         ret = btrfs_next_leaf(extent_root, &path);
14000                         if (ret < 0)
14001                                 break;
14002                         if (ret) {
14003                                 ret = 0;
14004                                 break;
14005                         }
14006                 }
14007                 leaf = path.nodes[0];
14008
14009                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14010                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14011                         path.slots[0]++;
14012                         continue;
14013                 }
14014
14015                 ei = btrfs_item_ptr(leaf, path.slots[0],
14016                                     struct btrfs_extent_item);
14017                 if (!(btrfs_extent_flags(leaf, ei) &
14018                       BTRFS_EXTENT_FLAG_DATA)) {
14019                         path.slots[0]++;
14020                         continue;
14021                 }
14022
14023                 ret = populate_csum(trans, csum_root, buf, key.objectid,
14024                                     key.offset);
14025                 if (ret)
14026                         break;
14027                 path.slots[0]++;
14028         }
14029
14030         btrfs_release_path(&path);
14031         free(buf);
14032         return ret;
14033 }
14034
/*
 * Recalculate the checksums and store them in the csum tree.
 *
 * An extent tree init wipes all extent info, so in that case the extent tree
 * cannot be used as the source and the fs/subvolume trees are walked
 * instead.  A non-zero @search_fs_tree selects the fs/subvolume trees; zero
 * selects the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
14051
14052 static void free_roots_info_cache(void)
14053 {
14054         if (!roots_info_cache)
14055                 return;
14056
14057         while (!cache_tree_empty(roots_info_cache)) {
14058                 struct cache_extent *entry;
14059                 struct root_item_info *rii;
14060
14061                 entry = first_cache_extent(roots_info_cache);
14062                 if (!entry)
14063                         break;
14064                 remove_cache_extent(roots_info_cache, entry);
14065                 rii = container_of(entry, struct root_item_info, cache_extent);
14066                 free(rii);
14067         }
14068
14069         free(roots_info_cache);
14070         roots_info_cache = NULL;
14071 }
14072
14073 static int build_roots_info_cache(struct btrfs_fs_info *info)
14074 {
14075         int ret = 0;
14076         struct btrfs_key key;
14077         struct extent_buffer *leaf;
14078         struct btrfs_path path;
14079
14080         if (!roots_info_cache) {
14081                 roots_info_cache = malloc(sizeof(*roots_info_cache));
14082                 if (!roots_info_cache)
14083                         return -ENOMEM;
14084                 cache_tree_init(roots_info_cache);
14085         }
14086
14087         btrfs_init_path(&path);
14088         key.objectid = 0;
14089         key.type = BTRFS_EXTENT_ITEM_KEY;
14090         key.offset = 0;
14091         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14092         if (ret < 0)
14093                 goto out;
14094         leaf = path.nodes[0];
14095
14096         while (1) {
14097                 struct btrfs_key found_key;
14098                 struct btrfs_extent_item *ei;
14099                 struct btrfs_extent_inline_ref *iref;
14100                 int slot = path.slots[0];
14101                 int type;
14102                 u64 flags;
14103                 u64 root_id;
14104                 u8 level;
14105                 struct cache_extent *entry;
14106                 struct root_item_info *rii;
14107
14108                 if (slot >= btrfs_header_nritems(leaf)) {
14109                         ret = btrfs_next_leaf(info->extent_root, &path);
14110                         if (ret < 0) {
14111                                 break;
14112                         } else if (ret) {
14113                                 ret = 0;
14114                                 break;
14115                         }
14116                         leaf = path.nodes[0];
14117                         slot = path.slots[0];
14118                 }
14119
14120                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14121
14122                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14123                     found_key.type != BTRFS_METADATA_ITEM_KEY)
14124                         goto next;
14125
14126                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14127                 flags = btrfs_extent_flags(leaf, ei);
14128
14129                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14130                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
14131                         goto next;
14132
14133                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14134                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14135                         level = found_key.offset;
14136                 } else {
14137                         struct btrfs_tree_block_info *binfo;
14138
14139                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
14140                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14141                         level = btrfs_tree_block_level(leaf, binfo);
14142                 }
14143
14144                 /*
14145                  * For a root extent, it must be of the following type and the
14146                  * first (and only one) iref in the item.
14147                  */
14148                 type = btrfs_extent_inline_ref_type(leaf, iref);
14149                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14150                         goto next;
14151
14152                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14153                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14154                 if (!entry) {
14155                         rii = malloc(sizeof(struct root_item_info));
14156                         if (!rii) {
14157                                 ret = -ENOMEM;
14158                                 goto out;
14159                         }
14160                         rii->cache_extent.start = root_id;
14161                         rii->cache_extent.size = 1;
14162                         rii->level = (u8)-1;
14163                         entry = &rii->cache_extent;
14164                         ret = insert_cache_extent(roots_info_cache, entry);
14165                         ASSERT(ret == 0);
14166                 } else {
14167                         rii = container_of(entry, struct root_item_info,
14168                                            cache_extent);
14169                 }
14170
14171                 ASSERT(rii->cache_extent.start == root_id);
14172                 ASSERT(rii->cache_extent.size == 1);
14173
14174                 if (level > rii->level || rii->level == (u8)-1) {
14175                         rii->level = level;
14176                         rii->bytenr = found_key.objectid;
14177                         rii->gen = btrfs_extent_generation(leaf, ei);
14178                         rii->node_count = 1;
14179                 } else if (level == rii->level) {
14180                         rii->node_count++;
14181                 }
14182 next:
14183                 path.slots[0]++;
14184         }
14185
14186 out:
14187         btrfs_release_path(&path);
14188
14189         return ret;
14190 }
14191
14192 static int maybe_repair_root_item(struct btrfs_path *path,
14193                                   const struct btrfs_key *root_key,
14194                                   const int read_only_mode)
14195 {
14196         const u64 root_id = root_key->objectid;
14197         struct cache_extent *entry;
14198         struct root_item_info *rii;
14199         struct btrfs_root_item ri;
14200         unsigned long offset;
14201
14202         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14203         if (!entry) {
14204                 fprintf(stderr,
14205                         "Error: could not find extent items for root %llu\n",
14206                         root_key->objectid);
14207                 return -ENOENT;
14208         }
14209
14210         rii = container_of(entry, struct root_item_info, cache_extent);
14211         ASSERT(rii->cache_extent.start == root_id);
14212         ASSERT(rii->cache_extent.size == 1);
14213
14214         if (rii->node_count != 1) {
14215                 fprintf(stderr,
14216                         "Error: could not find btree root extent for root %llu\n",
14217                         root_id);
14218                 return -ENOENT;
14219         }
14220
14221         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14222         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14223
14224         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14225             btrfs_root_level(&ri) != rii->level ||
14226             btrfs_root_generation(&ri) != rii->gen) {
14227
14228                 /*
14229                  * If we're in repair mode but our caller told us to not update
14230                  * the root item, i.e. just check if it needs to be updated, don't
14231                  * print this message, since the caller will call us again shortly
14232                  * for the same root item without read only mode (the caller will
14233                  * open a transaction first).
14234                  */
14235                 if (!(read_only_mode && repair))
14236                         fprintf(stderr,
14237                                 "%sroot item for root %llu,"
14238                                 " current bytenr %llu, current gen %llu, current level %u,"
14239                                 " new bytenr %llu, new gen %llu, new level %u\n",
14240                                 (read_only_mode ? "" : "fixing "),
14241                                 root_id,
14242                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14243                                 btrfs_root_level(&ri),
14244                                 rii->bytenr, rii->gen, rii->level);
14245
14246                 if (btrfs_root_generation(&ri) > rii->gen) {
14247                         fprintf(stderr,
14248                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14249                                 root_id, btrfs_root_generation(&ri), rii->gen);
14250                         return -EINVAL;
14251                 }
14252
14253                 if (!read_only_mode) {
14254                         btrfs_set_root_bytenr(&ri, rii->bytenr);
14255                         btrfs_set_root_level(&ri, rii->level);
14256                         btrfs_set_root_generation(&ri, rii->gen);
14257                         write_extent_buffer(path->nodes[0], &ri,
14258                                             offset, sizeof(ri));
14259                 }
14260
14261                 return 1;
14262         }
14263
14264         return 0;
14265 }
14266
14267 /*
14268  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14269  * caused read-only snapshots to be corrupted if they were created at a moment
14270  * when the source subvolume/snapshot had orphan items. The issue was that the
14271  * on-disk root items became incorrect, referring to the pre orphan cleanup root
14272  * node instead of the post orphan cleanup root node.
14273  * So this function, and its callees, just detects and fixes those cases. Even
14274  * though the regression was for read-only snapshots, this function applies to
14275  * any snapshot/subvolume root.
14276  * This must be run before any other repair code - not doing it so, makes other
14277  * repair code delete or modify backrefs in the extent tree for example, which
14278  * will result in an inconsistent fs after repairing the root items.
14279  */
14280 static int repair_root_items(struct btrfs_fs_info *info)
14281 {
14282         struct btrfs_path path;
14283         struct btrfs_key key;
14284         struct extent_buffer *leaf;
14285         struct btrfs_trans_handle *trans = NULL;
14286         int ret = 0;
14287         int bad_roots = 0;
14288         int need_trans = 0;
14289
14290         btrfs_init_path(&path);
14291
14292         ret = build_roots_info_cache(info);
14293         if (ret)
14294                 goto out;
14295
14296         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14297         key.type = BTRFS_ROOT_ITEM_KEY;
14298         key.offset = 0;
14299
14300 again:
14301         /*
14302          * Avoid opening and committing transactions if a leaf doesn't have
14303          * any root items that need to be fixed, so that we avoid rotating
14304          * backup roots unnecessarily.
14305          */
14306         if (need_trans) {
14307                 trans = btrfs_start_transaction(info->tree_root, 1);
14308                 if (IS_ERR(trans)) {
14309                         ret = PTR_ERR(trans);
14310                         goto out;
14311                 }
14312         }
14313
14314         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14315                                 0, trans ? 1 : 0);
14316         if (ret < 0)
14317                 goto out;
14318         leaf = path.nodes[0];
14319
14320         while (1) {
14321                 struct btrfs_key found_key;
14322
14323                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14324                         int no_more_keys = find_next_key(&path, &key);
14325
14326                         btrfs_release_path(&path);
14327                         if (trans) {
14328                                 ret = btrfs_commit_transaction(trans,
14329                                                                info->tree_root);
14330                                 trans = NULL;
14331                                 if (ret < 0)
14332                                         goto out;
14333                         }
14334                         need_trans = 0;
14335                         if (no_more_keys)
14336                                 break;
14337                         goto again;
14338                 }
14339
14340                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14341
14342                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14343                         goto next;
14344                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14345                         goto next;
14346
14347                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14348                 if (ret < 0)
14349                         goto out;
14350                 if (ret) {
14351                         if (!trans && repair) {
14352                                 need_trans = 1;
14353                                 key = found_key;
14354                                 btrfs_release_path(&path);
14355                                 goto again;
14356                         }
14357                         bad_roots++;
14358                 }
14359 next:
14360                 path.slots[0]++;
14361         }
14362         ret = 0;
14363 out:
14364         free_roots_info_cache();
14365         btrfs_release_path(&path);
14366         if (trans)
14367                 btrfs_commit_transaction(trans, info->tree_root);
14368         if (ret < 0)
14369                 return ret;
14370
14371         return bad_roots;
14372 }
14373
14374 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14375 {
14376         struct btrfs_trans_handle *trans;
14377         struct btrfs_block_group_cache *bg_cache;
14378         u64 current = 0;
14379         int ret = 0;
14380
14381         /* Clear all free space cache inodes and its extent data */
14382         while (1) {
14383                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14384                 if (!bg_cache)
14385                         break;
14386                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14387                 if (ret < 0)
14388                         return ret;
14389                 current = bg_cache->key.objectid + bg_cache->key.offset;
14390         }
14391
14392         /* Don't forget to set cache_generation to -1 */
14393         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14394         if (IS_ERR(trans)) {
14395                 error("failed to update super block cache generation");
14396                 return PTR_ERR(trans);
14397         }
14398         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14399         btrfs_commit_transaction(trans, fs_info->tree_root);
14400
14401         return ret;
14402 }
14403
14404 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14405                 int clear_version)
14406 {
14407         int ret = 0;
14408
14409         if (clear_version == 1) {
14410                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14411                         error(
14412                 "free space cache v2 detected, use --clear-space-cache v2");
14413                         ret = 1;
14414                         goto close_out;
14415                 }
14416                 printf("Clearing free space cache\n");
14417                 ret = clear_free_space_cache(fs_info);
14418                 if (ret) {
14419                         error("failed to clear free space cache");
14420                         ret = 1;
14421                 } else {
14422                         printf("Free space cache cleared\n");
14423                 }
14424         } else if (clear_version == 2) {
14425                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14426                         printf("no free space cache v2 to clear\n");
14427                         ret = 0;
14428                         goto close_out;
14429                 }
14430                 printf("Clear free space cache v2\n");
14431                 ret = btrfs_clear_free_space_tree(fs_info);
14432                 if (ret) {
14433                         error("failed to clear free space cache v2: %d", ret);
14434                         ret = 1;
14435                 } else {
14436                         printf("free space cache v2 cleared\n");
14437                 }
14438         }
14439 close_out:
14440         return ret;
14441 }
14442
/*
 * Help text of "btrfs check": a NULL-terminated list of lines that is handed
 * to usage() by cmd_check().
 */
const char * const cmd_check_usage[] = {
	/* Synopsis and description */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* Which copy of the metadata to start from, and mount checks */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--force                     skip mount checks, repair is not possible",

	/* Operation mode */
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",

	/* Reports, explicit root locations and miscellaneous options */
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
14474
14475 int cmd_check(int argc, char **argv)
14476 {
14477         struct cache_tree root_cache;
14478         struct btrfs_root *root;
14479         struct btrfs_fs_info *info;
14480         u64 bytenr = 0;
14481         u64 subvolid = 0;
14482         u64 tree_root_bytenr = 0;
14483         u64 chunk_root_bytenr = 0;
14484         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14485         int ret = 0;
14486         int err = 0;
14487         u64 num;
14488         int init_csum_tree = 0;
14489         int readonly = 0;
14490         int clear_space_cache = 0;
14491         int qgroup_report = 0;
14492         int qgroups_repaired = 0;
14493         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14494         int force = 0;
14495
14496         while(1) {
14497                 int c;
14498                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14499                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14500                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14501                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14502                         GETOPT_VAL_FORCE };
14503                 static const struct option long_options[] = {
14504                         { "super", required_argument, NULL, 's' },
14505                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14506                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14507                         { "init-csum-tree", no_argument, NULL,
14508                                 GETOPT_VAL_INIT_CSUM },
14509                         { "init-extent-tree", no_argument, NULL,
14510                                 GETOPT_VAL_INIT_EXTENT },
14511                         { "check-data-csum", no_argument, NULL,
14512                                 GETOPT_VAL_CHECK_CSUM },
14513                         { "backup", no_argument, NULL, 'b' },
14514                         { "subvol-extents", required_argument, NULL, 'E' },
14515                         { "qgroup-report", no_argument, NULL, 'Q' },
14516                         { "tree-root", required_argument, NULL, 'r' },
14517                         { "chunk-root", required_argument, NULL,
14518                                 GETOPT_VAL_CHUNK_TREE },
14519                         { "progress", no_argument, NULL, 'p' },
14520                         { "mode", required_argument, NULL,
14521                                 GETOPT_VAL_MODE },
14522                         { "clear-space-cache", required_argument, NULL,
14523                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14524                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14525                         { NULL, 0, NULL, 0}
14526                 };
14527
14528                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14529                 if (c < 0)
14530                         break;
14531                 switch(c) {
14532                         case 'a': /* ignored */ break;
14533                         case 'b':
14534                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14535                                 break;
14536                         case 's':
14537                                 num = arg_strtou64(optarg);
14538                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14539                                         error(
14540                                         "super mirror should be less than %d",
14541                                                 BTRFS_SUPER_MIRROR_MAX);
14542                                         exit(1);
14543                                 }
14544                                 bytenr = btrfs_sb_offset(((int)num));
14545                                 printf("using SB copy %llu, bytenr %llu\n", num,
14546                                        (unsigned long long)bytenr);
14547                                 break;
14548                         case 'Q':
14549                                 qgroup_report = 1;
14550                                 break;
14551                         case 'E':
14552                                 subvolid = arg_strtou64(optarg);
14553                                 break;
14554                         case 'r':
14555                                 tree_root_bytenr = arg_strtou64(optarg);
14556                                 break;
14557                         case GETOPT_VAL_CHUNK_TREE:
14558                                 chunk_root_bytenr = arg_strtou64(optarg);
14559                                 break;
14560                         case 'p':
14561                                 ctx.progress_enabled = true;
14562                                 break;
14563                         case '?':
14564                         case 'h':
14565                                 usage(cmd_check_usage);
14566                         case GETOPT_VAL_REPAIR:
14567                                 printf("enabling repair mode\n");
14568                                 repair = 1;
14569                                 ctree_flags |= OPEN_CTREE_WRITES;
14570                                 break;
14571                         case GETOPT_VAL_READONLY:
14572                                 readonly = 1;
14573                                 break;
14574                         case GETOPT_VAL_INIT_CSUM:
14575                                 printf("Creating a new CRC tree\n");
14576                                 init_csum_tree = 1;
14577                                 repair = 1;
14578                                 ctree_flags |= OPEN_CTREE_WRITES;
14579                                 break;
14580                         case GETOPT_VAL_INIT_EXTENT:
14581                                 init_extent_tree = 1;
14582                                 ctree_flags |= (OPEN_CTREE_WRITES |
14583                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14584                                 repair = 1;
14585                                 break;
14586                         case GETOPT_VAL_CHECK_CSUM:
14587                                 check_data_csum = 1;
14588                                 break;
14589                         case GETOPT_VAL_MODE:
14590                                 check_mode = parse_check_mode(optarg);
14591                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14592                                         error("unknown mode: %s", optarg);
14593                                         exit(1);
14594                                 }
14595                                 break;
14596                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14597                                 if (strcmp(optarg, "v1") == 0) {
14598                                         clear_space_cache = 1;
14599                                 } else if (strcmp(optarg, "v2") == 0) {
14600                                         clear_space_cache = 2;
14601                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14602                                 } else {
14603                                         error(
14604                 "invalid argument to --clear-space-cache, must be v1 or v2");
14605                                         exit(1);
14606                                 }
14607                                 ctree_flags |= OPEN_CTREE_WRITES;
14608                                 break;
14609                         case GETOPT_VAL_FORCE:
14610                                 force = 1;
14611                                 break;
14612                 }
14613         }
14614
14615         if (check_argc_exact(argc - optind, 1))
14616                 usage(cmd_check_usage);
14617
14618         if (ctx.progress_enabled) {
14619                 ctx.tp = TASK_NOTHING;
14620                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14621         }
14622
14623         /* This check is the only reason for --readonly to exist */
14624         if (readonly && repair) {
14625                 error("repair options are not compatible with --readonly");
14626                 exit(1);
14627         }
14628
14629         /*
14630          * experimental and dangerous
14631          */
14632         if (repair && check_mode == CHECK_MODE_LOWMEM)
14633                 warning("low-memory mode repair support is only partial");
14634
14635         radix_tree_init();
14636         cache_tree_init(&root_cache);
14637
14638         ret = check_mounted(argv[optind]);
14639         if (!force) {
14640                 if (ret < 0) {
14641                         error("could not check mount status: %s",
14642                                         strerror(-ret));
14643                         err |= !!ret;
14644                         goto err_out;
14645                 } else if (ret) {
14646                         error(
14647 "%s is currently mounted, use --force if you really intend to check the filesystem",
14648                                 argv[optind]);
14649                         ret = -EBUSY;
14650                         err |= !!ret;
14651                         goto err_out;
14652                 }
14653         } else {
14654                 if (repair) {
14655                         error("repair and --force is not yet supported");
14656                         ret = 1;
14657                         err |= !!ret;
14658                         goto err_out;
14659                 }
14660                 if (ret < 0) {
14661                         warning(
14662 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14663                                 argv[optind]);
14664                 } else if (ret) {
14665                         warning(
14666                         "filesystem mounted, continuing because of --force");
14667                 }
14668                 /* A block device is mounted in exclusive mode by kernel */
14669                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14670         }
14671
14672         /* only allow partial opening under repair mode */
14673         if (repair)
14674                 ctree_flags |= OPEN_CTREE_PARTIAL;
14675
14676         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14677                                   chunk_root_bytenr, ctree_flags);
14678         if (!info) {
14679                 error("cannot open file system");
14680                 ret = -EIO;
14681                 err |= !!ret;
14682                 goto err_out;
14683         }
14684
14685         global_info = info;
14686         root = info->fs_root;
14687         uuid_unparse(info->super_copy->fsid, uuidbuf);
14688
14689         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14690
14691         /*
14692          * Check the bare minimum before starting anything else that could rely
14693          * on it, namely the tree roots, any local consistency checks
14694          */
14695         if (!extent_buffer_uptodate(info->tree_root->node) ||
14696             !extent_buffer_uptodate(info->dev_root->node) ||
14697             !extent_buffer_uptodate(info->chunk_root->node)) {
14698                 error("critical roots corrupted, unable to check the filesystem");
14699                 err |= !!ret;
14700                 ret = -EIO;
14701                 goto close_out;
14702         }
14703
14704         if (clear_space_cache) {
14705                 ret = do_clear_free_space_cache(info, clear_space_cache);
14706                 err |= !!ret;
14707                 goto close_out;
14708         }
14709
14710         /*
14711          * repair mode will force us to commit transaction which
14712          * will make us fail to load log tree when mounting.
14713          */
14714         if (repair && btrfs_super_log_root(info->super_copy)) {
14715                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14716                 if (!ret) {
14717                         ret = 1;
14718                         err |= !!ret;
14719                         goto close_out;
14720                 }
14721                 ret = zero_log_tree(root);
14722                 err |= !!ret;
14723                 if (ret) {
14724                         error("failed to zero log tree: %d", ret);
14725                         goto close_out;
14726                 }
14727         }
14728
14729         if (qgroup_report) {
14730                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14731                        uuidbuf);
14732                 ret = qgroup_verify_all(info);
14733                 err |= !!ret;
14734                 if (ret == 0)
14735                         report_qgroups(1);
14736                 goto close_out;
14737         }
14738         if (subvolid) {
14739                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14740                        subvolid, argv[optind], uuidbuf);
14741                 ret = print_extent_state(info, subvolid);
14742                 err |= !!ret;
14743                 goto close_out;
14744         }
14745
14746         if (init_extent_tree || init_csum_tree) {
14747                 struct btrfs_trans_handle *trans;
14748
14749                 trans = btrfs_start_transaction(info->extent_root, 0);
14750                 if (IS_ERR(trans)) {
14751                         error("error starting transaction");
14752                         ret = PTR_ERR(trans);
14753                         err |= !!ret;
14754                         goto close_out;
14755                 }
14756
14757                 if (init_extent_tree) {
14758                         printf("Creating a new extent tree\n");
14759                         ret = reinit_extent_tree(trans, info);
14760                         err |= !!ret;
14761                         if (ret)
14762                                 goto close_out;
14763                 }
14764
14765                 if (init_csum_tree) {
14766                         printf("Reinitialize checksum tree\n");
14767                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14768                         if (ret) {
14769                                 error("checksum tree initialization failed: %d",
14770                                                 ret);
14771                                 ret = -EIO;
14772                                 err |= !!ret;
14773                                 goto close_out;
14774                         }
14775
14776                         ret = fill_csum_tree(trans, info->csum_root,
14777                                              init_extent_tree);
14778                         err |= !!ret;
14779                         if (ret) {
14780                                 error("checksum tree refilling failed: %d", ret);
14781                                 return -EIO;
14782                         }
14783                 }
14784                 /*
14785                  * Ok now we commit and run the normal fsck, which will add
14786                  * extent entries for all of the items it finds.
14787                  */
14788                 ret = btrfs_commit_transaction(trans, info->extent_root);
14789                 err |= !!ret;
14790                 if (ret)
14791                         goto close_out;
14792         }
14793         if (!extent_buffer_uptodate(info->extent_root->node)) {
14794                 error("critical: extent_root, unable to check the filesystem");
14795                 ret = -EIO;
14796                 err |= !!ret;
14797                 goto close_out;
14798         }
14799         if (!extent_buffer_uptodate(info->csum_root->node)) {
14800                 error("critical: csum_root, unable to check the filesystem");
14801                 ret = -EIO;
14802                 err |= !!ret;
14803                 goto close_out;
14804         }
14805
14806         ret = do_check_chunks_and_extents(info);
14807         err |= !!ret;
14808         if (ret)
14809                 error(
14810                 "errors found in extent allocation tree or chunk allocation");
14811
14812         ret = repair_root_items(info);
14813         err |= !!ret;
14814         if (ret < 0) {
14815                 error("failed to repair root items: %s", strerror(-ret));
14816                 goto close_out;
14817         }
14818         if (repair) {
14819                 fprintf(stderr, "Fixed %d roots.\n", ret);
14820                 ret = 0;
14821         } else if (ret > 0) {
14822                 fprintf(stderr,
14823                        "Found %d roots with an outdated root item.\n",
14824                        ret);
14825                 fprintf(stderr,
14826                         "Please run a filesystem check with the option --repair to fix them.\n");
14827                 ret = 1;
14828                 err |= !!ret;
14829                 goto close_out;
14830         }
14831
14832         if (!ctx.progress_enabled) {
14833                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14834                         fprintf(stderr, "checking free space tree\n");
14835                 else
14836                         fprintf(stderr, "checking free space cache\n");
14837         }
14838         ret = check_space_cache(root);
14839         err |= !!ret;
14840         if (ret) {
14841                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14842                         error("errors found in free space tree");
14843                 else
14844                         error("errors found in free space cache");
14845                 goto out;
14846         }
14847
14848         /*
14849          * We used to have to have these hole extents in between our real
14850          * extents so if we don't have this flag set we need to make sure there
14851          * are no gaps in the file extents for inodes, otherwise we can just
14852          * ignore it when this happens.
14853          */
14854         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14855         ret = do_check_fs_roots(info, &root_cache);
14856         err |= !!ret;
14857         if (ret) {
14858                 error("errors found in fs roots");
14859                 goto out;
14860         }
14861
14862         fprintf(stderr, "checking csums\n");
14863         ret = check_csums(root);
14864         err |= !!ret;
14865         if (ret) {
14866                 error("errors found in csum tree");
14867                 goto out;
14868         }
14869
14870         fprintf(stderr, "checking root refs\n");
14871         /* For low memory mode, check_fs_roots_v2 handles root refs */
14872         if (check_mode != CHECK_MODE_LOWMEM) {
14873                 ret = check_root_refs(root, &root_cache);
14874                 err |= !!ret;
14875                 if (ret) {
14876                         error("errors found in root refs");
14877                         goto out;
14878                 }
14879         }
14880
14881         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14882                 struct extent_buffer *eb;
14883
14884                 eb = list_first_entry(&root->fs_info->recow_ebs,
14885                                       struct extent_buffer, recow);
14886                 list_del_init(&eb->recow);
14887                 ret = recow_extent_buffer(root, eb);
14888                 err |= !!ret;
14889                 if (ret) {
14890                         error("fails to fix transid errors");
14891                         break;
14892                 }
14893         }
14894
14895         while (!list_empty(&delete_items)) {
14896                 struct bad_item *bad;
14897
14898                 bad = list_first_entry(&delete_items, struct bad_item, list);
14899                 list_del_init(&bad->list);
14900                 if (repair) {
14901                         ret = delete_bad_item(root, bad);
14902                         err |= !!ret;
14903                 }
14904                 free(bad);
14905         }
14906
14907         if (info->quota_enabled) {
14908                 fprintf(stderr, "checking quota groups\n");
14909                 ret = qgroup_verify_all(info);
14910                 err |= !!ret;
14911                 if (ret) {
14912                         error("failed to check quota groups");
14913                         goto out;
14914                 }
14915                 report_qgroups(0);
14916                 ret = repair_qgroups(info, &qgroups_repaired);
14917                 err |= !!ret;
14918                 if (err) {
14919                         error("failed to repair quota groups");
14920                         goto out;
14921                 }
14922                 ret = 0;
14923         }
14924
14925         if (!list_empty(&root->fs_info->recow_ebs)) {
14926                 error("transid errors in file system");
14927                 ret = 1;
14928                 err |= !!ret;
14929         }
14930 out:
14931         printf("found %llu bytes used, ",
14932                (unsigned long long)bytes_used);
14933         if (err)
14934                 printf("error(s) found\n");
14935         else
14936                 printf("no error found\n");
14937         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14938         printf("total tree bytes: %llu\n",
14939                (unsigned long long)total_btree_bytes);
14940         printf("total fs tree bytes: %llu\n",
14941                (unsigned long long)total_fs_tree_bytes);
14942         printf("total extent tree bytes: %llu\n",
14943                (unsigned long long)total_extent_tree_bytes);
14944         printf("btree space waste bytes: %llu\n",
14945                (unsigned long long)btree_space_waste);
14946         printf("file data blocks allocated: %llu\n referenced %llu\n",
14947                 (unsigned long long)data_bytes_allocated,
14948                 (unsigned long long)data_bytes_referenced);
14949
14950         free_qgroup_counts();
14951         free_root_recs_tree(&root_cache);
14952 close_out:
14953         close_ctree(root);
14954 err_out:
14955         if (ctx.progress_enabled)
14956                 task_deinit(ctx.info);
14957
14958         return err;
14959 }