btrfs-progs: check: punch_extent_hole in lowmem
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase of the check that is currently running.  print_status_check() uses
 * the value as an index into its table of progress strings, which has no
 * entry for TASK_NOTHING.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/* Context handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
        int progress_enabled;
        enum task_position tp;  /* phase whose name is printed */

        struct task_info *info; /* passed to task_period_start()/task_period_wait() */
};
60
/*
 * File-scope state of the check run.  All counters start at zero.
 * NOTE(review): the code that updates and reports these values is outside
 * this part of the file.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementations selectable by name (see parse_check_mode()).
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for an unrecognized
 * name; CHECK_MODE_DEFAULT is an alias of CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header embedded as the first member of struct data_backref and
 * struct tree_backref.  is_data selects which comparison
 * compare_extent_backref() applies to the containing record.
 */
struct extent_backref {
        struct rb_node node;    /* converted back by rb_node_to_extent_backref() */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Backref record used when extent_backref::is_data is set; ordered by
 * compare_data_backref().
 */
struct data_backref {
        struct extent_backref node;     /* common header, see to_data_backref() */
        union {
                u64 parent;             /* parent and root share storage */
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        u32 found_ref;                  /* separate from the 1-bit node.found_ref */
};
115
/*
 * Error flag bits.  Every flag has its own bit (numbering starts at bit 1),
 * so several of them can be OR-ed into one int.
 * NOTE(review): the code that sets and tests these flags is outside this part
 * of the file.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
#define DIR_INDEX_MISSING       (1<<18) /* INODE_INDEX not found */
#define DIR_INDEX_MISMATCH      (1<<19) /* INODE_INDEX found but not match */
#define DIR_COUNT_AGAIN         (1<<20) /* DIR isize should be recalculated */
136
137 static inline struct data_backref* to_data_backref(struct extent_backref *back)
138 {
139         return container_of(back, struct data_backref, node);
140 }
141
142 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
143 {
144         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
145         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
146         struct data_backref *back1 = to_data_backref(ext1);
147         struct data_backref *back2 = to_data_backref(ext2);
148
149         WARN_ON(!ext1->is_data);
150         WARN_ON(!ext2->is_data);
151
152         /* parent and root are a union, so this covers both */
153         if (back1->parent > back2->parent)
154                 return 1;
155         if (back1->parent < back2->parent)
156                 return -1;
157
158         /* This is a full backref and the parents match. */
159         if (back1->node.full_backref)
160                 return 0;
161
162         if (back1->owner > back2->owner)
163                 return 1;
164         if (back1->owner < back2->owner)
165                 return -1;
166
167         if (back1->offset > back2->offset)
168                 return 1;
169         if (back1->offset < back2->offset)
170                 return -1;
171
172         if (back1->found_ref && back2->found_ref) {
173                 if (back1->disk_bytenr > back2->disk_bytenr)
174                         return 1;
175                 if (back1->disk_bytenr < back2->disk_bytenr)
176                         return -1;
177
178                 if (back1->bytes > back2->bytes)
179                         return 1;
180                 if (back1->bytes < back2->bytes)
181                         return -1;
182         }
183
184         return 0;
185 }
186
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead of an rb_node.
 * During the extent scan it is stored in root->orphan_data_extent.
 * During the fs tree scan it is then moved to inode_rec->orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;           /* printed as "inode" by print_orphan_data_extents() */
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
201
/*
 * Backref record used when extent_backref::is_data is clear; ordered by
 * compare_tree_backref().
 */
struct tree_backref {
        struct extent_backref node;     /* common header, see to_tree_backref() */
        union {
                u64 parent;             /* parent and root share storage */
                u64 root;
        };
};
209
210 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
211 {
212         return container_of(back, struct tree_backref, node);
213 }
214
215 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
216 {
217         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
219         struct tree_backref *back1 = to_tree_backref(ext1);
220         struct tree_backref *back2 = to_tree_backref(ext2);
221
222         WARN_ON(ext1->is_data);
223         WARN_ON(ext2->is_data);
224
225         /* parent and root are a union, so this covers both */
226         if (back1->parent > back2->parent)
227                 return 1;
228         if (back1->parent < back2->parent)
229                 return -1;
230
231         return 0;
232 }
233
234 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
235 {
236         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
237         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
238
239         if (ext1->is_data > ext2->is_data)
240                 return 1;
241
242         if (ext1->is_data < ext2->is_data)
243                 return -1;
244
245         if (ext1->full_backref > ext2->full_backref)
246                 return 1;
247         if (ext1->full_backref < ext2->full_backref)
248                 return -1;
249
250         if (ext1->is_data)
251                 return compare_data_backref(node1, node2);
252         else
253                 return compare_tree_backref(node1, node2);
254 }
255
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That field is two bits wide, so it can hold this value in addition to
 * 0 and 1.
 */
enum { FLAG_UNSET = 2 };
258
/*
 * Per-extent bookkeeping record.
 * NOTE(review): the code that fills in and consumes this record is outside
 * this part of the file; only the layout is documented here.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct rb_root backref_tree;
        struct list_head list;          /* linkage used by to_extent_record() */
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
286
287 static inline struct extent_record* to_extent_record(struct list_head *entry)
288 {
289         return container_of(entry, struct extent_record, list);
290 }
291
/*
 * One backref of an inode, linked into inode_record::backrefs.
 * The name is stored inline after the fixed part; clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
struct inode_backref {
        struct list_head list;          /* linkage used by to_inode_backref() */
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        u8 filetype;
        u8 ref_type;
        int errors;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];                   /* variable-length trailing storage */
};
305
306 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
307 {
308         return list_entry(entry, struct inode_backref, list);
309 }
310
/*
 * List-linked record holding values describing one root item.
 * NOTE(review): producers and consumers are outside this part of the file.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        struct btrfs_key drop_key;
};
320
/*
 * Backref error bits; print_ref_error() turns them into text.
 * Each flag has its own bit so they can be OR-ed together.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
334
/*
 * One hole covering [start, start + len), kept in an rb-tree that
 * add_file_extent_hole() orders with compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
340
/*
 * Everything collected about one inode.  clone_inode_rec() deep-copies the
 * three containers (backrefs, orphan_extents, holes) and resets refs to 1.
 */
struct inode_record {
        struct list_head backrefs;      /* list of struct inode_backref */
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        int errors;                     /* I_ERR_* bits, see print_inode_error() */

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;
        u64 extent_end;
        struct rb_root holes;           /* tree of struct file_extent_hole */
        struct list_head orphan_extents; /* list of struct orphan_data_extent */

        u32 refs;
};
368
/*
 * Bits stored in inode_record::errors; print_inode_error() turns them into
 * text.  Each flag has its own bit so they can be OR-ed together.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
384
/*
 * One backref of a root, list-linked, with the name stored inline after the
 * fixed part.
 * NOTE(review): presumably linked into root_record::backrefs; confirm against
 * the users, which are outside this part of the file.
 */
struct root_backref {
        struct list_head list;          /* linkage used by to_root_backref() */
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        char name[0];                   /* variable-length trailing storage */
};
399
400 static inline struct root_backref* to_root_backref(struct list_head *entry)
401 {
402         return list_entry(entry, struct root_backref, list);
403 }
404
/*
 * Record for one root: a backref list head plus a cache_extent for lookup.
 * NOTE(review): users are outside this part of the file.
 */
struct root_record {
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
412
/* A cache_extent entry that carries an untyped payload pointer. */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
417
/*
 * A cache_extent entry bundling a root cache, an inode cache and the inode
 * record currently being worked on.
 * NOTE(review): users are outside this part of the file.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
425
/* Start and size of one block. */
struct block_info {
        u64 start;
        u32 size;
};
430
/*
 * State of a tree walk.
 * NOTE(review): users are outside this part of the file.
 */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];     /* BTRFS_MAX_LEVEL slots */
        int active_node;
        int root_level;
};
437
/* List-linked record naming an item by key and by the id of its root. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
443
/*
 * List-linked (bytenr, bytes) pair with two int fields, count and broken.
 * NOTE(review): users are outside this part of the file.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
451
/* Information about one root, stored as a cache_extent entry. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
461
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about the individual bits yet; they are only
 * used internally for error classification.
 *
 * Every flag must own a distinct bit so that OR-ed results stay
 * distinguishable.  CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH; it now uses the next free bit.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
478
479 static void *print_status_check(void *p)
480 {
481         struct task_ctx *priv = p;
482         const char work_indicator[] = { '.', 'o', 'O', 'o' };
483         uint32_t count = 0;
484         static char *task_position_string[] = {
485                 "checking extents",
486                 "checking free space cache",
487                 "checking fs roots",
488         };
489
490         task_period_start(priv->info, 1000 /* 1s */);
491
492         if (priv->tp == TASK_NOTHING)
493                 return NULL;
494
495         while (1) {
496                 printf("%s [%c]\r", task_position_string[priv->tp],
497                                 work_indicator[count % 4]);
498                 count++;
499                 fflush(stdout);
500                 task_period_wait(priv->info);
501         }
502         return NULL;
503 }
504
/*
 * Writes a newline to stdout and flushes it.
 *
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
512
513 static enum btrfs_check_mode parse_check_mode(const char *str)
514 {
515         if (strcmp(str, "lowmem") == 0)
516                 return CHECK_MODE_LOWMEM;
517         if (strcmp(str, "orig") == 0)
518                 return CHECK_MODE_ORIGINAL;
519         if (strcmp(str, "original") == 0)
520                 return CHECK_MODE_ORIGINAL;
521
522         return CHECK_MODE_UNKNOWN;
523 }
524
525 /* Compatible function to allow reuse of old codes */
526 static u64 first_extent_gap(struct rb_root *holes)
527 {
528         struct file_extent_hole *hole;
529
530         if (RB_EMPTY_ROOT(holes))
531                 return (u64)-1;
532
533         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
534         return hole->start;
535 }
536
537 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
538 {
539         struct file_extent_hole *hole1;
540         struct file_extent_hole *hole2;
541
542         hole1 = rb_entry(node1, struct file_extent_hole, node);
543         hole2 = rb_entry(node2, struct file_extent_hole, node);
544
545         if (hole1->start > hole2->start)
546                 return -1;
547         if (hole1->start < hole2->start)
548                 return 1;
549         /* Now hole1->start == hole2->start */
550         if (hole1->len >= hole2->len)
551                 /*
552                  * Hole 1 will be merge center
553                  * Same hole will be merged later
554                  */
555                 return -1;
556         /* Hole 2 will be merge center */
557         return 1;
558 }
559
560 /*
561  * Add a hole to the record
562  *
563  * This will do hole merge for copy_file_extent_holes(),
564  * which will ensure there won't be continuous holes.
565  */
566 static int add_file_extent_hole(struct rb_root *holes,
567                                 u64 start, u64 len)
568 {
569         struct file_extent_hole *hole;
570         struct file_extent_hole *prev = NULL;
571         struct file_extent_hole *next = NULL;
572
573         hole = malloc(sizeof(*hole));
574         if (!hole)
575                 return -ENOMEM;
576         hole->start = start;
577         hole->len = len;
578         /* Since compare will not return 0, no -EEXIST will happen */
579         rb_insert(holes, &hole->node, compare_hole);
580
581         /* simple merge with previous hole */
582         if (rb_prev(&hole->node))
583                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
584                                 node);
585         if (prev && prev->start + prev->len >= hole->start) {
586                 hole->len = hole->start + hole->len - prev->start;
587                 hole->start = prev->start;
588                 rb_erase(&prev->node, holes);
589                 free(prev);
590                 prev = NULL;
591         }
592
593         /* iterate merge with next holes */
594         while (1) {
595                 if (!rb_next(&hole->node))
596                         break;
597                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
598                                         node);
599                 if (hole->start + hole->len >= next->start) {
600                         if (hole->start + hole->len <= next->start + next->len)
601                                 hole->len = next->start + next->len -
602                                             hole->start;
603                         rb_erase(&next->node, holes);
604                         free(next);
605                         next = NULL;
606                 } else
607                         break;
608         }
609         return 0;
610 }
611
612 static int compare_hole_range(struct rb_node *node, void *data)
613 {
614         struct file_extent_hole *hole;
615         u64 start;
616
617         hole = (struct file_extent_hole *)data;
618         start = hole->start;
619
620         hole = rb_entry(node, struct file_extent_hole, node);
621         if (start < hole->start)
622                 return -1;
623         if (start >= hole->start && start < hole->start + hole->len)
624                 return 0;
625         return 1;
626 }
627
628 /*
629  * Delete a hole in the record
630  *
631  * This will do the hole split and is much restrict than add.
632  */
633 static int del_file_extent_hole(struct rb_root *holes,
634                                 u64 start, u64 len)
635 {
636         struct file_extent_hole *hole;
637         struct file_extent_hole tmp;
638         u64 prev_start = 0;
639         u64 prev_len = 0;
640         u64 next_start = 0;
641         u64 next_len = 0;
642         struct rb_node *node;
643         int have_prev = 0;
644         int have_next = 0;
645         int ret = 0;
646
647         tmp.start = start;
648         tmp.len = len;
649         node = rb_search(holes, &tmp, compare_hole_range, NULL);
650         if (!node)
651                 return -EEXIST;
652         hole = rb_entry(node, struct file_extent_hole, node);
653         if (start + len > hole->start + hole->len)
654                 return -EEXIST;
655
656         /*
657          * Now there will be no overlap, delete the hole and re-add the
658          * split(s) if they exists.
659          */
660         if (start > hole->start) {
661                 prev_start = hole->start;
662                 prev_len = start - hole->start;
663                 have_prev = 1;
664         }
665         if (hole->start + hole->len > start + len) {
666                 next_start = start + len;
667                 next_len = hole->start + hole->len - start - len;
668                 have_next = 1;
669         }
670         rb_erase(node, holes);
671         free(hole);
672         if (have_prev) {
673                 ret = add_file_extent_hole(holes, prev_start, prev_len);
674                 if (ret < 0)
675                         return ret;
676         }
677         if (have_next) {
678                 ret = add_file_extent_hole(holes, next_start, next_len);
679                 if (ret < 0)
680                         return ret;
681         }
682         return 0;
683 }
684
685 static int copy_file_extent_holes(struct rb_root *dst,
686                                   struct rb_root *src)
687 {
688         struct file_extent_hole *hole;
689         struct rb_node *node;
690         int ret = 0;
691
692         node = rb_first(src);
693         while (node) {
694                 hole = rb_entry(node, struct file_extent_hole, node);
695                 ret = add_file_extent_hole(dst, hole->start, hole->len);
696                 if (ret)
697                         break;
698                 node = rb_next(node);
699         }
700         return ret;
701 }
702
703 static void free_file_extent_holes(struct rb_root *holes)
704 {
705         struct rb_node *node;
706         struct file_extent_hole *hole;
707
708         node = rb_first(holes);
709         while (node) {
710                 hole = rb_entry(node, struct file_extent_hole, node);
711                 rb_erase(node, holes);
712                 free(hole);
713                 node = rb_first(holes);
714         }
715 }
716
/* Forward declaration; the definition is not in this part of the file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
718
719 static void record_root_in_trans(struct btrfs_trans_handle *trans,
720                                  struct btrfs_root *root)
721 {
722         if (root->last_trans != trans->transid) {
723                 root->track_dirty = 1;
724                 root->last_trans = trans->transid;
725                 root->commit_root = root->node;
726                 extent_buffer_get(root->node);
727         }
728 }
729
730 static u8 imode_to_type(u32 imode)
731 {
732 #define S_SHIFT 12
733         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
734                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
735                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
736                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
737                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
738                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
739                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
740                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
741         };
742
743         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
744 #undef S_SHIFT
745 }
746
747 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
748 {
749         struct device_record *rec1;
750         struct device_record *rec2;
751
752         rec1 = rb_entry(node1, struct device_record, node);
753         rec2 = rb_entry(node2, struct device_record, node);
754         if (rec1->devid > rec2->devid)
755                 return -1;
756         else if (rec1->devid < rec2->devid)
757                 return 1;
758         else
759                 return 0;
760 }
761
762 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
763 {
764         struct inode_record *rec;
765         struct inode_backref *backref;
766         struct inode_backref *orig;
767         struct inode_backref *tmp;
768         struct orphan_data_extent *src_orphan;
769         struct orphan_data_extent *dst_orphan;
770         struct rb_node *rb;
771         size_t size;
772         int ret;
773
774         rec = malloc(sizeof(*rec));
775         if (!rec)
776                 return ERR_PTR(-ENOMEM);
777         memcpy(rec, orig_rec, sizeof(*rec));
778         rec->refs = 1;
779         INIT_LIST_HEAD(&rec->backrefs);
780         INIT_LIST_HEAD(&rec->orphan_extents);
781         rec->holes = RB_ROOT;
782
783         list_for_each_entry(orig, &orig_rec->backrefs, list) {
784                 size = sizeof(*orig) + orig->namelen + 1;
785                 backref = malloc(size);
786                 if (!backref) {
787                         ret = -ENOMEM;
788                         goto cleanup;
789                 }
790                 memcpy(backref, orig, size);
791                 list_add_tail(&backref->list, &rec->backrefs);
792         }
793         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
794                 dst_orphan = malloc(sizeof(*dst_orphan));
795                 if (!dst_orphan) {
796                         ret = -ENOMEM;
797                         goto cleanup;
798                 }
799                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
800                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
801         }
802         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
803         if (ret < 0)
804                 goto cleanup_rb;
805
806         return rec;
807
808 cleanup_rb:
809         rb = rb_first(&rec->holes);
810         while (rb) {
811                 struct file_extent_hole *hole;
812
813                 hole = rb_entry(rb, struct file_extent_hole, node);
814                 rb = rb_next(rb);
815                 free(hole);
816         }
817
818 cleanup:
819         if (!list_empty(&rec->backrefs))
820                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
821                         list_del(&orig->list);
822                         free(orig);
823                 }
824
825         if (!list_empty(&rec->orphan_extents))
826                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
827                         list_del(&orig->list);
828                         free(orig);
829                 }
830
831         free(rec);
832
833         return ERR_PTR(ret);
834 }
835
836 static void print_orphan_data_extents(struct list_head *orphan_extents,
837                                       u64 objectid)
838 {
839         struct orphan_data_extent *orphan;
840
841         if (list_empty(orphan_extents))
842                 return;
843         printf("The following data extent is lost in tree %llu:\n",
844                objectid);
845         list_for_each_entry(orphan, orphan_extents, list) {
846                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
847                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
848                        orphan->disk_len);
849         }
850 }
851
852 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
853 {
854         u64 root_objectid = root->root_key.objectid;
855         int errors = rec->errors;
856
857         if (!errors)
858                 return;
859         /* reloc root errors, we print its corresponding fs root objectid*/
860         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
861                 root_objectid = root->root_key.offset;
862                 fprintf(stderr, "reloc");
863         }
864         fprintf(stderr, "root %llu inode %llu errors %x",
865                 (unsigned long long) root_objectid,
866                 (unsigned long long) rec->ino, rec->errors);
867
868         if (errors & I_ERR_NO_INODE_ITEM)
869                 fprintf(stderr, ", no inode item");
870         if (errors & I_ERR_NO_ORPHAN_ITEM)
871                 fprintf(stderr, ", no orphan item");
872         if (errors & I_ERR_DUP_INODE_ITEM)
873                 fprintf(stderr, ", dup inode item");
874         if (errors & I_ERR_DUP_DIR_INDEX)
875                 fprintf(stderr, ", dup dir index");
876         if (errors & I_ERR_ODD_DIR_ITEM)
877                 fprintf(stderr, ", odd dir item");
878         if (errors & I_ERR_ODD_FILE_EXTENT)
879                 fprintf(stderr, ", odd file extent");
880         if (errors & I_ERR_BAD_FILE_EXTENT)
881                 fprintf(stderr, ", bad file extent");
882         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
883                 fprintf(stderr, ", file extent overlap");
884         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
885                 fprintf(stderr, ", file extent discount");
886         if (errors & I_ERR_DIR_ISIZE_WRONG)
887                 fprintf(stderr, ", dir isize wrong");
888         if (errors & I_ERR_FILE_NBYTES_WRONG)
889                 fprintf(stderr, ", nbytes wrong");
890         if (errors & I_ERR_ODD_CSUM_ITEM)
891                 fprintf(stderr, ", odd csum item");
892         if (errors & I_ERR_SOME_CSUM_MISSING)
893                 fprintf(stderr, ", some csum missing");
894         if (errors & I_ERR_LINK_COUNT_WRONG)
895                 fprintf(stderr, ", link count wrong");
896         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
897                 fprintf(stderr, ", orphan file extent");
898         fprintf(stderr, "\n");
899         /* Print the orphan extents if needed */
900         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
901                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
902
903         /* Print the holes if needed */
904         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
905                 struct file_extent_hole *hole;
906                 struct rb_node *node;
907                 int found = 0;
908
909                 node = rb_first(&rec->holes);
910                 fprintf(stderr, "Found file extent holes:\n");
911                 while (node) {
912                         found = 1;
913                         hole = rb_entry(node, struct file_extent_hole, node);
914                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
915                                 hole->start, hole->len);
916                         node = rb_next(node);
917                 }
918                 if (!found)
919                         fprintf(stderr, "\tstart: 0, len: %llu\n",
920                                 round_up(rec->isize,
921                                          root->fs_info->sectorsize));
922         }
923 }
924
925 static void print_ref_error(int errors)
926 {
927         if (errors & REF_ERR_NO_DIR_ITEM)
928                 fprintf(stderr, ", no dir item");
929         if (errors & REF_ERR_NO_DIR_INDEX)
930                 fprintf(stderr, ", no dir index");
931         if (errors & REF_ERR_NO_INODE_REF)
932                 fprintf(stderr, ", no inode ref");
933         if (errors & REF_ERR_DUP_DIR_ITEM)
934                 fprintf(stderr, ", dup dir item");
935         if (errors & REF_ERR_DUP_DIR_INDEX)
936                 fprintf(stderr, ", dup dir index");
937         if (errors & REF_ERR_DUP_INODE_REF)
938                 fprintf(stderr, ", dup inode ref");
939         if (errors & REF_ERR_INDEX_UNMATCH)
940                 fprintf(stderr, ", index mismatch");
941         if (errors & REF_ERR_FILETYPE_UNMATCH)
942                 fprintf(stderr, ", filetype mismatch");
943         if (errors & REF_ERR_NAME_TOO_LONG)
944                 fprintf(stderr, ", name too long");
945         if (errors & REF_ERR_NO_ROOT_REF)
946                 fprintf(stderr, ", no root ref");
947         if (errors & REF_ERR_NO_ROOT_BACKREF)
948                 fprintf(stderr, ", no root backref");
949         if (errors & REF_ERR_DUP_ROOT_REF)
950                 fprintf(stderr, ", dup root ref");
951         if (errors & REF_ERR_DUP_ROOT_BACKREF)
952                 fprintf(stderr, ", dup root backref");
953         fprintf(stderr, "\n");
954 }
955
956 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
957                                           u64 ino, int mod)
958 {
959         struct ptr_node *node;
960         struct cache_extent *cache;
961         struct inode_record *rec = NULL;
962         int ret;
963
964         cache = lookup_cache_extent(inode_cache, ino, 1);
965         if (cache) {
966                 node = container_of(cache, struct ptr_node, cache);
967                 rec = node->data;
968                 if (mod && rec->refs > 1) {
969                         node->data = clone_inode_rec(rec);
970                         if (IS_ERR(node->data))
971                                 return node->data;
972                         rec->refs--;
973                         rec = node->data;
974                 }
975         } else if (mod) {
976                 rec = calloc(1, sizeof(*rec));
977                 if (!rec)
978                         return ERR_PTR(-ENOMEM);
979                 rec->ino = ino;
980                 rec->extent_start = (u64)-1;
981                 rec->refs = 1;
982                 INIT_LIST_HEAD(&rec->backrefs);
983                 INIT_LIST_HEAD(&rec->orphan_extents);
984                 rec->holes = RB_ROOT;
985
986                 node = malloc(sizeof(*node));
987                 if (!node) {
988                         free(rec);
989                         return ERR_PTR(-ENOMEM);
990                 }
991                 node->cache.start = ino;
992                 node->cache.size = 1;
993                 node->data = rec;
994
995                 if (ino == BTRFS_FREE_INO_OBJECTID)
996                         rec->found_link = 1;
997
998                 ret = insert_cache_extent(inode_cache, &node->cache);
999                 if (ret)
1000                         return ERR_PTR(-EEXIST);
1001         }
1002         return rec;
1003 }
1004
1005 static void free_orphan_data_extents(struct list_head *orphan_extents)
1006 {
1007         struct orphan_data_extent *orphan;
1008
1009         while (!list_empty(orphan_extents)) {
1010                 orphan = list_entry(orphan_extents->next,
1011                                     struct orphan_data_extent, list);
1012                 list_del(&orphan->list);
1013                 free(orphan);
1014         }
1015 }
1016
1017 static void free_inode_rec(struct inode_record *rec)
1018 {
1019         struct inode_backref *backref;
1020
1021         if (--rec->refs > 0)
1022                 return;
1023
1024         while (!list_empty(&rec->backrefs)) {
1025                 backref = to_inode_backref(rec->backrefs.next);
1026                 list_del(&backref->list);
1027                 free(backref);
1028         }
1029         free_orphan_data_extents(&rec->orphan_extents);
1030         free_file_extent_holes(&rec->holes);
1031         free(rec);
1032 }
1033
1034 static int can_free_inode_rec(struct inode_record *rec)
1035 {
1036         if (!rec->errors && rec->checked && rec->found_inode_item &&
1037             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1038                 return 1;
1039         return 0;
1040 }
1041
1042 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1043                                  struct inode_record *rec)
1044 {
1045         struct cache_extent *cache;
1046         struct inode_backref *tmp, *backref;
1047         struct ptr_node *node;
1048         u8 filetype;
1049
1050         if (!rec->found_inode_item)
1051                 return;
1052
1053         filetype = imode_to_type(rec->imode);
1054         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1055                 if (backref->found_dir_item && backref->found_dir_index) {
1056                         if (backref->filetype != filetype)
1057                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1058                         if (!backref->errors && backref->found_inode_ref &&
1059                             rec->nlink == rec->found_link) {
1060                                 list_del(&backref->list);
1061                                 free(backref);
1062                         }
1063                 }
1064         }
1065
1066         if (!rec->checked || rec->merging)
1067                 return;
1068
1069         if (S_ISDIR(rec->imode)) {
1070                 if (rec->found_size != rec->isize)
1071                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1072                 if (rec->found_file_extent)
1073                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1074         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1075                 if (rec->found_dir_item)
1076                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1077                 if (rec->found_size != rec->nbytes)
1078                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1079                 if (rec->nlink > 0 && !no_holes &&
1080                     (rec->extent_end < rec->isize ||
1081                      first_extent_gap(&rec->holes) < rec->isize))
1082                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1083         }
1084
1085         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1086                 if (rec->found_csum_item && rec->nodatasum)
1087                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1088                 if (rec->some_csum_missing && !rec->nodatasum)
1089                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1090         }
1091
1092         BUG_ON(rec->refs != 1);
1093         if (can_free_inode_rec(rec)) {
1094                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1095                 node = container_of(cache, struct ptr_node, cache);
1096                 BUG_ON(node->data != rec);
1097                 remove_cache_extent(inode_cache, &node->cache);
1098                 free(node);
1099                 free_inode_rec(rec);
1100         }
1101 }
1102
1103 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1104 {
1105         struct btrfs_path path;
1106         struct btrfs_key key;
1107         int ret;
1108
1109         key.objectid = BTRFS_ORPHAN_OBJECTID;
1110         key.type = BTRFS_ORPHAN_ITEM_KEY;
1111         key.offset = ino;
1112
1113         btrfs_init_path(&path);
1114         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1115         btrfs_release_path(&path);
1116         if (ret > 0)
1117                 ret = -ENOENT;
1118         return ret;
1119 }
1120
1121 static int process_inode_item(struct extent_buffer *eb,
1122                               int slot, struct btrfs_key *key,
1123                               struct shared_node *active_node)
1124 {
1125         struct inode_record *rec;
1126         struct btrfs_inode_item *item;
1127
1128         rec = active_node->current;
1129         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1130         if (rec->found_inode_item) {
1131                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1132                 return 1;
1133         }
1134         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1135         rec->nlink = btrfs_inode_nlink(eb, item);
1136         rec->isize = btrfs_inode_size(eb, item);
1137         rec->nbytes = btrfs_inode_nbytes(eb, item);
1138         rec->imode = btrfs_inode_mode(eb, item);
1139         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1140                 rec->nodatasum = 1;
1141         rec->found_inode_item = 1;
1142         if (rec->nlink == 0)
1143                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1144         maybe_free_inode_rec(&active_node->inode_cache, rec);
1145         return 0;
1146 }
1147
1148 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1149                                                 const char *name,
1150                                                 int namelen, u64 dir)
1151 {
1152         struct inode_backref *backref;
1153
1154         list_for_each_entry(backref, &rec->backrefs, list) {
1155                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1156                         break;
1157                 if (backref->dir != dir || backref->namelen != namelen)
1158                         continue;
1159                 if (memcmp(name, backref->name, namelen))
1160                         continue;
1161                 return backref;
1162         }
1163
1164         backref = malloc(sizeof(*backref) + namelen + 1);
1165         if (!backref)
1166                 return NULL;
1167         memset(backref, 0, sizeof(*backref));
1168         backref->dir = dir;
1169         backref->namelen = namelen;
1170         memcpy(backref->name, name, namelen);
1171         backref->name[namelen] = '\0';
1172         list_add_tail(&backref->list, &rec->backrefs);
1173         return backref;
1174 }
1175
1176 static int add_inode_backref(struct cache_tree *inode_cache,
1177                              u64 ino, u64 dir, u64 index,
1178                              const char *name, int namelen,
1179                              u8 filetype, u8 itemtype, int errors)
1180 {
1181         struct inode_record *rec;
1182         struct inode_backref *backref;
1183
1184         rec = get_inode_rec(inode_cache, ino, 1);
1185         BUG_ON(IS_ERR(rec));
1186         backref = get_inode_backref(rec, name, namelen, dir);
1187         BUG_ON(!backref);
1188         if (errors)
1189                 backref->errors |= errors;
1190         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1191                 if (backref->found_dir_index)
1192                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1193                 if (backref->found_inode_ref && backref->index != index)
1194                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1195                 if (backref->found_dir_item && backref->filetype != filetype)
1196                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1197
1198                 backref->index = index;
1199                 backref->filetype = filetype;
1200                 backref->found_dir_index = 1;
1201         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1202                 rec->found_link++;
1203                 if (backref->found_dir_item)
1204                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1205                 if (backref->found_dir_index && backref->filetype != filetype)
1206                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1207
1208                 backref->filetype = filetype;
1209                 backref->found_dir_item = 1;
1210         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1211                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1212                 if (backref->found_inode_ref)
1213                         backref->errors |= REF_ERR_DUP_INODE_REF;
1214                 if (backref->found_dir_index && backref->index != index)
1215                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1216                 else
1217                         backref->index = index;
1218
1219                 backref->ref_type = itemtype;
1220                 backref->found_inode_ref = 1;
1221         } else {
1222                 BUG_ON(1);
1223         }
1224
1225         maybe_free_inode_rec(inode_cache, rec);
1226         return 0;
1227 }
1228
1229 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1230                             struct cache_tree *dst_cache)
1231 {
1232         struct inode_backref *backref;
1233         u32 dir_count = 0;
1234         int ret = 0;
1235
1236         dst->merging = 1;
1237         list_for_each_entry(backref, &src->backrefs, list) {
1238                 if (backref->found_dir_index) {
1239                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1240                                         backref->index, backref->name,
1241                                         backref->namelen, backref->filetype,
1242                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1243                 }
1244                 if (backref->found_dir_item) {
1245                         dir_count++;
1246                         add_inode_backref(dst_cache, dst->ino,
1247                                         backref->dir, 0, backref->name,
1248                                         backref->namelen, backref->filetype,
1249                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1250                 }
1251                 if (backref->found_inode_ref) {
1252                         add_inode_backref(dst_cache, dst->ino,
1253                                         backref->dir, backref->index,
1254                                         backref->name, backref->namelen, 0,
1255                                         backref->ref_type, backref->errors);
1256                 }
1257         }
1258
1259         if (src->found_dir_item)
1260                 dst->found_dir_item = 1;
1261         if (src->found_file_extent)
1262                 dst->found_file_extent = 1;
1263         if (src->found_csum_item)
1264                 dst->found_csum_item = 1;
1265         if (src->some_csum_missing)
1266                 dst->some_csum_missing = 1;
1267         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1268                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1269                 if (ret < 0)
1270                         return ret;
1271         }
1272
1273         BUG_ON(src->found_link < dir_count);
1274         dst->found_link += src->found_link - dir_count;
1275         dst->found_size += src->found_size;
1276         if (src->extent_start != (u64)-1) {
1277                 if (dst->extent_start == (u64)-1) {
1278                         dst->extent_start = src->extent_start;
1279                         dst->extent_end = src->extent_end;
1280                 } else {
1281                         if (dst->extent_end > src->extent_start)
1282                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1283                         else if (dst->extent_end < src->extent_start) {
1284                                 ret = add_file_extent_hole(&dst->holes,
1285                                         dst->extent_end,
1286                                         src->extent_start - dst->extent_end);
1287                         }
1288                         if (dst->extent_end < src->extent_end)
1289                                 dst->extent_end = src->extent_end;
1290                 }
1291         }
1292
1293         dst->errors |= src->errors;
1294         if (src->found_inode_item) {
1295                 if (!dst->found_inode_item) {
1296                         dst->nlink = src->nlink;
1297                         dst->isize = src->isize;
1298                         dst->nbytes = src->nbytes;
1299                         dst->imode = src->imode;
1300                         dst->nodatasum = src->nodatasum;
1301                         dst->found_inode_item = 1;
1302                 } else {
1303                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1304                 }
1305         }
1306         dst->merging = 0;
1307
1308         return 0;
1309 }
1310
1311 static int splice_shared_node(struct shared_node *src_node,
1312                               struct shared_node *dst_node)
1313 {
1314         struct cache_extent *cache;
1315         struct ptr_node *node, *ins;
1316         struct cache_tree *src, *dst;
1317         struct inode_record *rec, *conflict;
1318         u64 current_ino = 0;
1319         int splice = 0;
1320         int ret;
1321
1322         if (--src_node->refs == 0)
1323                 splice = 1;
1324         if (src_node->current)
1325                 current_ino = src_node->current->ino;
1326
1327         src = &src_node->root_cache;
1328         dst = &dst_node->root_cache;
1329 again:
1330         cache = search_cache_extent(src, 0);
1331         while (cache) {
1332                 node = container_of(cache, struct ptr_node, cache);
1333                 rec = node->data;
1334                 cache = next_cache_extent(cache);
1335
1336                 if (splice) {
1337                         remove_cache_extent(src, &node->cache);
1338                         ins = node;
1339                 } else {
1340                         ins = malloc(sizeof(*ins));
1341                         BUG_ON(!ins);
1342                         ins->cache.start = node->cache.start;
1343                         ins->cache.size = node->cache.size;
1344                         ins->data = rec;
1345                         rec->refs++;
1346                 }
1347                 ret = insert_cache_extent(dst, &ins->cache);
1348                 if (ret == -EEXIST) {
1349                         conflict = get_inode_rec(dst, rec->ino, 1);
1350                         BUG_ON(IS_ERR(conflict));
1351                         merge_inode_recs(rec, conflict, dst);
1352                         if (rec->checked) {
1353                                 conflict->checked = 1;
1354                                 if (dst_node->current == conflict)
1355                                         dst_node->current = NULL;
1356                         }
1357                         maybe_free_inode_rec(dst, conflict);
1358                         free_inode_rec(rec);
1359                         free(ins);
1360                 } else {
1361                         BUG_ON(ret);
1362                 }
1363         }
1364
1365         if (src == &src_node->root_cache) {
1366                 src = &src_node->inode_cache;
1367                 dst = &dst_node->inode_cache;
1368                 goto again;
1369         }
1370
1371         if (current_ino > 0 && (!dst_node->current ||
1372             current_ino > dst_node->current->ino)) {
1373                 if (dst_node->current) {
1374                         dst_node->current->checked = 1;
1375                         maybe_free_inode_rec(dst, dst_node->current);
1376                 }
1377                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1378                 BUG_ON(IS_ERR(dst_node->current));
1379         }
1380         return 0;
1381 }
1382
1383 static void free_inode_ptr(struct cache_extent *cache)
1384 {
1385         struct ptr_node *node;
1386         struct inode_record *rec;
1387
1388         node = container_of(cache, struct ptr_node, cache);
1389         rec = node->data;
1390         free_inode_rec(rec);
1391         free(node);
1392 }
1393
1394 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1395
1396 static struct shared_node *find_shared_node(struct cache_tree *shared,
1397                                             u64 bytenr)
1398 {
1399         struct cache_extent *cache;
1400         struct shared_node *node;
1401
1402         cache = lookup_cache_extent(shared, bytenr, 1);
1403         if (cache) {
1404                 node = container_of(cache, struct shared_node, cache);
1405                 return node;
1406         }
1407         return NULL;
1408 }
1409
1410 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1411 {
1412         int ret;
1413         struct shared_node *node;
1414
1415         node = calloc(1, sizeof(*node));
1416         if (!node)
1417                 return -ENOMEM;
1418         node->cache.start = bytenr;
1419         node->cache.size = 1;
1420         cache_tree_init(&node->root_cache);
1421         cache_tree_init(&node->inode_cache);
1422         node->refs = refs;
1423
1424         ret = insert_cache_extent(shared, &node->cache);
1425
1426         return ret;
1427 }
1428
1429 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1430                              struct walk_control *wc, int level)
1431 {
1432         struct shared_node *node;
1433         struct shared_node *dest;
1434         int ret;
1435
1436         if (level == wc->active_node)
1437                 return 0;
1438
1439         BUG_ON(wc->active_node <= level);
1440         node = find_shared_node(&wc->shared, bytenr);
1441         if (!node) {
1442                 ret = add_shared_node(&wc->shared, bytenr, refs);
1443                 BUG_ON(ret);
1444                 node = find_shared_node(&wc->shared, bytenr);
1445                 wc->nodes[level] = node;
1446                 wc->active_node = level;
1447                 return 0;
1448         }
1449
1450         if (wc->root_level == wc->active_node &&
1451             btrfs_root_refs(&root->root_item) == 0) {
1452                 if (--node->refs == 0) {
1453                         free_inode_recs_tree(&node->root_cache);
1454                         free_inode_recs_tree(&node->inode_cache);
1455                         remove_cache_extent(&wc->shared, &node->cache);
1456                         free(node);
1457                 }
1458                 return 1;
1459         }
1460
1461         dest = wc->nodes[wc->active_node];
1462         splice_shared_node(node, dest);
1463         if (node->refs == 0) {
1464                 remove_cache_extent(&wc->shared, &node->cache);
1465                 free(node);
1466         }
1467         return 1;
1468 }
1469
1470 static int leave_shared_node(struct btrfs_root *root,
1471                              struct walk_control *wc, int level)
1472 {
1473         struct shared_node *node;
1474         struct shared_node *dest;
1475         int i;
1476
1477         if (level == wc->root_level)
1478                 return 0;
1479
1480         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1481                 if (wc->nodes[i])
1482                         break;
1483         }
1484         BUG_ON(i >= BTRFS_MAX_LEVEL);
1485
1486         node = wc->nodes[wc->active_node];
1487         wc->nodes[wc->active_node] = NULL;
1488         wc->active_node = i;
1489
1490         dest = wc->nodes[wc->active_node];
1491         if (wc->active_node < wc->root_level ||
1492             btrfs_root_refs(&root->root_item) > 0) {
1493                 BUG_ON(node->refs <= 1);
1494                 splice_shared_node(node, dest);
1495         } else {
1496                 BUG_ON(node->refs < 2);
1497                 node->refs--;
1498         }
1499         return 0;
1500 }
1501
1502 /*
1503  * Returns:
1504  * < 0 - on error
1505  * 1   - if the root with id child_root_id is a child of root parent_root_id
1506  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1507  *       has other root(s) as parent(s)
1508  * 2   - if the root child_root_id doesn't have any parent roots
1509  */
1510 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1511                          u64 child_root_id)
1512 {
1513         struct btrfs_path path;
1514         struct btrfs_key key;
1515         struct extent_buffer *leaf;
1516         int has_parent = 0;
1517         int ret;
1518
1519         btrfs_init_path(&path);
1520
1521         key.objectid = parent_root_id;
1522         key.type = BTRFS_ROOT_REF_KEY;
1523         key.offset = child_root_id;
1524         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1525                                 0, 0);
1526         if (ret < 0)
1527                 return ret;
1528         btrfs_release_path(&path);
1529         if (!ret)
1530                 return 1;
1531
1532         key.objectid = child_root_id;
1533         key.type = BTRFS_ROOT_BACKREF_KEY;
1534         key.offset = 0;
1535         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1536                                 0, 0);
1537         if (ret < 0)
1538                 goto out;
1539
1540         while (1) {
1541                 leaf = path.nodes[0];
1542                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1543                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1544                         if (ret)
1545                                 break;
1546                         leaf = path.nodes[0];
1547                 }
1548
1549                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1550                 if (key.objectid != child_root_id ||
1551                     key.type != BTRFS_ROOT_BACKREF_KEY)
1552                         break;
1553
1554                 has_parent = 1;
1555
1556                 if (key.offset == parent_root_id) {
1557                         btrfs_release_path(&path);
1558                         return 1;
1559                 }
1560
1561                 path.slots[0]++;
1562         }
1563 out:
1564         btrfs_release_path(&path);
1565         if (ret < 0)
1566                 return ret;
1567         return has_parent ? 0 : 2;
1568 }
1569
1570 static int process_dir_item(struct extent_buffer *eb,
1571                             int slot, struct btrfs_key *key,
1572                             struct shared_node *active_node)
1573 {
1574         u32 total;
1575         u32 cur = 0;
1576         u32 len;
1577         u32 name_len;
1578         u32 data_len;
1579         int error;
1580         int nritems = 0;
1581         u8 filetype;
1582         struct btrfs_dir_item *di;
1583         struct inode_record *rec;
1584         struct cache_tree *root_cache;
1585         struct cache_tree *inode_cache;
1586         struct btrfs_key location;
1587         char namebuf[BTRFS_NAME_LEN];
1588
1589         root_cache = &active_node->root_cache;
1590         inode_cache = &active_node->inode_cache;
1591         rec = active_node->current;
1592         rec->found_dir_item = 1;
1593
1594         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1595         total = btrfs_item_size_nr(eb, slot);
1596         while (cur < total) {
1597                 nritems++;
1598                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1599                 name_len = btrfs_dir_name_len(eb, di);
1600                 data_len = btrfs_dir_data_len(eb, di);
1601                 filetype = btrfs_dir_type(eb, di);
1602
1603                 rec->found_size += name_len;
1604                 if (cur + sizeof(*di) + name_len > total ||
1605                     name_len > BTRFS_NAME_LEN) {
1606                         error = REF_ERR_NAME_TOO_LONG;
1607
1608                         if (cur + sizeof(*di) > total)
1609                                 break;
1610                         len = min_t(u32, total - cur - sizeof(*di),
1611                                     BTRFS_NAME_LEN);
1612                 } else {
1613                         len = name_len;
1614                         error = 0;
1615                 }
1616
1617                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1618
1619                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1620                     key->offset != btrfs_name_hash(namebuf, len)) {
1621                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1622                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1623                         key->objectid, key->offset, namebuf, len, filetype,
1624                         key->offset, btrfs_name_hash(namebuf, len));
1625                 }
1626
1627                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1628                         add_inode_backref(inode_cache, location.objectid,
1629                                           key->objectid, key->offset, namebuf,
1630                                           len, filetype, key->type, error);
1631                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1632                         add_inode_backref(root_cache, location.objectid,
1633                                           key->objectid, key->offset,
1634                                           namebuf, len, filetype,
1635                                           key->type, error);
1636                 } else {
1637                         fprintf(stderr, "invalid location in dir item %u\n",
1638                                 location.type);
1639                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1640                                           key->objectid, key->offset, namebuf,
1641                                           len, filetype, key->type, error);
1642                 }
1643
1644                 len = sizeof(*di) + name_len + data_len;
1645                 di = (struct btrfs_dir_item *)((char *)di + len);
1646                 cur += len;
1647         }
1648         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1649                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1650
1651         return 0;
1652 }
1653
1654 static int process_inode_ref(struct extent_buffer *eb,
1655                              int slot, struct btrfs_key *key,
1656                              struct shared_node *active_node)
1657 {
1658         u32 total;
1659         u32 cur = 0;
1660         u32 len;
1661         u32 name_len;
1662         u64 index;
1663         int error;
1664         struct cache_tree *inode_cache;
1665         struct btrfs_inode_ref *ref;
1666         char namebuf[BTRFS_NAME_LEN];
1667
1668         inode_cache = &active_node->inode_cache;
1669
1670         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1671         total = btrfs_item_size_nr(eb, slot);
1672         while (cur < total) {
1673                 name_len = btrfs_inode_ref_name_len(eb, ref);
1674                 index = btrfs_inode_ref_index(eb, ref);
1675
1676                 /* inode_ref + namelen should not cross item boundary */
1677                 if (cur + sizeof(*ref) + name_len > total ||
1678                     name_len > BTRFS_NAME_LEN) {
1679                         if (total < cur + sizeof(*ref))
1680                                 break;
1681
1682                         /* Still try to read out the remaining part */
1683                         len = min_t(u32, total - cur - sizeof(*ref),
1684                                     BTRFS_NAME_LEN);
1685                         error = REF_ERR_NAME_TOO_LONG;
1686                 } else {
1687                         len = name_len;
1688                         error = 0;
1689                 }
1690
1691                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1692                 add_inode_backref(inode_cache, key->objectid, key->offset,
1693                                   index, namebuf, len, 0, key->type, error);
1694
1695                 len = sizeof(*ref) + name_len;
1696                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1697                 cur += len;
1698         }
1699         return 0;
1700 }
1701
1702 static int process_inode_extref(struct extent_buffer *eb,
1703                                 int slot, struct btrfs_key *key,
1704                                 struct shared_node *active_node)
1705 {
1706         u32 total;
1707         u32 cur = 0;
1708         u32 len;
1709         u32 name_len;
1710         u64 index;
1711         u64 parent;
1712         int error;
1713         struct cache_tree *inode_cache;
1714         struct btrfs_inode_extref *extref;
1715         char namebuf[BTRFS_NAME_LEN];
1716
1717         inode_cache = &active_node->inode_cache;
1718
1719         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1720         total = btrfs_item_size_nr(eb, slot);
1721         while (cur < total) {
1722                 name_len = btrfs_inode_extref_name_len(eb, extref);
1723                 index = btrfs_inode_extref_index(eb, extref);
1724                 parent = btrfs_inode_extref_parent(eb, extref);
1725                 if (name_len <= BTRFS_NAME_LEN) {
1726                         len = name_len;
1727                         error = 0;
1728                 } else {
1729                         len = BTRFS_NAME_LEN;
1730                         error = REF_ERR_NAME_TOO_LONG;
1731                 }
1732                 read_extent_buffer(eb, namebuf,
1733                                    (unsigned long)(extref + 1), len);
1734                 add_inode_backref(inode_cache, key->objectid, parent,
1735                                   index, namebuf, len, 0, key->type, error);
1736
1737                 len = sizeof(*extref) + name_len;
1738                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1739                 cur += len;
1740         }
1741         return 0;
1742
1743 }
1744
1745 static int count_csum_range(struct btrfs_root *root, u64 start,
1746                             u64 len, u64 *found)
1747 {
1748         struct btrfs_key key;
1749         struct btrfs_path path;
1750         struct extent_buffer *leaf;
1751         int ret;
1752         size_t size;
1753         *found = 0;
1754         u64 csum_end;
1755         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1756
1757         btrfs_init_path(&path);
1758
1759         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1760         key.offset = start;
1761         key.type = BTRFS_EXTENT_CSUM_KEY;
1762
1763         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1764                                 &key, &path, 0, 0);
1765         if (ret < 0)
1766                 goto out;
1767         if (ret > 0 && path.slots[0] > 0) {
1768                 leaf = path.nodes[0];
1769                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1770                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1771                     key.type == BTRFS_EXTENT_CSUM_KEY)
1772                         path.slots[0]--;
1773         }
1774
1775         while (len > 0) {
1776                 leaf = path.nodes[0];
1777                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1778                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1779                         if (ret > 0)
1780                                 break;
1781                         else if (ret < 0)
1782                                 goto out;
1783                         leaf = path.nodes[0];
1784                 }
1785
1786                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1787                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1788                     key.type != BTRFS_EXTENT_CSUM_KEY)
1789                         break;
1790
1791                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1792                 if (key.offset >= start + len)
1793                         break;
1794
1795                 if (key.offset > start)
1796                         start = key.offset;
1797
1798                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1799                 csum_end = key.offset + (size / csum_size) *
1800                            root->fs_info->sectorsize;
1801                 if (csum_end > start) {
1802                         size = min(csum_end - start, len);
1803                         len -= size;
1804                         start += size;
1805                         *found += size;
1806                 }
1807
1808                 path.slots[0]++;
1809         }
1810 out:
1811         btrfs_release_path(&path);
1812         if (ret < 0)
1813                 return ret;
1814         return 0;
1815 }
1816
1817 static int process_file_extent(struct btrfs_root *root,
1818                                 struct extent_buffer *eb,
1819                                 int slot, struct btrfs_key *key,
1820                                 struct shared_node *active_node)
1821 {
1822         struct inode_record *rec;
1823         struct btrfs_file_extent_item *fi;
1824         u64 num_bytes = 0;
1825         u64 disk_bytenr = 0;
1826         u64 extent_offset = 0;
1827         u64 mask = root->fs_info->sectorsize - 1;
1828         int extent_type;
1829         int ret;
1830
1831         rec = active_node->current;
1832         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1833         rec->found_file_extent = 1;
1834
1835         if (rec->extent_start == (u64)-1) {
1836                 rec->extent_start = key->offset;
1837                 rec->extent_end = key->offset;
1838         }
1839
1840         if (rec->extent_end > key->offset)
1841                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1842         else if (rec->extent_end < key->offset) {
1843                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1844                                            key->offset - rec->extent_end);
1845                 if (ret < 0)
1846                         return ret;
1847         }
1848
1849         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1850         extent_type = btrfs_file_extent_type(eb, fi);
1851
1852         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1853                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1854                 if (num_bytes == 0)
1855                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1856                 rec->found_size += num_bytes;
1857                 num_bytes = (num_bytes + mask) & ~mask;
1858         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1859                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1861                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1862                 extent_offset = btrfs_file_extent_offset(eb, fi);
1863                 if (num_bytes == 0 || (num_bytes & mask))
1864                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1865                 if (num_bytes + extent_offset >
1866                     btrfs_file_extent_ram_bytes(eb, fi))
1867                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1868                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1869                     (btrfs_file_extent_compression(eb, fi) ||
1870                      btrfs_file_extent_encryption(eb, fi) ||
1871                      btrfs_file_extent_other_encoding(eb, fi)))
1872                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1873                 if (disk_bytenr > 0)
1874                         rec->found_size += num_bytes;
1875         } else {
1876                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1877         }
1878         rec->extent_end = key->offset + num_bytes;
1879
1880         /*
1881          * The data reloc tree will copy full extents into its inode and then
1882          * copy the corresponding csums.  Because the extent it copied could be
1883          * a preallocated extent that hasn't been written to yet there may be no
1884          * csums to copy, ergo we won't have csums for our file extent.  This is
1885          * ok so just don't bother checking csums if the inode belongs to the
1886          * data reloc tree.
1887          */
1888         if (disk_bytenr > 0 &&
1889             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1890                 u64 found;
1891                 if (btrfs_file_extent_compression(eb, fi))
1892                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1893                 else
1894                         disk_bytenr += extent_offset;
1895
1896                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1897                 if (ret < 0)
1898                         return ret;
1899                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1900                         if (found > 0)
1901                                 rec->found_csum_item = 1;
1902                         if (found < num_bytes)
1903                                 rec->some_csum_missing = 1;
1904                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1905                         if (found > 0)
1906                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1907                 }
1908         }
1909         return 0;
1910 }
1911
1912 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1913                             struct walk_control *wc)
1914 {
1915         struct btrfs_key key;
1916         u32 nritems;
1917         int i;
1918         int ret = 0;
1919         struct cache_tree *inode_cache;
1920         struct shared_node *active_node;
1921
1922         if (wc->root_level == wc->active_node &&
1923             btrfs_root_refs(&root->root_item) == 0)
1924                 return 0;
1925
1926         active_node = wc->nodes[wc->active_node];
1927         inode_cache = &active_node->inode_cache;
1928         nritems = btrfs_header_nritems(eb);
1929         for (i = 0; i < nritems; i++) {
1930                 btrfs_item_key_to_cpu(eb, &key, i);
1931
1932                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1933                         continue;
1934                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1935                         continue;
1936
1937                 if (active_node->current == NULL ||
1938                     active_node->current->ino < key.objectid) {
1939                         if (active_node->current) {
1940                                 active_node->current->checked = 1;
1941                                 maybe_free_inode_rec(inode_cache,
1942                                                      active_node->current);
1943                         }
1944                         active_node->current = get_inode_rec(inode_cache,
1945                                                              key.objectid, 1);
1946                         BUG_ON(IS_ERR(active_node->current));
1947                 }
1948                 switch (key.type) {
1949                 case BTRFS_DIR_ITEM_KEY:
1950                 case BTRFS_DIR_INDEX_KEY:
1951                         ret = process_dir_item(eb, i, &key, active_node);
1952                         break;
1953                 case BTRFS_INODE_REF_KEY:
1954                         ret = process_inode_ref(eb, i, &key, active_node);
1955                         break;
1956                 case BTRFS_INODE_EXTREF_KEY:
1957                         ret = process_inode_extref(eb, i, &key, active_node);
1958                         break;
1959                 case BTRFS_INODE_ITEM_KEY:
1960                         ret = process_inode_item(eb, i, &key, active_node);
1961                         break;
1962                 case BTRFS_EXTENT_DATA_KEY:
1963                         ret = process_file_extent(root, eb, i, &key,
1964                                                   active_node);
1965                         break;
1966                 default:
1967                         break;
1968                 };
1969         }
1970         return ret;
1971 }
1972
/*
 * Per-level cache of the tree block most recently looked up at each tree
 * level, so the extent reference lookup need not be repeated for the same
 * block.  All arrays are indexed by tree level.
 */
1973 struct node_refs {
1974         u64 bytenr[BTRFS_MAX_LEVEL];	/* bytenr of the block last looked up */
1975         u64 refs[BTRFS_MAX_LEVEL];	/* reference count found for that block */
1976         int need_check[BTRFS_MAX_LEVEL];	/* non-zero if this root should check the block */
1977 };
1978
1979 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1980                              struct node_refs *nrefs, u64 level);
1981 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1982                             unsigned int ext_ref);
1983
1984 /*
1985  * Returns >0  Found error, not fatal, should continue
1986  * Returns <0  Fatal error, must exit the whole check
1987  * Returns 0   No errors found
1988  */
1989 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1990                                struct node_refs *nrefs, int *level, int ext_ref)
1991 {
1992         struct extent_buffer *cur = path->nodes[0];
1993         struct btrfs_key key;
1994         u64 cur_bytenr;
1995         u32 nritems;
1996         u64 first_ino = 0;
1997         int root_level = btrfs_header_level(root->node);
1998         int i;
1999         int ret = 0; /* Final return value */
2000         int err = 0; /* Positive error bitmap */
2001
2002         cur_bytenr = cur->start;
2003
2004         /* skip to first inode item or the first inode number change */
2005         nritems = btrfs_header_nritems(cur);
2006         for (i = 0; i < nritems; i++) {
2007                 btrfs_item_key_to_cpu(cur, &key, i);
2008                 if (i == 0)
2009                         first_ino = key.objectid;
2010                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2011                     (first_ino && first_ino != key.objectid))
2012                         break;
2013         }
2014         if (i == nritems) {
2015                 path->slots[0] = nritems;
2016                 return 0;
2017         }
2018         path->slots[0] = i;
2019
2020 again:
2021         err |= check_inode_item(root, path, ext_ref);
2022
2023         /* modify cur since check_inode_item may change path */
2024         cur = path->nodes[0];
2025
2026         if (err & LAST_ITEM)
2027                 goto out;
2028
2029         /* still have inode items in thie leaf */
2030         if (cur->start == cur_bytenr)
2031                 goto again;
2032
2033         /*
2034          * we have switched to another leaf, above nodes may
2035          * have changed, here walk down the path, if a node
2036          * or leaf is shared, check whether we can skip this
2037          * node or leaf.
2038          */
2039         for (i = root_level; i >= 0; i--) {
2040                 if (path->nodes[i]->start == nrefs->bytenr[i])
2041                         continue;
2042
2043                 ret = update_nodes_refs(root,
2044                                 path->nodes[i]->start,
2045                                 nrefs, i);
2046                 if (ret)
2047                         goto out;
2048
2049                 if (!nrefs->need_check[i]) {
2050                         *level += 1;
2051                         break;
2052                 }
2053         }
2054
2055         for (i = 0; i < *level; i++) {
2056                 free_extent_buffer(path->nodes[i]);
2057                 path->nodes[i] = NULL;
2058         }
2059 out:
2060         err &= ~LAST_ITEM;
2061         if (err && !ret)
2062                 ret = err;
2063         return ret;
2064 }
2065
2066 static void reada_walk_down(struct btrfs_root *root,
2067                             struct extent_buffer *node, int slot)
2068 {
2069         struct btrfs_fs_info *fs_info = root->fs_info;
2070         u64 bytenr;
2071         u64 ptr_gen;
2072         u32 nritems;
2073         int i;
2074         int level;
2075
2076         level = btrfs_header_level(node);
2077         if (level != 1)
2078                 return;
2079
2080         nritems = btrfs_header_nritems(node);
2081         for (i = slot; i < nritems; i++) {
2082                 bytenr = btrfs_node_blockptr(node, i);
2083                 ptr_gen = btrfs_node_ptr_generation(node, i);
2084                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2085         }
2086 }
2087
2088 /*
2089  * Check the child node/leaf by the following condition:
2090  * 1. the first item key of the node/leaf should be the same with the one
2091  *    in parent.
2092  * 2. block in parent node should match the child node/leaf.
2093  * 3. generation of parent node and child's header should be consistent.
2094  *
2095  * Or the child node/leaf pointed by the key in parent is not valid.
2096  *
2097  * We hope to check leaf owner too, but since subvol may share leaves,
2098  * which makes leaf owner check not so strong, key check should be
2099  * sufficient enough for that case.
2100  */
2101 static int check_child_node(struct extent_buffer *parent, int slot,
2102                             struct extent_buffer *child)
2103 {
2104         struct btrfs_key parent_key;
2105         struct btrfs_key child_key;
2106         int ret = 0;
2107
2108         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2109         if (btrfs_header_level(child) == 0)
2110                 btrfs_item_key_to_cpu(child, &child_key, 0);
2111         else
2112                 btrfs_node_key_to_cpu(child, &child_key, 0);
2113
2114         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2115                 ret = -EINVAL;
2116                 fprintf(stderr,
2117                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2118                         parent_key.objectid, parent_key.type, parent_key.offset,
2119                         child_key.objectid, child_key.type, child_key.offset);
2120         }
2121         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2122                 ret = -EINVAL;
2123                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2124                         btrfs_node_blockptr(parent, slot),
2125                         btrfs_header_bytenr(child));
2126         }
2127         if (btrfs_node_ptr_generation(parent, slot) !=
2128             btrfs_header_generation(child)) {
2129                 ret = -EINVAL;
2130                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2131                         btrfs_header_generation(child),
2132                         btrfs_node_ptr_generation(parent, slot));
2133         }
2134         return ret;
2135 }
2136
2137 /*
2138  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2139  * in every fs or file tree check. Here we find its all root ids, and only check
2140  * it in the fs or file tree which has the smallest root id.
2141  */
2142 static int need_check(struct btrfs_root *root, struct ulist *roots)
2143 {
2144         struct rb_node *node;
2145         struct ulist_node *u;
2146
2147         if (roots->nnodes == 1)
2148                 return 1;
2149
2150         node = rb_first(&roots->root);
2151         u = rb_entry(node, struct ulist_node, rb_node);
2152         /*
2153          * current root id is not smallest, we skip it and let it be checked
2154          * in the fs or file tree who hash the smallest root id.
2155          */
2156         if (root->objectid != u->val)
2157                 return 0;
2158
2159         return 1;
2160 }
2161
2162 /*
2163  * for a tree node or leaf, we record its reference count, so later if we still
2164  * process this node or leaf, don't need to compute its reference count again.
2165  */
2166 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2167                              struct node_refs *nrefs, u64 level)
2168 {
2169         int check, ret;
2170         u64 refs;
2171         struct ulist *roots;
2172
2173         if (nrefs->bytenr[level] != bytenr) {
2174                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2175                                        level, 1, &refs, NULL);
2176                 if (ret < 0)
2177                         return ret;
2178
2179                 nrefs->bytenr[level] = bytenr;
2180                 nrefs->refs[level] = refs;
2181                 if (refs > 1) {
2182                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2183                                                    0, &roots);
2184                         if (ret)
2185                                 return -EIO;
2186
2187                         check = need_check(root, roots);
2188                         ulist_free(roots);
2189                         nrefs->need_check[level] = check;
2190                 } else {
2191                         nrefs->need_check[level] = 1;
2192                 }
2193         }
2194
2195         return 0;
2196 }
2197
2198 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2199                           struct walk_control *wc, int *level,
2200                           struct node_refs *nrefs)
2201 {
2202         enum btrfs_tree_block_status status;
2203         u64 bytenr;
2204         u64 ptr_gen;
2205         struct btrfs_fs_info *fs_info = root->fs_info;
2206         struct extent_buffer *next;
2207         struct extent_buffer *cur;
2208         int ret, err = 0;
2209         u64 refs;
2210
2211         WARN_ON(*level < 0);
2212         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2213
2214         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2215                 refs = nrefs->refs[*level];
2216                 ret = 0;
2217         } else {
2218                 ret = btrfs_lookup_extent_info(NULL, root,
2219                                        path->nodes[*level]->start,
2220                                        *level, 1, &refs, NULL);
2221                 if (ret < 0) {
2222                         err = ret;
2223                         goto out;
2224                 }
2225                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2226                 nrefs->refs[*level] = refs;
2227         }
2228
2229         if (refs > 1) {
2230                 ret = enter_shared_node(root, path->nodes[*level]->start,
2231                                         refs, wc, *level);
2232                 if (ret > 0) {
2233                         err = ret;
2234                         goto out;
2235                 }
2236         }
2237
2238         while (*level >= 0) {
2239                 WARN_ON(*level < 0);
2240                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2241                 cur = path->nodes[*level];
2242
2243                 if (btrfs_header_level(cur) != *level)
2244                         WARN_ON(1);
2245
2246                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2247                         break;
2248                 if (*level == 0) {
2249                         ret = process_one_leaf(root, cur, wc);
2250                         if (ret < 0)
2251                                 err = ret;
2252                         break;
2253                 }
2254                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2255                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2256
2257                 if (bytenr == nrefs->bytenr[*level - 1]) {
2258                         refs = nrefs->refs[*level - 1];
2259                 } else {
2260                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2261                                         *level - 1, 1, &refs, NULL);
2262                         if (ret < 0) {
2263                                 refs = 0;
2264                         } else {
2265                                 nrefs->bytenr[*level - 1] = bytenr;
2266                                 nrefs->refs[*level - 1] = refs;
2267                         }
2268                 }
2269
2270                 if (refs > 1) {
2271                         ret = enter_shared_node(root, bytenr, refs,
2272                                                 wc, *level - 1);
2273                         if (ret > 0) {
2274                                 path->slots[*level]++;
2275                                 continue;
2276                         }
2277                 }
2278
2279                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2280                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281                         free_extent_buffer(next);
2282                         reada_walk_down(root, cur, path->slots[*level]);
2283                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2284                         if (!extent_buffer_uptodate(next)) {
2285                                 struct btrfs_key node_key;
2286
2287                                 btrfs_node_key_to_cpu(path->nodes[*level],
2288                                                       &node_key,
2289                                                       path->slots[*level]);
2290                                 btrfs_add_corrupt_extent_record(root->fs_info,
2291                                                 &node_key,
2292                                                 path->nodes[*level]->start,
2293                                                 root->fs_info->nodesize,
2294                                                 *level);
2295                                 err = -EIO;
2296                                 goto out;
2297                         }
2298                 }
2299
2300                 ret = check_child_node(cur, path->slots[*level], next);
2301                 if (ret) {
2302                         free_extent_buffer(next);
2303                         err = ret;
2304                         goto out;
2305                 }
2306
2307                 if (btrfs_is_leaf(next))
2308                         status = btrfs_check_leaf(root, NULL, next);
2309                 else
2310                         status = btrfs_check_node(root, NULL, next);
2311                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2312                         free_extent_buffer(next);
2313                         err = -EIO;
2314                         goto out;
2315                 }
2316
2317                 *level = *level - 1;
2318                 free_extent_buffer(path->nodes[*level]);
2319                 path->nodes[*level] = next;
2320                 path->slots[*level] = 0;
2321         }
2322 out:
2323         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2324         return err;
2325 }
2326
2327 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2328                             unsigned int ext_ref);
2329
2330 /*
2331  * Returns >0  Found error, should continue
2332  * Returns <0  Fatal error, must exit the whole check
2333  * Returns 0   No errors found
2334  */
2335 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2336                              int *level, struct node_refs *nrefs, int ext_ref)
2337 {
2338         enum btrfs_tree_block_status status;
2339         u64 bytenr;
2340         u64 ptr_gen;
2341         struct btrfs_fs_info *fs_info = root->fs_info;
2342         struct extent_buffer *next;
2343         struct extent_buffer *cur;
2344         int ret;
2345
2346         WARN_ON(*level < 0);
2347         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2348
2349         ret = update_nodes_refs(root, path->nodes[*level]->start,
2350                                 nrefs, *level);
2351         if (ret < 0)
2352                 return ret;
2353
2354         while (*level >= 0) {
2355                 WARN_ON(*level < 0);
2356                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2357                 cur = path->nodes[*level];
2358
2359                 if (btrfs_header_level(cur) != *level)
2360                         WARN_ON(1);
2361
2362                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2363                         break;
2364                 /* Don't forgot to check leaf/node validation */
2365                 if (*level == 0) {
2366                         ret = btrfs_check_leaf(root, NULL, cur);
2367                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368                                 ret = -EIO;
2369                                 break;
2370                         }
2371                         ret = process_one_leaf_v2(root, path, nrefs,
2372                                                   level, ext_ref);
2373                         cur = path->nodes[*level];
2374                         break;
2375                 } else {
2376                         ret = btrfs_check_node(root, NULL, cur);
2377                         if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2378                                 ret = -EIO;
2379                                 break;
2380                         }
2381                 }
2382                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2383                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2384
2385                 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2386                 if (ret)
2387                         break;
2388                 if (!nrefs->need_check[*level - 1]) {
2389                         path->slots[*level]++;
2390                         continue;
2391                 }
2392
2393                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2394                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2395                         free_extent_buffer(next);
2396                         reada_walk_down(root, cur, path->slots[*level]);
2397                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2398                         if (!extent_buffer_uptodate(next)) {
2399                                 struct btrfs_key node_key;
2400
2401                                 btrfs_node_key_to_cpu(path->nodes[*level],
2402                                                       &node_key,
2403                                                       path->slots[*level]);
2404                                 btrfs_add_corrupt_extent_record(fs_info,
2405                                                 &node_key,
2406                                                 path->nodes[*level]->start,
2407                                                 fs_info->nodesize,
2408                                                 *level);
2409                                 ret = -EIO;
2410                                 break;
2411                         }
2412                 }
2413
2414                 ret = check_child_node(cur, path->slots[*level], next);
2415                 if (ret < 0) 
2416                         break;
2417
2418                 if (btrfs_is_leaf(next))
2419                         status = btrfs_check_leaf(root, NULL, next);
2420                 else
2421                         status = btrfs_check_node(root, NULL, next);
2422                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2423                         free_extent_buffer(next);
2424                         ret = -EIO;
2425                         break;
2426                 }
2427
2428                 *level = *level - 1;
2429                 free_extent_buffer(path->nodes[*level]);
2430                 path->nodes[*level] = next;
2431                 path->slots[*level] = 0;
2432         }
2433         return ret;
2434 }
2435
2436 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2437                         struct walk_control *wc, int *level)
2438 {
2439         int i;
2440         struct extent_buffer *leaf;
2441
2442         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2443                 leaf = path->nodes[i];
2444                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2445                         path->slots[i]++;
2446                         *level = i;
2447                         return 0;
2448                 } else {
2449                         free_extent_buffer(path->nodes[*level]);
2450                         path->nodes[*level] = NULL;
2451                         BUG_ON(*level > wc->active_node);
2452                         if (*level == wc->active_node)
2453                                 leave_shared_node(root, wc, *level);
2454                         *level = i + 1;
2455                 }
2456         }
2457         return 1;
2458 }
2459
2460 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2461                            int *level)
2462 {
2463         int i;
2464         struct extent_buffer *leaf;
2465
2466         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2467                 leaf = path->nodes[i];
2468                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2469                         path->slots[i]++;
2470                         *level = i;
2471                         return 0;
2472                 } else {
2473                         free_extent_buffer(path->nodes[*level]);
2474                         path->nodes[*level] = NULL;
2475                         *level = i + 1;
2476                 }
2477         }
2478         return 1;
2479 }
2480
2481 static int check_root_dir(struct inode_record *rec)
2482 {
2483         struct inode_backref *backref;
2484         int ret = -1;
2485
2486         if (!rec->found_inode_item || rec->errors)
2487                 goto out;
2488         if (rec->nlink != 1 || rec->found_link != 0)
2489                 goto out;
2490         if (list_empty(&rec->backrefs))
2491                 goto out;
2492         backref = to_inode_backref(rec->backrefs.next);
2493         if (!backref->found_inode_ref)
2494                 goto out;
2495         if (backref->index != 0 || backref->namelen != 2 ||
2496             memcmp(backref->name, "..", 2))
2497                 goto out;
2498         if (backref->found_dir_index || backref->found_dir_item)
2499                 goto out;
2500         ret = 0;
2501 out:
2502         return ret;
2503 }
2504
2505 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2506                               struct btrfs_root *root, struct btrfs_path *path,
2507                               struct inode_record *rec)
2508 {
2509         struct btrfs_inode_item *ei;
2510         struct btrfs_key key;
2511         int ret;
2512
2513         key.objectid = rec->ino;
2514         key.type = BTRFS_INODE_ITEM_KEY;
2515         key.offset = (u64)-1;
2516
2517         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2518         if (ret < 0)
2519                 goto out;
2520         if (ret) {
2521                 if (!path->slots[0]) {
2522                         ret = -ENOENT;
2523                         goto out;
2524                 }
2525                 path->slots[0]--;
2526                 ret = 0;
2527         }
2528         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2529         if (key.objectid != rec->ino) {
2530                 ret = -ENOENT;
2531                 goto out;
2532         }
2533
2534         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2535                             struct btrfs_inode_item);
2536         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2537         btrfs_mark_buffer_dirty(path->nodes[0]);
2538         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2539         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2540                root->root_key.objectid);
2541 out:
2542         btrfs_release_path(path);
2543         return ret;
2544 }
2545
2546 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2547                                     struct btrfs_root *root,
2548                                     struct btrfs_path *path,
2549                                     struct inode_record *rec)
2550 {
2551         int ret;
2552
2553         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2554         btrfs_release_path(path);
2555         if (!ret)
2556                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2557         return ret;
2558 }
2559
2560 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2561                                struct btrfs_root *root,
2562                                struct btrfs_path *path,
2563                                struct inode_record *rec)
2564 {
2565         struct btrfs_inode_item *ei;
2566         struct btrfs_key key;
2567         int ret = 0;
2568
2569         key.objectid = rec->ino;
2570         key.type = BTRFS_INODE_ITEM_KEY;
2571         key.offset = 0;
2572
2573         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2574         if (ret) {
2575                 if (ret > 0)
2576                         ret = -ENOENT;
2577                 goto out;
2578         }
2579
2580         /* Since ret == 0, no need to check anything */
2581         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2582                             struct btrfs_inode_item);
2583         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2584         btrfs_mark_buffer_dirty(path->nodes[0]);
2585         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2586         printf("reset nbytes for ino %llu root %llu\n",
2587                rec->ino, root->root_key.objectid);
2588 out:
2589         btrfs_release_path(path);
2590         return ret;
2591 }
2592
2593 static int add_missing_dir_index(struct btrfs_root *root,
2594                                  struct cache_tree *inode_cache,
2595                                  struct inode_record *rec,
2596                                  struct inode_backref *backref)
2597 {
2598         struct btrfs_path path;
2599         struct btrfs_trans_handle *trans;
2600         struct btrfs_dir_item *dir_item;
2601         struct extent_buffer *leaf;
2602         struct btrfs_key key;
2603         struct btrfs_disk_key disk_key;
2604         struct inode_record *dir_rec;
2605         unsigned long name_ptr;
2606         u32 data_size = sizeof(*dir_item) + backref->namelen;
2607         int ret;
2608
2609         trans = btrfs_start_transaction(root, 1);
2610         if (IS_ERR(trans))
2611                 return PTR_ERR(trans);
2612
2613         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2614                 (unsigned long long)rec->ino);
2615
2616         btrfs_init_path(&path);
2617         key.objectid = backref->dir;
2618         key.type = BTRFS_DIR_INDEX_KEY;
2619         key.offset = backref->index;
2620         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2621         BUG_ON(ret);
2622
2623         leaf = path.nodes[0];
2624         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2625
2626         disk_key.objectid = cpu_to_le64(rec->ino);
2627         disk_key.type = BTRFS_INODE_ITEM_KEY;
2628         disk_key.offset = 0;
2629
2630         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2631         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2632         btrfs_set_dir_data_len(leaf, dir_item, 0);
2633         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2634         name_ptr = (unsigned long)(dir_item + 1);
2635         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2636         btrfs_mark_buffer_dirty(leaf);
2637         btrfs_release_path(&path);
2638         btrfs_commit_transaction(trans, root);
2639
2640         backref->found_dir_index = 1;
2641         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2642         BUG_ON(IS_ERR(dir_rec));
2643         if (!dir_rec)
2644                 return 0;
2645         dir_rec->found_size += backref->namelen;
2646         if (dir_rec->found_size == dir_rec->isize &&
2647             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2648                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2649         if (dir_rec->found_size != dir_rec->isize)
2650                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2651
2652         return 0;
2653 }
2654
2655 static int delete_dir_index(struct btrfs_root *root,
2656                             struct inode_backref *backref)
2657 {
2658         struct btrfs_trans_handle *trans;
2659         struct btrfs_dir_item *di;
2660         struct btrfs_path path;
2661         int ret = 0;
2662
2663         trans = btrfs_start_transaction(root, 1);
2664         if (IS_ERR(trans))
2665                 return PTR_ERR(trans);
2666
2667         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2668                 (unsigned long long)backref->dir,
2669                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2670                 (unsigned long long)root->objectid);
2671
2672         btrfs_init_path(&path);
2673         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2674                                     backref->name, backref->namelen,
2675                                     backref->index, -1);
2676         if (IS_ERR(di)) {
2677                 ret = PTR_ERR(di);
2678                 btrfs_release_path(&path);
2679                 btrfs_commit_transaction(trans, root);
2680                 if (ret == -ENOENT)
2681                         return 0;
2682                 return ret;
2683         }
2684
2685         if (!di)
2686                 ret = btrfs_del_item(trans, root, &path);
2687         else
2688                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689         BUG_ON(ret);
2690         btrfs_release_path(&path);
2691         btrfs_commit_transaction(trans, root);
2692         return ret;
2693 }
2694
2695 static int __create_inode_item(struct btrfs_trans_handle *trans,
2696                                struct btrfs_root *root, u64 ino, u64 size,
2697                                u64 nbytes, u64 nlink, u32 mode)
2698 {
2699         struct btrfs_inode_item ii;
2700         time_t now = time(NULL);
2701         int ret;
2702
2703         btrfs_set_stack_inode_size(&ii, size);
2704         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2705         btrfs_set_stack_inode_nlink(&ii, nlink);
2706         btrfs_set_stack_inode_mode(&ii, mode);
2707         btrfs_set_stack_inode_generation(&ii, trans->transid);
2708         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2709         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2710         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2711         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2712         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2713         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2714         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2715
2716         ret = btrfs_insert_inode(trans, root, ino, &ii);
2717         ASSERT(!ret);
2718
2719         warning("root %llu inode %llu recreating inode item, this may "
2720                 "be incomplete, please check permissions and content after "
2721                 "the fsck completes.\n", (unsigned long long)root->objectid,
2722                 (unsigned long long)ino);
2723
2724         return 0;
2725 }
2726
2727 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2728                                     struct btrfs_root *root, u64 ino,
2729                                     u8 filetype)
2730 {
2731         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2732
2733         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2734 }
2735
2736 static int create_inode_item(struct btrfs_root *root,
2737                              struct inode_record *rec, int root_dir)
2738 {
2739         struct btrfs_trans_handle *trans;
2740         u64 nlink = 0;
2741         u32 mode = 0;
2742         u64 size = 0;
2743         int ret;
2744
2745         trans = btrfs_start_transaction(root, 1);
2746         if (IS_ERR(trans)) {
2747                 ret = PTR_ERR(trans);
2748                 return ret;
2749         }
2750
2751         nlink = root_dir ? 1 : rec->found_link;
2752         if (rec->found_dir_item) {
2753                 if (rec->found_file_extent)
2754                         fprintf(stderr, "root %llu inode %llu has both a dir "
2755                                 "item and extents, unsure if it is a dir or a "
2756                                 "regular file so setting it as a directory\n",
2757                                 (unsigned long long)root->objectid,
2758                                 (unsigned long long)rec->ino);
2759                 mode = S_IFDIR | 0755;
2760                 size = rec->found_size;
2761         } else if (!rec->found_dir_item) {
2762                 size = rec->extent_end;
2763                 mode =  S_IFREG | 0755;
2764         }
2765
2766         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2767                                   nlink, mode);
2768         btrfs_commit_transaction(trans, root);
2769         return 0;
2770 }
2771
2772 static int repair_inode_backrefs(struct btrfs_root *root,
2773                                  struct inode_record *rec,
2774                                  struct cache_tree *inode_cache,
2775                                  int delete)
2776 {
2777         struct inode_backref *tmp, *backref;
2778         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2779         int ret = 0;
2780         int repaired = 0;
2781
2782         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2783                 if (!delete && rec->ino == root_dirid) {
2784                         if (!rec->found_inode_item) {
2785                                 ret = create_inode_item(root, rec, 1);
2786                                 if (ret)
2787                                         break;
2788                                 repaired++;
2789                         }
2790                 }
2791
2792                 /* Index 0 for root dir's are special, don't mess with it */
2793                 if (rec->ino == root_dirid && backref->index == 0)
2794                         continue;
2795
2796                 if (delete &&
2797                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2798                      (backref->found_dir_index && backref->found_inode_ref &&
2799                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2800                         ret = delete_dir_index(root, backref);
2801                         if (ret)
2802                                 break;
2803                         repaired++;
2804                         list_del(&backref->list);
2805                         free(backref);
2806                         continue;
2807                 }
2808
2809                 if (!delete && !backref->found_dir_index &&
2810                     backref->found_dir_item && backref->found_inode_ref) {
2811                         ret = add_missing_dir_index(root, inode_cache, rec,
2812                                                     backref);
2813                         if (ret)
2814                                 break;
2815                         repaired++;
2816                         if (backref->found_dir_item &&
2817                             backref->found_dir_index) {
2818                                 if (!backref->errors &&
2819                                     backref->found_inode_ref) {
2820                                         list_del(&backref->list);
2821                                         free(backref);
2822                                         continue;
2823                                 }
2824                         }
2825                 }
2826
2827                 if (!delete && (!backref->found_dir_index &&
2828                                 !backref->found_dir_item &&
2829                                 backref->found_inode_ref)) {
2830                         struct btrfs_trans_handle *trans;
2831                         struct btrfs_key location;
2832
2833                         ret = check_dir_conflict(root, backref->name,
2834                                                  backref->namelen,
2835                                                  backref->dir,
2836                                                  backref->index);
2837                         if (ret) {
2838                                 /*
2839                                  * let nlink fixing routine to handle it,
2840                                  * which can do it better.
2841                                  */
2842                                 ret = 0;
2843                                 break;
2844                         }
2845                         location.objectid = rec->ino;
2846                         location.type = BTRFS_INODE_ITEM_KEY;
2847                         location.offset = 0;
2848
2849                         trans = btrfs_start_transaction(root, 1);
2850                         if (IS_ERR(trans)) {
2851                                 ret = PTR_ERR(trans);
2852                                 break;
2853                         }
2854                         fprintf(stderr, "adding missing dir index/item pair "
2855                                 "for inode %llu\n",
2856                                 (unsigned long long)rec->ino);
2857                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2858                                                     backref->namelen,
2859                                                     backref->dir, &location,
2860                                                     imode_to_type(rec->imode),
2861                                                     backref->index);
2862                         BUG_ON(ret);
2863                         btrfs_commit_transaction(trans, root);
2864                         repaired++;
2865                 }
2866
2867                 if (!delete && (backref->found_inode_ref &&
2868                                 backref->found_dir_index &&
2869                                 backref->found_dir_item &&
2870                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2871                                 !rec->found_inode_item)) {
2872                         ret = create_inode_item(root, rec, 0);
2873                         if (ret)
2874                                 break;
2875                         repaired++;
2876                 }
2877
2878         }
2879         return ret ? ret : repaired;
2880 }
2881
2882 /*
2883  * To determine the file type for nlink/inode_item repair
2884  *
2885  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2886  * Return -ENOENT if file type is not found.
2887  */
2888 static int find_file_type(struct inode_record *rec, u8 *type)
2889 {
2890         struct inode_backref *backref;
2891
2892         /* For inode item recovered case */
2893         if (rec->found_inode_item) {
2894                 *type = imode_to_type(rec->imode);
2895                 return 0;
2896         }
2897
2898         list_for_each_entry(backref, &rec->backrefs, list) {
2899                 if (backref->found_dir_index || backref->found_dir_item) {
2900                         *type = backref->filetype;
2901                         return 0;
2902                 }
2903         }
2904         return -ENOENT;
2905 }
2906
2907 /*
2908  * To determine the file name for nlink repair
2909  *
2910  * Return 0 if file name is found, set name and namelen.
2911  * Return -ENOENT if file name is not found.
2912  */
2913 static int find_file_name(struct inode_record *rec,
2914                           char *name, int *namelen)
2915 {
2916         struct inode_backref *backref;
2917
2918         list_for_each_entry(backref, &rec->backrefs, list) {
2919                 if (backref->found_dir_index || backref->found_dir_item ||
2920                     backref->found_inode_ref) {
2921                         memcpy(name, backref->name, backref->namelen);
2922                         *namelen = backref->namelen;
2923                         return 0;
2924                 }
2925         }
2926         return -ENOENT;
2927 }
2928
2929 /* Reset the nlink of the inode to the correct one */
2930 static int reset_nlink(struct btrfs_trans_handle *trans,
2931                        struct btrfs_root *root,
2932                        struct btrfs_path *path,
2933                        struct inode_record *rec)
2934 {
2935         struct inode_backref *backref;
2936         struct inode_backref *tmp;
2937         struct btrfs_key key;
2938         struct btrfs_inode_item *inode_item;
2939         int ret = 0;
2940
2941         /* We don't believe this either, reset it and iterate backref */
2942         rec->found_link = 0;
2943
2944         /* Remove all backref including the valid ones */
2945         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2946                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2947                                    backref->index, backref->name,
2948                                    backref->namelen, 0);
2949                 if (ret < 0)
2950                         goto out;
2951
2952                 /* remove invalid backref, so it won't be added back */
2953                 if (!(backref->found_dir_index &&
2954                       backref->found_dir_item &&
2955                       backref->found_inode_ref)) {
2956                         list_del(&backref->list);
2957                         free(backref);
2958                 } else {
2959                         rec->found_link++;
2960                 }
2961         }
2962
2963         /* Set nlink to 0 */
2964         key.objectid = rec->ino;
2965         key.type = BTRFS_INODE_ITEM_KEY;
2966         key.offset = 0;
2967         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2968         if (ret < 0)
2969                 goto out;
2970         if (ret > 0) {
2971                 ret = -ENOENT;
2972                 goto out;
2973         }
2974         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2975                                     struct btrfs_inode_item);
2976         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2977         btrfs_mark_buffer_dirty(path->nodes[0]);
2978         btrfs_release_path(path);
2979
2980         /*
2981          * Add back valid inode_ref/dir_item/dir_index,
2982          * add_link() will handle the nlink inc, so new nlink must be correct
2983          */
2984         list_for_each_entry(backref, &rec->backrefs, list) {
2985                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2986                                      backref->name, backref->namelen,
2987                                      backref->filetype, &backref->index, 1, 0);
2988                 if (ret < 0)
2989                         goto out;
2990         }
2991 out:
2992         btrfs_release_path(path);
2993         return ret;
2994 }
2995
2996 static int get_highest_inode(struct btrfs_trans_handle *trans,
2997                                 struct btrfs_root *root,
2998                                 struct btrfs_path *path,
2999                                 u64 *highest_ino)
3000 {
3001         struct btrfs_key key, found_key;
3002         int ret;
3003
3004         btrfs_init_path(path);
3005         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3006         key.offset = -1;
3007         key.type = BTRFS_INODE_ITEM_KEY;
3008         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3009         if (ret == 1) {
3010                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3011                                 path->slots[0] - 1);
3012                 *highest_ino = found_key.objectid;
3013                 ret = 0;
3014         }
3015         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3016                 ret = -EOVERFLOW;
3017         btrfs_release_path(path);
3018         return ret;
3019 }
3020
3021 /*
3022  * Link inode to dir 'lost+found'. Increase @ref_count.
3023  *
3024  * Returns 0 means success.
3025  * Returns <0 means failure.
3026  */
3027 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3028                                    struct btrfs_root *root,
3029                                    struct btrfs_path *path,
3030                                    u64 ino, char *namebuf, u32 name_len,
3031                                    u8 filetype, u64 *ref_count)
3032 {
3033         char *dir_name = "lost+found";
3034         u64 lost_found_ino;
3035         int ret;
3036         u32 mode = 0700;
3037
3038         btrfs_release_path(path);
3039         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3040         if (ret < 0)
3041                 goto out;
3042         lost_found_ino++;
3043
3044         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3045                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3046                           mode);
3047         if (ret < 0) {
3048                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3049                 goto out;
3050         }
3051         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3052                              namebuf, name_len, filetype, NULL, 1, 0);
3053         /*
3054          * Add ".INO" suffix several times to handle case where
3055          * "FILENAME.INO" is already taken by another file.
3056          */
3057         while (ret == -EEXIST) {
3058                 /*
3059                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3060                  */
3061                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3062                         ret = -EFBIG;
3063                         goto out;
3064                 }
3065                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3066                          ".%llu", ino);
3067                 name_len += count_digits(ino) + 1;
3068                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3069                                      name_len, filetype, NULL, 1, 0);
3070         }
3071         if (ret < 0) {
3072                 error("failed to link the inode %llu to %s dir: %s",
3073                       ino, dir_name, strerror(-ret));
3074                 goto out;
3075         }
3076
3077         ++*ref_count;
3078         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3079                name_len, namebuf, dir_name);
3080 out:
3081         btrfs_release_path(path);
3082         if (ret)
3083                 error("failed to move file '%.*s' to '%s' dir", name_len,
3084                                 namebuf, dir_name);
3085         return ret;
3086 }
3087
3088 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3089                                struct btrfs_root *root,
3090                                struct btrfs_path *path,
3091                                struct inode_record *rec)
3092 {
3093         char namebuf[BTRFS_NAME_LEN] = {0};
3094         u8 type = 0;
3095         int namelen = 0;
3096         int name_recovered = 0;
3097         int type_recovered = 0;
3098         int ret = 0;
3099
3100         /*
3101          * Get file name and type first before these invalid inode ref
3102          * are deleted by remove_all_invalid_backref()
3103          */
3104         name_recovered = !find_file_name(rec, namebuf, &namelen);
3105         type_recovered = !find_file_type(rec, &type);
3106
3107         if (!name_recovered) {
3108                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3109                        rec->ino, rec->ino);
3110                 namelen = count_digits(rec->ino);
3111                 sprintf(namebuf, "%llu", rec->ino);
3112                 name_recovered = 1;
3113         }
3114         if (!type_recovered) {
3115                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3116                        rec->ino);
3117                 type = BTRFS_FT_REG_FILE;
3118                 type_recovered = 1;
3119         }
3120
3121         ret = reset_nlink(trans, root, path, rec);
3122         if (ret < 0) {
3123                 fprintf(stderr,
3124                         "Failed to reset nlink for inode %llu: %s\n",
3125                         rec->ino, strerror(-ret));
3126                 goto out;
3127         }
3128
3129         if (rec->found_link == 0) {
3130                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3131                                               namebuf, namelen, type,
3132                                               (u64 *)&rec->found_link);
3133                 if (ret)
3134                         goto out;
3135         }
3136         printf("Fixed the nlink of inode %llu\n", rec->ino);
3137 out:
3138         /*
3139          * Clear the flag anyway, or we will loop forever for the same inode
3140          * as it will not be removed from the bad inode list and the dead loop
3141          * happens.
3142          */
3143         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3144         btrfs_release_path(path);
3145         return ret;
3146 }
3147
3148 /*
3149  * Check if there is any normal(reg or prealloc) file extent for given
3150  * ino.
3151  * This is used to determine the file type when neither its dir_index/item or
3152  * inode_item exists.
3153  *
3154  * This will *NOT* report error, if any error happens, just consider it does
3155  * not have any normal file extent.
3156  */
3157 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3158 {
3159         struct btrfs_path path;
3160         struct btrfs_key key;
3161         struct btrfs_key found_key;
3162         struct btrfs_file_extent_item *fi;
3163         u8 type;
3164         int ret = 0;
3165
3166         btrfs_init_path(&path);
3167         key.objectid = ino;
3168         key.type = BTRFS_EXTENT_DATA_KEY;
3169         key.offset = 0;
3170
3171         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3172         if (ret < 0) {
3173                 ret = 0;
3174                 goto out;
3175         }
3176         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3177                 ret = btrfs_next_leaf(root, &path);
3178                 if (ret) {
3179                         ret = 0;
3180                         goto out;
3181                 }
3182         }
3183         while (1) {
3184                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3185                                       path.slots[0]);
3186                 if (found_key.objectid != ino ||
3187                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3188                         break;
3189                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3190                                     struct btrfs_file_extent_item);
3191                 type = btrfs_file_extent_type(path.nodes[0], fi);
3192                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3193                         ret = 1;
3194                         goto out;
3195                 }
3196         }
3197 out:
3198         btrfs_release_path(&path);
3199         return ret;
3200 }
3201
3202 static u32 btrfs_type_to_imode(u8 type)
3203 {
3204         static u32 imode_by_btrfs_type[] = {
3205                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3206                 [BTRFS_FT_DIR]          = S_IFDIR,
3207                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3208                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3209                 [BTRFS_FT_FIFO]         = S_IFIFO,
3210                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3211                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3212         };
3213
3214         return imode_by_btrfs_type[(type)];
3215 }
3216
3217 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3218                                 struct btrfs_root *root,
3219                                 struct btrfs_path *path,
3220                                 struct inode_record *rec)
3221 {
3222         u8 filetype;
3223         u32 mode = 0700;
3224         int type_recovered = 0;
3225         int ret = 0;
3226
3227         printf("Trying to rebuild inode:%llu\n", rec->ino);
3228
3229         type_recovered = !find_file_type(rec, &filetype);
3230
3231         /*
3232          * Try to determine inode type if type not found.
3233          *
3234          * For found regular file extent, it must be FILE.
3235          * For found dir_item/index, it must be DIR.
3236          *
3237          * For undetermined one, use FILE as fallback.
3238          *
3239          * TODO:
3240          * 1. If found backref(inode_index/item is already handled) to it,
3241          *    it must be DIR.
3242          *    Need new inode-inode ref structure to allow search for that.
3243          */
3244         if (!type_recovered) {
3245                 if (rec->found_file_extent &&
3246                     find_normal_file_extent(root, rec->ino)) {
3247                         type_recovered = 1;
3248                         filetype = BTRFS_FT_REG_FILE;
3249                 } else if (rec->found_dir_item) {
3250                         type_recovered = 1;
3251                         filetype = BTRFS_FT_DIR;
3252                 } else if (!list_empty(&rec->orphan_extents)) {
3253                         type_recovered = 1;
3254                         filetype = BTRFS_FT_REG_FILE;
3255                 } else{
3256                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3257                                rec->ino);
3258                         type_recovered = 1;
3259                         filetype = BTRFS_FT_REG_FILE;
3260                 }
3261         }
3262
3263         ret = btrfs_new_inode(trans, root, rec->ino,
3264                               mode | btrfs_type_to_imode(filetype));
3265         if (ret < 0)
3266                 goto out;
3267
3268         /*
3269          * Here inode rebuild is done, we only rebuild the inode item,
3270          * don't repair the nlink(like move to lost+found).
3271          * That is the job of nlink repair.
3272          *
3273          * We just fill the record and return
3274          */
3275         rec->found_dir_item = 1;
3276         rec->imode = mode | btrfs_type_to_imode(filetype);
3277         rec->nlink = 0;
3278         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3279         /* Ensure the inode_nlinks repair function will be called */
3280         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3281 out:
3282         return ret;
3283 }
3284
3285 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3286                                       struct btrfs_root *root,
3287                                       struct btrfs_path *path,
3288                                       struct inode_record *rec)
3289 {
3290         struct orphan_data_extent *orphan;
3291         struct orphan_data_extent *tmp;
3292         int ret = 0;
3293
3294         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3295                 /*
3296                  * Check for conflicting file extents
3297                  *
3298                  * Here we don't know whether the extents is compressed or not,
3299                  * so we can only assume it not compressed nor data offset,
3300                  * and use its disk_len as extent length.
3301                  */
3302                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3303                                        orphan->offset, orphan->disk_len, 0);
3304                 btrfs_release_path(path);
3305                 if (ret < 0)
3306                         goto out;
3307                 if (!ret) {
3308                         fprintf(stderr,
3309                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3310                                 orphan->disk_bytenr, orphan->disk_len);
3311                         ret = btrfs_free_extent(trans,
3312                                         root->fs_info->extent_root,
3313                                         orphan->disk_bytenr, orphan->disk_len,
3314                                         0, root->objectid, orphan->objectid,
3315                                         orphan->offset);
3316                         if (ret < 0)
3317                                 goto out;
3318                 }
3319                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3320                                 orphan->offset, orphan->disk_bytenr,
3321                                 orphan->disk_len, orphan->disk_len);
3322                 if (ret < 0)
3323                         goto out;
3324
3325                 /* Update file size info */
3326                 rec->found_size += orphan->disk_len;
3327                 if (rec->found_size == rec->nbytes)
3328                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3329
3330                 /* Update the file extent hole info too */
3331                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3332                                            orphan->disk_len);
3333                 if (ret < 0)
3334                         goto out;
3335                 if (RB_EMPTY_ROOT(&rec->holes))
3336                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3337
3338                 list_del(&orphan->list);
3339                 free(orphan);
3340         }
3341         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3342 out:
3343         return ret;
3344 }
3345
3346 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3347                                         struct btrfs_root *root,
3348                                         struct btrfs_path *path,
3349                                         struct inode_record *rec)
3350 {
3351         struct rb_node *node;
3352         struct file_extent_hole *hole;
3353         int found = 0;
3354         int ret = 0;
3355
3356         node = rb_first(&rec->holes);
3357
3358         while (node) {
3359                 found = 1;
3360                 hole = rb_entry(node, struct file_extent_hole, node);
3361                 ret = btrfs_punch_hole(trans, root, rec->ino,
3362                                        hole->start, hole->len);
3363                 if (ret < 0)
3364                         goto out;
3365                 ret = del_file_extent_hole(&rec->holes, hole->start,
3366                                            hole->len);
3367                 if (ret < 0)
3368                         goto out;
3369                 if (RB_EMPTY_ROOT(&rec->holes))
3370                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3371                 node = rb_first(&rec->holes);
3372         }
3373         /* special case for a file losing all its file extent */
3374         if (!found) {
3375                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3376                                        round_up(rec->isize,
3377                                                 root->fs_info->sectorsize));
3378                 if (ret < 0)
3379                         goto out;
3380         }
3381         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3382                rec->ino, root->objectid);
3383 out:
3384         return ret;
3385 }
3386
3387 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3388 {
3389         struct btrfs_trans_handle *trans;
3390         struct btrfs_path path;
3391         int ret = 0;
3392
3393         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3394                              I_ERR_NO_ORPHAN_ITEM |
3395                              I_ERR_LINK_COUNT_WRONG |
3396                              I_ERR_NO_INODE_ITEM |
3397                              I_ERR_FILE_EXTENT_ORPHAN |
3398                              I_ERR_FILE_EXTENT_DISCOUNT|
3399                              I_ERR_FILE_NBYTES_WRONG)))
3400                 return rec->errors;
3401
3402         /*
3403          * For nlink repair, it may create a dir and add link, so
3404          * 2 for parent(256)'s dir_index and dir_item
3405          * 2 for lost+found dir's inode_item and inode_ref
3406          * 1 for the new inode_ref of the file
3407          * 2 for lost+found dir's dir_index and dir_item for the file
3408          */
3409         trans = btrfs_start_transaction(root, 7);
3410         if (IS_ERR(trans))
3411                 return PTR_ERR(trans);
3412
3413         btrfs_init_path(&path);
3414         if (rec->errors & I_ERR_NO_INODE_ITEM)
3415                 ret = repair_inode_no_item(trans, root, &path, rec);
3416         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3417                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3418         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3419                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3420         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3421                 ret = repair_inode_isize(trans, root, &path, rec);
3422         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3423                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3424         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3425                 ret = repair_inode_nlinks(trans, root, &path, rec);
3426         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3427                 ret = repair_inode_nbytes(trans, root, &path, rec);
3428         btrfs_commit_transaction(trans, root);
3429         btrfs_release_path(&path);
3430         return ret;
3431 }
3432
3433 static int check_inode_recs(struct btrfs_root *root,
3434                             struct cache_tree *inode_cache)
3435 {
3436         struct cache_extent *cache;
3437         struct ptr_node *node;
3438         struct inode_record *rec;
3439         struct inode_backref *backref;
3440         int stage = 0;
3441         int ret = 0;
3442         int err = 0;
3443         u64 error = 0;
3444         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3445
3446         if (btrfs_root_refs(&root->root_item) == 0) {
3447                 if (!cache_tree_empty(inode_cache))
3448                         fprintf(stderr, "warning line %d\n", __LINE__);
3449                 return 0;
3450         }
3451
3452         /*
3453          * We need to repair backrefs first because we could change some of the
3454          * errors in the inode recs.
3455          *
3456          * We also need to go through and delete invalid backrefs first and then
3457          * add the correct ones second.  We do this because we may get EEXIST
3458          * when adding back the correct index because we hadn't yet deleted the
3459          * invalid index.
3460          *
3461          * For example, if we were missing a dir index then the directories
3462          * isize would be wrong, so if we fixed the isize to what we thought it
3463          * would be and then fixed the backref we'd still have a invalid fs, so
3464          * we need to add back the dir index and then check to see if the isize
3465          * is still wrong.
3466          */
3467         while (stage < 3) {
3468                 stage++;
3469                 if (stage == 3 && !err)
3470                         break;
3471
3472                 cache = search_cache_extent(inode_cache, 0);
3473                 while (repair && cache) {
3474                         node = container_of(cache, struct ptr_node, cache);
3475                         rec = node->data;
3476                         cache = next_cache_extent(cache);
3477
3478                         /* Need to free everything up and rescan */
3479                         if (stage == 3) {
3480                                 remove_cache_extent(inode_cache, &node->cache);
3481                                 free(node);
3482                                 free_inode_rec(rec);
3483                                 continue;
3484                         }
3485
3486                         if (list_empty(&rec->backrefs))
3487                                 continue;
3488
3489                         ret = repair_inode_backrefs(root, rec, inode_cache,
3490                                                     stage == 1);
3491                         if (ret < 0) {
3492                                 err = ret;
3493                                 stage = 2;
3494                                 break;
3495                         } if (ret > 0) {
3496                                 err = -EAGAIN;
3497                         }
3498                 }
3499         }
3500         if (err)
3501                 return err;
3502
3503         rec = get_inode_rec(inode_cache, root_dirid, 0);
3504         BUG_ON(IS_ERR(rec));
3505         if (rec) {
3506                 ret = check_root_dir(rec);
3507                 if (ret) {
3508                         fprintf(stderr, "root %llu root dir %llu error\n",
3509                                 (unsigned long long)root->root_key.objectid,
3510                                 (unsigned long long)root_dirid);
3511                         print_inode_error(root, rec);
3512                         error++;
3513                 }
3514         } else {
3515                 if (repair) {
3516                         struct btrfs_trans_handle *trans;
3517
3518                         trans = btrfs_start_transaction(root, 1);
3519                         if (IS_ERR(trans)) {
3520                                 err = PTR_ERR(trans);
3521                                 return err;
3522                         }
3523
3524                         fprintf(stderr,
3525                                 "root %llu missing its root dir, recreating\n",
3526                                 (unsigned long long)root->objectid);
3527
3528                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3529                         BUG_ON(ret);
3530
3531                         btrfs_commit_transaction(trans, root);
3532                         return -EAGAIN;
3533                 }
3534
3535                 fprintf(stderr, "root %llu root dir %llu not found\n",
3536                         (unsigned long long)root->root_key.objectid,
3537                         (unsigned long long)root_dirid);
3538         }
3539
3540         while (1) {
3541                 cache = search_cache_extent(inode_cache, 0);
3542                 if (!cache)
3543                         break;
3544                 node = container_of(cache, struct ptr_node, cache);
3545                 rec = node->data;
3546                 remove_cache_extent(inode_cache, &node->cache);
3547                 free(node);
3548                 if (rec->ino == root_dirid ||
3549                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3550                         free_inode_rec(rec);
3551                         continue;
3552                 }
3553
3554                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3555                         ret = check_orphan_item(root, rec->ino);
3556                         if (ret == 0)
3557                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3558                         if (can_free_inode_rec(rec)) {
3559                                 free_inode_rec(rec);
3560                                 continue;
3561                         }
3562                 }
3563
3564                 if (!rec->found_inode_item)
3565                         rec->errors |= I_ERR_NO_INODE_ITEM;
3566                 if (rec->found_link != rec->nlink)
3567                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3568                 if (repair) {
3569                         ret = try_repair_inode(root, rec);
3570                         if (ret == 0 && can_free_inode_rec(rec)) {
3571                                 free_inode_rec(rec);
3572                                 continue;
3573                         }
3574                         ret = 0;
3575                 }
3576
3577                 if (!(repair && ret == 0))
3578                         error++;
3579                 print_inode_error(root, rec);
3580                 list_for_each_entry(backref, &rec->backrefs, list) {
3581                         if (!backref->found_dir_item)
3582                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3583                         if (!backref->found_dir_index)
3584                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3585                         if (!backref->found_inode_ref)
3586                                 backref->errors |= REF_ERR_NO_INODE_REF;
3587                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3588                                 " namelen %u name %s filetype %d errors %x",
3589                                 (unsigned long long)backref->dir,
3590                                 (unsigned long long)backref->index,
3591                                 backref->namelen, backref->name,
3592                                 backref->filetype, backref->errors);
3593                         print_ref_error(backref->errors);
3594                 }
3595                 free_inode_rec(rec);
3596         }
3597         return (error > 0) ? -1 : 0;
3598 }
3599
3600 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3601                                         u64 objectid)
3602 {
3603         struct cache_extent *cache;
3604         struct root_record *rec = NULL;
3605         int ret;
3606
3607         cache = lookup_cache_extent(root_cache, objectid, 1);
3608         if (cache) {
3609                 rec = container_of(cache, struct root_record, cache);
3610         } else {
3611                 rec = calloc(1, sizeof(*rec));
3612                 if (!rec)
3613                         return ERR_PTR(-ENOMEM);
3614                 rec->objectid = objectid;
3615                 INIT_LIST_HEAD(&rec->backrefs);
3616                 rec->cache.start = objectid;
3617                 rec->cache.size = 1;
3618
3619                 ret = insert_cache_extent(root_cache, &rec->cache);
3620                 if (ret)
3621                         return ERR_PTR(-EEXIST);
3622         }
3623         return rec;
3624 }
3625
3626 static struct root_backref *get_root_backref(struct root_record *rec,
3627                                              u64 ref_root, u64 dir, u64 index,
3628                                              const char *name, int namelen)
3629 {
3630         struct root_backref *backref;
3631
3632         list_for_each_entry(backref, &rec->backrefs, list) {
3633                 if (backref->ref_root != ref_root || backref->dir != dir ||
3634                     backref->namelen != namelen)
3635                         continue;
3636                 if (memcmp(name, backref->name, namelen))
3637                         continue;
3638                 return backref;
3639         }
3640
3641         backref = calloc(1, sizeof(*backref) + namelen + 1);
3642         if (!backref)
3643                 return NULL;
3644         backref->ref_root = ref_root;
3645         backref->dir = dir;
3646         backref->index = index;
3647         backref->namelen = namelen;
3648         memcpy(backref->name, name, namelen);
3649         backref->name[namelen] = '\0';
3650         list_add_tail(&backref->list, &rec->backrefs);
3651         return backref;
3652 }
3653
3654 static void free_root_record(struct cache_extent *cache)
3655 {
3656         struct root_record *rec;
3657         struct root_backref *backref;
3658
3659         rec = container_of(cache, struct root_record, cache);
3660         while (!list_empty(&rec->backrefs)) {
3661                 backref = to_root_backref(rec->backrefs.next);
3662                 list_del(&backref->list);
3663                 free(backref);
3664         }
3665
3666         free(rec);
3667 }
3668
3669 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3670
3671 static int add_root_backref(struct cache_tree *root_cache,
3672                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3673                             const char *name, int namelen,
3674                             int item_type, int errors)
3675 {
3676         struct root_record *rec;
3677         struct root_backref *backref;
3678
3679         rec = get_root_rec(root_cache, root_id);
3680         BUG_ON(IS_ERR(rec));
3681         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3682         BUG_ON(!backref);
3683
3684         backref->errors |= errors;
3685
3686         if (item_type != BTRFS_DIR_ITEM_KEY) {
3687                 if (backref->found_dir_index || backref->found_back_ref ||
3688                     backref->found_forward_ref) {
3689                         if (backref->index != index)
3690                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3691                 } else {
3692                         backref->index = index;
3693                 }
3694         }
3695
3696         if (item_type == BTRFS_DIR_ITEM_KEY) {
3697                 if (backref->found_forward_ref)
3698                         rec->found_ref++;
3699                 backref->found_dir_item = 1;
3700         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3701                 backref->found_dir_index = 1;
3702         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3703                 if (backref->found_forward_ref)
3704                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3705                 else if (backref->found_dir_item)
3706                         rec->found_ref++;
3707                 backref->found_forward_ref = 1;
3708         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3709                 if (backref->found_back_ref)
3710                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3711                 backref->found_back_ref = 1;
3712         } else {
3713                 BUG_ON(1);
3714         }
3715
3716         if (backref->found_forward_ref && backref->found_dir_item)
3717                 backref->reachable = 1;
3718         return 0;
3719 }
3720
3721 static int merge_root_recs(struct btrfs_root *root,
3722                            struct cache_tree *src_cache,
3723                            struct cache_tree *dst_cache)
3724 {
3725         struct cache_extent *cache;
3726         struct ptr_node *node;
3727         struct inode_record *rec;
3728         struct inode_backref *backref;
3729         int ret = 0;
3730
3731         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3732                 free_inode_recs_tree(src_cache);
3733                 return 0;
3734         }
3735
3736         while (1) {
3737                 cache = search_cache_extent(src_cache, 0);
3738                 if (!cache)
3739                         break;
3740                 node = container_of(cache, struct ptr_node, cache);
3741                 rec = node->data;
3742                 remove_cache_extent(src_cache, &node->cache);
3743                 free(node);
3744
3745                 ret = is_child_root(root, root->objectid, rec->ino);
3746                 if (ret < 0)
3747                         break;
3748                 else if (ret == 0)
3749                         goto skip;
3750
3751                 list_for_each_entry(backref, &rec->backrefs, list) {
3752                         BUG_ON(backref->found_inode_ref);
3753                         if (backref->found_dir_item)
3754                                 add_root_backref(dst_cache, rec->ino,
3755                                         root->root_key.objectid, backref->dir,
3756                                         backref->index, backref->name,
3757                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3758                                         backref->errors);
3759                         if (backref->found_dir_index)
3760                                 add_root_backref(dst_cache, rec->ino,
3761                                         root->root_key.objectid, backref->dir,
3762                                         backref->index, backref->name,
3763                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3764                                         backref->errors);
3765                 }
3766 skip:
3767                 free_inode_rec(rec);
3768         }
3769         if (ret < 0)
3770                 return ret;
3771         return 0;
3772 }
3773
3774 static int check_root_refs(struct btrfs_root *root,
3775                            struct cache_tree *root_cache)
3776 {
3777         struct root_record *rec;
3778         struct root_record *ref_root;
3779         struct root_backref *backref;
3780         struct cache_extent *cache;
3781         int loop = 1;
3782         int ret;
3783         int error;
3784         int errors = 0;
3785
3786         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3787         BUG_ON(IS_ERR(rec));
3788         rec->found_ref = 1;
3789
3790         /* fixme: this can not detect circular references */
3791         while (loop) {
3792                 loop = 0;
3793                 cache = search_cache_extent(root_cache, 0);
3794                 while (1) {
3795                         if (!cache)
3796                                 break;
3797                         rec = container_of(cache, struct root_record, cache);
3798                         cache = next_cache_extent(cache);
3799
3800                         if (rec->found_ref == 0)
3801                                 continue;
3802
3803                         list_for_each_entry(backref, &rec->backrefs, list) {
3804                                 if (!backref->reachable)
3805                                         continue;
3806
3807                                 ref_root = get_root_rec(root_cache,
3808                                                         backref->ref_root);
3809                                 BUG_ON(IS_ERR(ref_root));
3810                                 if (ref_root->found_ref > 0)
3811                                         continue;
3812
3813                                 backref->reachable = 0;
3814                                 rec->found_ref--;
3815                                 if (rec->found_ref == 0)
3816                                         loop = 1;
3817                         }
3818                 }
3819         }
3820
3821         cache = search_cache_extent(root_cache, 0);
3822         while (1) {
3823                 if (!cache)
3824                         break;
3825                 rec = container_of(cache, struct root_record, cache);
3826                 cache = next_cache_extent(cache);
3827
3828                 if (rec->found_ref == 0 &&
3829                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3830                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3831                         ret = check_orphan_item(root->fs_info->tree_root,
3832                                                 rec->objectid);
3833                         if (ret == 0)
3834                                 continue;
3835
3836                         /*
3837                          * If we don't have a root item then we likely just have
3838                          * a dir item in a snapshot for this root but no actual
3839                          * ref key or anything so it's meaningless.
3840                          */
3841                         if (!rec->found_root_item)
3842                                 continue;
3843                         errors++;
3844                         fprintf(stderr, "fs tree %llu not referenced\n",
3845                                 (unsigned long long)rec->objectid);
3846                 }
3847
3848                 error = 0;
3849                 if (rec->found_ref > 0 && !rec->found_root_item)
3850                         error = 1;
3851                 list_for_each_entry(backref, &rec->backrefs, list) {
3852                         if (!backref->found_dir_item)
3853                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3854                         if (!backref->found_dir_index)
3855                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3856                         if (!backref->found_back_ref)
3857                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3858                         if (!backref->found_forward_ref)
3859                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3860                         if (backref->reachable && backref->errors)
3861                                 error = 1;
3862                 }
3863                 if (!error)
3864                         continue;
3865
3866                 errors++;
3867                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3868                         (unsigned long long)rec->objectid, rec->found_ref,
3869                          rec->found_root_item ? "" : "not found");
3870
3871                 list_for_each_entry(backref, &rec->backrefs, list) {
3872                         if (!backref->reachable)
3873                                 continue;
3874                         if (!backref->errors && rec->found_root_item)
3875                                 continue;
3876                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3877                                 " index %llu namelen %u name %s errors %x\n",
3878                                 (unsigned long long)backref->ref_root,
3879                                 (unsigned long long)backref->dir,
3880                                 (unsigned long long)backref->index,
3881                                 backref->namelen, backref->name,
3882                                 backref->errors);
3883                         print_ref_error(backref->errors);
3884                 }
3885         }
3886         return errors > 0 ? 1 : 0;
3887 }
3888
3889 static int process_root_ref(struct extent_buffer *eb, int slot,
3890                             struct btrfs_key *key,
3891                             struct cache_tree *root_cache)
3892 {
3893         u64 dirid;
3894         u64 index;
3895         u32 len;
3896         u32 name_len;
3897         struct btrfs_root_ref *ref;
3898         char namebuf[BTRFS_NAME_LEN];
3899         int error;
3900
3901         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3902
3903         dirid = btrfs_root_ref_dirid(eb, ref);
3904         index = btrfs_root_ref_sequence(eb, ref);
3905         name_len = btrfs_root_ref_name_len(eb, ref);
3906
3907         if (name_len <= BTRFS_NAME_LEN) {
3908                 len = name_len;
3909                 error = 0;
3910         } else {
3911                 len = BTRFS_NAME_LEN;
3912                 error = REF_ERR_NAME_TOO_LONG;
3913         }
3914         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3915
3916         if (key->type == BTRFS_ROOT_REF_KEY) {
3917                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3918                                  index, namebuf, len, key->type, error);
3919         } else {
3920                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3921                                  index, namebuf, len, key->type, error);
3922         }
3923         return 0;
3924 }
3925
3926 static void free_corrupt_block(struct cache_extent *cache)
3927 {
3928         struct btrfs_corrupt_block *corrupt;
3929
3930         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3931         free(corrupt);
3932 }
3933
3934 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3935
3936 /*
3937  * Repair the btree of the given root.
3938  *
3939  * The fix is to remove the node key in corrupt_blocks cache_tree.
3940  * and rebalance the tree.
3941  * After the fix, the btree should be writeable.
3942  */
3943 static int repair_btree(struct btrfs_root *root,
3944                         struct cache_tree *corrupt_blocks)
3945 {
3946         struct btrfs_trans_handle *trans;
3947         struct btrfs_path path;
3948         struct btrfs_corrupt_block *corrupt;
3949         struct cache_extent *cache;
3950         struct btrfs_key key;
3951         u64 offset;
3952         int level;
3953         int ret = 0;
3954
3955         if (cache_tree_empty(corrupt_blocks))
3956                 return 0;
3957
3958         trans = btrfs_start_transaction(root, 1);
3959         if (IS_ERR(trans)) {
3960                 ret = PTR_ERR(trans);
3961                 fprintf(stderr, "Error starting transaction: %s\n",
3962                         strerror(-ret));
3963                 return ret;
3964         }
3965         btrfs_init_path(&path);
3966         cache = first_cache_extent(corrupt_blocks);
3967         while (cache) {
3968                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3969                                        cache);
3970                 level = corrupt->level;
3971                 path.lowest_level = level;
3972                 key.objectid = corrupt->key.objectid;
3973                 key.type = corrupt->key.type;
3974                 key.offset = corrupt->key.offset;
3975
3976                 /*
3977                  * Here we don't want to do any tree balance, since it may
3978                  * cause a balance with corrupted brother leaf/node,
3979                  * so ins_len set to 0 here.
3980                  * Balance will be done after all corrupt node/leaf is deleted.
3981                  */
3982                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3983                 if (ret < 0)
3984                         goto out;
3985                 offset = btrfs_node_blockptr(path.nodes[level],
3986                                              path.slots[level]);
3987
3988                 /* Remove the ptr */
3989                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3990                 if (ret < 0)
3991                         goto out;
3992                 /*
3993                  * Remove the corresponding extent
3994                  * return value is not concerned.
3995                  */
3996                 btrfs_release_path(&path);
3997                 ret = btrfs_free_extent(trans, root, offset,
3998                                 root->fs_info->nodesize, 0,
3999                                 root->root_key.objectid, level - 1, 0);
4000                 cache = next_cache_extent(cache);
4001         }
4002
4003         /* Balance the btree using btrfs_search_slot() */
4004         cache = first_cache_extent(corrupt_blocks);
4005         while (cache) {
4006                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4007                                        cache);
4008                 memcpy(&key, &corrupt->key, sizeof(key));
4009                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4010                 if (ret < 0)
4011                         goto out;
4012                 /* return will always >0 since it won't find the item */
4013                 ret = 0;
4014                 btrfs_release_path(&path);
4015                 cache = next_cache_extent(cache);
4016         }
4017 out:
4018         btrfs_commit_transaction(trans, root);
4019         btrfs_release_path(&path);
4020         return ret;
4021 }
4022
4023 static int check_fs_root(struct btrfs_root *root,
4024                          struct cache_tree *root_cache,
4025                          struct walk_control *wc)
4026 {
4027         int ret = 0;
4028         int err = 0;
4029         int wret;
4030         int level;
4031         struct btrfs_path path;
4032         struct shared_node root_node;
4033         struct root_record *rec;
4034         struct btrfs_root_item *root_item = &root->root_item;
4035         struct cache_tree corrupt_blocks;
4036         struct orphan_data_extent *orphan;
4037         struct orphan_data_extent *tmp;
4038         enum btrfs_tree_block_status status;
4039         struct node_refs nrefs;
4040
4041         /*
4042          * Reuse the corrupt_block cache tree to record corrupted tree block
4043          *
4044          * Unlike the usage in extent tree check, here we do it in a per
4045          * fs/subvol tree base.
4046          */
4047         cache_tree_init(&corrupt_blocks);
4048         root->fs_info->corrupt_blocks = &corrupt_blocks;
4049
4050         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4051                 rec = get_root_rec(root_cache, root->root_key.objectid);
4052                 BUG_ON(IS_ERR(rec));
4053                 if (btrfs_root_refs(root_item) > 0)
4054                         rec->found_root_item = 1;
4055         }
4056
4057         btrfs_init_path(&path);
4058         memset(&root_node, 0, sizeof(root_node));
4059         cache_tree_init(&root_node.root_cache);
4060         cache_tree_init(&root_node.inode_cache);
4061         memset(&nrefs, 0, sizeof(nrefs));
4062
4063         /* Move the orphan extent record to corresponding inode_record */
4064         list_for_each_entry_safe(orphan, tmp,
4065                                  &root->orphan_data_extents, list) {
4066                 struct inode_record *inode;
4067
4068                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4069                                       1);
4070                 BUG_ON(IS_ERR(inode));
4071                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4072                 list_move(&orphan->list, &inode->orphan_extents);
4073         }
4074
4075         level = btrfs_header_level(root->node);
4076         memset(wc->nodes, 0, sizeof(wc->nodes));
4077         wc->nodes[level] = &root_node;
4078         wc->active_node = level;
4079         wc->root_level = level;
4080
4081         /* We may not have checked the root block, lets do that now */
4082         if (btrfs_is_leaf(root->node))
4083                 status = btrfs_check_leaf(root, NULL, root->node);
4084         else
4085                 status = btrfs_check_node(root, NULL, root->node);
4086         if (status != BTRFS_TREE_BLOCK_CLEAN)
4087                 return -EIO;
4088
4089         if (btrfs_root_refs(root_item) > 0 ||
4090             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4091                 path.nodes[level] = root->node;
4092                 extent_buffer_get(root->node);
4093                 path.slots[level] = 0;
4094         } else {
4095                 struct btrfs_key key;
4096                 struct btrfs_disk_key found_key;
4097
4098                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4099                 level = root_item->drop_level;
4100                 path.lowest_level = level;
4101                 if (level > btrfs_header_level(root->node) ||
4102                     level >= BTRFS_MAX_LEVEL) {
4103                         error("ignoring invalid drop level: %u", level);
4104                         goto skip_walking;
4105                 }
4106                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4107                 if (wret < 0)
4108                         goto skip_walking;
4109                 btrfs_node_key(path.nodes[level], &found_key,
4110                                 path.slots[level]);
4111                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4112                                         sizeof(found_key)));
4113         }
4114
4115         while (1) {
4116                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4117                 if (wret < 0)
4118                         ret = wret;
4119                 if (wret != 0)
4120                         break;
4121
4122                 wret = walk_up_tree(root, &path, wc, &level);
4123                 if (wret < 0)
4124                         ret = wret;
4125                 if (wret != 0)
4126                         break;
4127         }
4128 skip_walking:
4129         btrfs_release_path(&path);
4130
4131         if (!cache_tree_empty(&corrupt_blocks)) {
4132                 struct cache_extent *cache;
4133                 struct btrfs_corrupt_block *corrupt;
4134
4135                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4136                        root->root_key.objectid);
4137                 cache = first_cache_extent(&corrupt_blocks);
4138                 while (cache) {
4139                         corrupt = container_of(cache,
4140                                                struct btrfs_corrupt_block,
4141                                                cache);
4142                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4143                                cache->start, corrupt->level,
4144                                corrupt->key.objectid, corrupt->key.type,
4145                                corrupt->key.offset);
4146                         cache = next_cache_extent(cache);
4147                 }
4148                 if (repair) {
4149                         printf("Try to repair the btree for root %llu\n",
4150                                root->root_key.objectid);
4151                         ret = repair_btree(root, &corrupt_blocks);
4152                         if (ret < 0)
4153                                 fprintf(stderr, "Failed to repair btree: %s\n",
4154                                         strerror(-ret));
4155                         if (!ret)
4156                                 printf("Btree for root %llu is fixed\n",
4157                                        root->root_key.objectid);
4158                 }
4159         }
4160
4161         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4162         if (err < 0)
4163                 ret = err;
4164
4165         if (root_node.current) {
4166                 root_node.current->checked = 1;
4167                 maybe_free_inode_rec(&root_node.inode_cache,
4168                                 root_node.current);
4169         }
4170
4171         err = check_inode_recs(root, &root_node.inode_cache);
4172         if (!ret)
4173                 ret = err;
4174
4175         free_corrupt_blocks_tree(&corrupt_blocks);
4176         root->fs_info->corrupt_blocks = NULL;
4177         free_orphan_data_extents(&root->orphan_data_extents);
4178         return ret;
4179 }
4180
4181 static int fs_root_objectid(u64 objectid)
4182 {
4183         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4184             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4185                 return 1;
4186         return is_fstree(objectid);
4187 }
4188
4189 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4190                           struct cache_tree *root_cache)
4191 {
4192         struct btrfs_path path;
4193         struct btrfs_key key;
4194         struct walk_control wc;
4195         struct extent_buffer *leaf, *tree_node;
4196         struct btrfs_root *tmp_root;
4197         struct btrfs_root *tree_root = fs_info->tree_root;
4198         int ret;
4199         int err = 0;
4200
4201         if (ctx.progress_enabled) {
4202                 ctx.tp = TASK_FS_ROOTS;
4203                 task_start(ctx.info);
4204         }
4205
4206         /*
4207          * Just in case we made any changes to the extent tree that weren't
4208          * reflected into the free space cache yet.
4209          */
4210         if (repair)
4211                 reset_cached_block_groups(fs_info);
4212         memset(&wc, 0, sizeof(wc));
4213         cache_tree_init(&wc.shared);
4214         btrfs_init_path(&path);
4215
4216 again:
4217         key.offset = 0;
4218         key.objectid = 0;
4219         key.type = BTRFS_ROOT_ITEM_KEY;
4220         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4221         if (ret < 0) {
4222                 err = 1;
4223                 goto out;
4224         }
4225         tree_node = tree_root->node;
4226         while (1) {
4227                 if (tree_node != tree_root->node) {
4228                         free_root_recs_tree(root_cache);
4229                         btrfs_release_path(&path);
4230                         goto again;
4231                 }
4232                 leaf = path.nodes[0];
4233                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4234                         ret = btrfs_next_leaf(tree_root, &path);
4235                         if (ret) {
4236                                 if (ret < 0)
4237                                         err = 1;
4238                                 break;
4239                         }
4240                         leaf = path.nodes[0];
4241                 }
4242                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4243                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4244                     fs_root_objectid(key.objectid)) {
4245                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4246                                 tmp_root = btrfs_read_fs_root_no_cache(
4247                                                 fs_info, &key);
4248                         } else {
4249                                 key.offset = (u64)-1;
4250                                 tmp_root = btrfs_read_fs_root(
4251                                                 fs_info, &key);
4252                         }
4253                         if (IS_ERR(tmp_root)) {
4254                                 err = 1;
4255                                 goto next;
4256                         }
4257                         ret = check_fs_root(tmp_root, root_cache, &wc);
4258                         if (ret == -EAGAIN) {
4259                                 free_root_recs_tree(root_cache);
4260                                 btrfs_release_path(&path);
4261                                 goto again;
4262                         }
4263                         if (ret)
4264                                 err = 1;
4265                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4266                                 btrfs_free_fs_root(tmp_root);
4267                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4268                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4269                         process_root_ref(leaf, path.slots[0], &key,
4270                                          root_cache);
4271                 }
4272 next:
4273                 path.slots[0]++;
4274         }
4275 out:
4276         btrfs_release_path(&path);
4277         if (err)
4278                 free_extent_cache_tree(&wc.shared);
4279         if (!cache_tree_empty(&wc.shared))
4280                 fprintf(stderr, "warning line %d\n", __LINE__);
4281
4282         task_stop(ctx.info);
4283
4284         return err;
4285 }
4286
4287 /*
4288  * Find the @index according by @ino and name.
4289  * Notice:time efficiency is O(N)
4290  *
4291  * @root:       the root of the fs/file tree
4292  * @index_ret:  the index as return value
4293  * @namebuf:    the name to match
4294  * @name_len:   the length of name to match
4295  * @file_type:  the file_type of INODE_ITEM to match
4296  *
4297  * Returns 0 if found and *@index_ret will be modified with right value
4298  * Returns< 0 not found and *@index_ret will be (u64)-1
4299  */
4300 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4301                           u64 *index_ret, char *namebuf, u32 name_len,
4302                           u8 file_type)
4303 {
4304         struct btrfs_path path;
4305         struct extent_buffer *node;
4306         struct btrfs_dir_item *di;
4307         struct btrfs_key key;
4308         struct btrfs_key location;
4309         char name[BTRFS_NAME_LEN] = {0};
4310
4311         u32 total;
4312         u32 cur = 0;
4313         u32 len;
4314         u32 data_len;
4315         u8 filetype;
4316         int slot;
4317         int ret;
4318
4319         ASSERT(index_ret);
4320
4321         /* search from the last index */
4322         key.objectid = dirid;
4323         key.offset = (u64)-1;
4324         key.type = BTRFS_DIR_INDEX_KEY;
4325
4326         btrfs_init_path(&path);
4327         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4328         if (ret < 0)
4329                 return ret;
4330
4331 loop:
4332         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4333         if (ret) {
4334                 ret = -ENOENT;
4335                 *index_ret = (64)-1;
4336                 goto out;
4337         }
4338         /* Check whether inode_id/filetype/name match */
4339         node = path.nodes[0];
4340         slot = path.slots[0];
4341         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4342         total = btrfs_item_size_nr(node, slot);
4343         while (cur < total) {
4344                 ret = -ENOENT;
4345                 len = btrfs_dir_name_len(node, di);
4346                 data_len = btrfs_dir_data_len(node, di);
4347
4348                 btrfs_dir_item_key_to_cpu(node, di, &location);
4349                 if (location.objectid != location_id ||
4350                     location.type != BTRFS_INODE_ITEM_KEY ||
4351                     location.offset != 0)
4352                         goto next;
4353
4354                 filetype = btrfs_dir_type(node, di);
4355                 if (file_type != filetype)
4356                         goto next;
4357
4358                 if (len > BTRFS_NAME_LEN)
4359                         len = BTRFS_NAME_LEN;
4360
4361                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4362                 if (len != name_len || strncmp(namebuf, name, len))
4363                         goto next;
4364
4365                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4366                 *index_ret = key.offset;
4367                 ret = 0;
4368                 goto out;
4369 next:
4370                 len += sizeof(*di) + data_len;
4371                 di = (struct btrfs_dir_item *)((char *)di + len);
4372                 cur += len;
4373         }
4374         goto loop;
4375
4376 out:
4377         btrfs_release_path(&path);
4378         return ret;
4379 }
4380
4381 /*
4382  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4383  * INODE_REF/INODE_EXTREF match.
4384  *
4385  * @root:       the root of the fs/file tree
4386  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4387  *              value while find index
4388  * @location_key: location key of the struct btrfs_dir_item to match
4389  * @name:       the name to match
4390  * @namelen:    the length of name
4391  * @file_type:  the type of file to math
4392  *
4393  * Return 0 if no error occurred.
4394  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4395  * DIR_ITEM/DIR_INDEX
4396  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4397  * and DIR_ITEM/DIR_INDEX mismatch
4398  */
4399 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4400                          struct btrfs_key *location_key, char *name,
4401                          u32 namelen, u8 file_type)
4402 {
4403         struct btrfs_path path;
4404         struct extent_buffer *node;
4405         struct btrfs_dir_item *di;
4406         struct btrfs_key location;
4407         char namebuf[BTRFS_NAME_LEN] = {0};
4408         u32 total;
4409         u32 cur = 0;
4410         u32 len;
4411         u32 data_len;
4412         u8 filetype;
4413         int slot;
4414         int ret;
4415
4416         /* get the index by traversing all index */
4417         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4418                 ret = find_dir_index(root, key->objectid,
4419                                      location_key->objectid, &key->offset,
4420                                      name, namelen, file_type);
4421                 if (ret)
4422                         ret = DIR_INDEX_MISSING;
4423                 return ret;
4424         }
4425
4426         btrfs_init_path(&path);
4427         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4428         if (ret) {
4429                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4430                         DIR_INDEX_MISSING;
4431                 goto out;
4432         }
4433
4434         /* Check whether inode_id/filetype/name match */
4435         node = path.nodes[0];
4436         slot = path.slots[0];
4437         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4438         total = btrfs_item_size_nr(node, slot);
4439         while (cur < total) {
4440                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4441                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4442
4443                 len = btrfs_dir_name_len(node, di);
4444                 data_len = btrfs_dir_data_len(node, di);
4445
4446                 btrfs_dir_item_key_to_cpu(node, di, &location);
4447                 if (location.objectid != location_key->objectid ||
4448                     location.type != location_key->type ||
4449                     location.offset != location_key->offset)
4450                         goto next;
4451
4452                 filetype = btrfs_dir_type(node, di);
4453                 if (file_type != filetype)
4454                         goto next;
4455
4456                 if (len > BTRFS_NAME_LEN) {
4457                         len = BTRFS_NAME_LEN;
4458                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4459                         root->objectid,
4460                         key->type == BTRFS_DIR_ITEM_KEY ?
4461                         "DIR_ITEM" : "DIR_INDEX",
4462                         key->objectid, key->offset, len);
4463                 }
4464                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4465                                    len);
4466                 if (len != namelen || strncmp(namebuf, name, len))
4467                         goto next;
4468
4469                 ret = 0;
4470                 goto out;
4471 next:
4472                 len += sizeof(*di) + data_len;
4473                 di = (struct btrfs_dir_item *)((char *)di + len);
4474                 cur += len;
4475         }
4476
4477 out:
4478         btrfs_release_path(&path);
4479         return ret;
4480 }
4481
4482 /*
4483  * Prints inode ref error message
4484  */
4485 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4486                                 u64 index, const char *namebuf, int name_len,
4487                                 u8 filetype, int err)
4488 {
4489         if (!err)
4490                 return;
4491
4492         /* root dir error */
4493         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4494                 error(
4495         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4496                       root->objectid, key->objectid, key->offset, namebuf);
4497                 return;
4498         }
4499
4500         /* normal error */
4501         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4502                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4503                       root->objectid, key->offset,
4504                       btrfs_name_hash(namebuf, name_len),
4505                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4506                       namebuf, filetype);
4507         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4508                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4509                       root->objectid, key->offset, index,
4510                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4511                       namebuf, filetype);
4512 }
4513
4514 /*
4515  * Insert the missing inode item.
4516  *
4517  * Returns 0 means success.
4518  * Returns <0 means error.
4519  */
4520 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4521                                      u8 filetype)
4522 {
4523         struct btrfs_key key;
4524         struct btrfs_trans_handle *trans;
4525         struct btrfs_path path;
4526         int ret;
4527
4528         key.objectid = ino;
4529         key.type = BTRFS_INODE_ITEM_KEY;
4530         key.offset = 0;
4531
4532         btrfs_init_path(&path);
4533         trans = btrfs_start_transaction(root, 1);
4534         if (IS_ERR(trans)) {
4535                 ret = -EIO;
4536                 goto out;
4537         }
4538
4539         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4540         if (ret < 0 || !ret)
4541                 goto fail;
4542
4543         /* insert inode item */
4544         create_inode_item_lowmem(trans, root, ino, filetype);
4545         ret = 0;
4546 fail:
4547         btrfs_commit_transaction(trans, root);
4548 out:
4549         if (ret)
4550                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4551                       root->objectid, ino);
4552         btrfs_release_path(&path);
4553         return ret;
4554 }
4555
4556 /*
4557  * The ternary means dir item, dir index and relative inode ref.
4558  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4559  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4560  * strategy:
4561  * If two of three is missing or mismatched, delete the existing one.
4562  * If one of three is missing or mismatched, add the missing one.
4563  *
4564  * returns 0 means success.
4565  * returns not 0 means on error;
4566  */
4567 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4568                           u64 index, char *name, int name_len, u8 filetype,
4569                           int err)
4570 {
4571         struct btrfs_trans_handle *trans;
4572         int stage = 0;
4573         int ret = 0;
4574
4575         /*
4576          * stage shall be one of following valild values:
4577          *      0: Fine, nothing to do.
4578          *      1: One of three is wrong, so add missing one.
4579          *      2: Two of three is wrong, so delete existed one.
4580          */
4581         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4582                 stage++;
4583         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4584                 stage++;
4585         if (err & (INODE_REF_MISSING))
4586                 stage++;
4587
4588         /* stage must be smllarer than 3 */
4589         ASSERT(stage < 3);
4590
4591         trans = btrfs_start_transaction(root, 1);
4592         if (stage == 2) {
4593                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4594                                    name_len, 0);
4595                 goto out;
4596         }
4597         if (stage == 1) {
4598                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4599                                filetype, &index, 1, 1);
4600                 goto out;
4601         }
4602 out:
4603         btrfs_commit_transaction(trans, root);
4604
4605         if (ret)
4606                 error("fail to repair inode %llu name %s filetype %u",
4607                       ino, name, filetype);
4608         else
4609                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4610                        stage == 2 ? "Delete" : "Add",
4611                        ino, name, filetype);
4612
4613         return ret;
4614 }
4615
4616 /*
4617  * Traverse the given INODE_REF and call find_dir_item() to find related
4618  * DIR_ITEM/DIR_INDEX.
4619  *
4620  * @root:       the root of the fs/file tree
4621  * @ref_key:    the key of the INODE_REF
4622  * @path        the path provides node and slot
4623  * @refs:       the count of INODE_REF
4624  * @mode:       the st_mode of INODE_ITEM
4625  * @name_ret:   returns with the first ref's name
4626  * @name_len_ret:    len of the name_ret
4627  *
4628  * Return 0 if no error occurred.
4629  */
4630 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4631                            struct btrfs_path *path, char *name_ret,
4632                            u32 *namelen_ret, u64 *refs_ret, int mode)
4633 {
4634         struct btrfs_key key;
4635         struct btrfs_key location;
4636         struct btrfs_inode_ref *ref;
4637         struct extent_buffer *node;
4638         char namebuf[BTRFS_NAME_LEN] = {0};
4639         u32 total;
4640         u32 cur = 0;
4641         u32 len;
4642         u32 name_len;
4643         u64 index;
4644         int ret;
4645         int err = 0;
4646         int tmp_err;
4647         int slot;
4648         int need_research = 0;
4649         u64 refs;
4650
4651 begin:
4652         err = 0;
4653         cur = 0;
4654         refs = *refs_ret;
4655
4656         /* since after repair, path and the dir item may be changed */
4657         if (need_research) {
4658                 need_research = 0;
4659                 btrfs_release_path(path);
4660                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4661                 /* the item was deleted, let path point to the last checked item */
4662                 if (ret > 0) {
4663                         if (path->slots[0] == 0)
4664                                 btrfs_prev_leaf(root, path);
4665                         else
4666                                 path->slots[0]--;
4667                 }
4668                 if (ret)
4669                         goto out;
4670         }
4671
4672         location.objectid = ref_key->objectid;
4673         location.type = BTRFS_INODE_ITEM_KEY;
4674         location.offset = 0;
4675         node = path->nodes[0];
4676         slot = path->slots[0];
4677
4678         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4679         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4680         total = btrfs_item_size_nr(node, slot);
4681
4682 next:
4683         /* Update inode ref count */
4684         refs++;
4685         tmp_err = 0;
4686         index = btrfs_inode_ref_index(node, ref);
4687         name_len = btrfs_inode_ref_name_len(node, ref);
4688
4689         if (name_len <= BTRFS_NAME_LEN) {
4690                 len = name_len;
4691         } else {
4692                 len = BTRFS_NAME_LEN;
4693                 warning("root %llu INODE_REF[%llu %llu] name too long",
4694                         root->objectid, ref_key->objectid, ref_key->offset);
4695         }
4696
4697         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4698
4699         /* copy the first name found to name_ret */
4700         if (refs == 1 && name_ret) {
4701                 memcpy(name_ret, namebuf, len);
4702                 *namelen_ret = len;
4703         }
4704
4705         /* Check root dir ref */
4706         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4707                 if (index != 0 || len != strlen("..") ||
4708                     strncmp("..", namebuf, len) ||
4709                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4710                         /* set err bits then repair will delete the ref */
4711                         err |= DIR_INDEX_MISSING;
4712                         err |= DIR_ITEM_MISSING;
4713                 }
4714                 goto end;
4715         }
4716
4717         /* Find related DIR_INDEX */
4718         key.objectid = ref_key->offset;
4719         key.type = BTRFS_DIR_INDEX_KEY;
4720         key.offset = index;
4721         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4722                             imode_to_type(mode));
4723
4724         /* Find related dir_item */
4725         key.objectid = ref_key->offset;
4726         key.type = BTRFS_DIR_ITEM_KEY;
4727         key.offset = btrfs_name_hash(namebuf, len);
4728         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4729                             imode_to_type(mode));
4730 end:
4731         if (tmp_err && repair) {
4732                 ret = repair_ternary_lowmem(root, ref_key->offset,
4733                                             ref_key->objectid, index, namebuf,
4734                                             name_len, imode_to_type(mode),
4735                                             tmp_err);
4736                 if (!ret) {
4737                         need_research = 1;
4738                         goto begin;
4739                 }
4740         }
4741         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4742                             imode_to_type(mode), tmp_err);
4743         err |= tmp_err;
4744         len = sizeof(*ref) + name_len;
4745         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4746         cur += len;
4747         if (cur < total)
4748                 goto next;
4749
4750 out:
4751         *refs_ret = refs;
4752         return err;
4753 }
4754
4755 /*
4756  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4757  * DIR_ITEM/DIR_INDEX.
4758  *
4759  * @root:       the root of the fs/file tree
4760  * @ref_key:    the key of the INODE_EXTREF
4761  * @refs:       the count of INODE_EXTREF
4762  * @mode:       the st_mode of INODE_ITEM
4763  *
4764  * Return 0 if no error occurred.
4765  */
4766 static int check_inode_extref(struct btrfs_root *root,
4767                               struct btrfs_key *ref_key,
4768                               struct extent_buffer *node, int slot, u64 *refs,
4769                               int mode)
4770 {
4771         struct btrfs_key key;
4772         struct btrfs_key location;
4773         struct btrfs_inode_extref *extref;
4774         char namebuf[BTRFS_NAME_LEN] = {0};
4775         u32 total;
4776         u32 cur = 0;
4777         u32 len;
4778         u32 name_len;
4779         u64 index;
4780         u64 parent;
4781         int ret;
4782         int err = 0;
4783
4784         location.objectid = ref_key->objectid;
4785         location.type = BTRFS_INODE_ITEM_KEY;
4786         location.offset = 0;
4787
4788         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4789         total = btrfs_item_size_nr(node, slot);
4790
4791 next:
4792         /* update inode ref count */
4793         (*refs)++;
4794         name_len = btrfs_inode_extref_name_len(node, extref);
4795         index = btrfs_inode_extref_index(node, extref);
4796         parent = btrfs_inode_extref_parent(node, extref);
4797         if (name_len <= BTRFS_NAME_LEN) {
4798                 len = name_len;
4799         } else {
4800                 len = BTRFS_NAME_LEN;
4801                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4802                         root->objectid, ref_key->objectid, ref_key->offset);
4803         }
4804         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4805
4806         /* Check root dir ref name */
4807         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4808                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4809                       root->objectid, ref_key->objectid, ref_key->offset,
4810                       namebuf);
4811                 err |= ROOT_DIR_ERROR;
4812         }
4813
4814         /* find related dir_index */
4815         key.objectid = parent;
4816         key.type = BTRFS_DIR_INDEX_KEY;
4817         key.offset = index;
4818         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4819         err |= ret;
4820
4821         /* find related dir_item */
4822         key.objectid = parent;
4823         key.type = BTRFS_DIR_ITEM_KEY;
4824         key.offset = btrfs_name_hash(namebuf, len);
4825         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4826         err |= ret;
4827
4828         len = sizeof(*extref) + name_len;
4829         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4830         cur += len;
4831
4832         if (cur < total)
4833                 goto next;
4834
4835         return err;
4836 }
4837
4838 /*
4839  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4840  * DIR_ITEM/DIR_INDEX match.
4841  * Return with @index_ret.
4842  *
4843  * @root:       the root of the fs/file tree
4844  * @key:        the key of the INODE_REF/INODE_EXTREF
4845  * @name:       the name in the INODE_REF/INODE_EXTREF
4846  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4847  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4848  *              value (64)-1 means do not check index
4849  * @ext_ref:    the EXTENDED_IREF feature
4850  *
4851  * Return 0 if no error occurred.
4852  * Return >0 for error bitmap
4853  */
4854 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4855                           char *name, int namelen, u64 *index_ret,
4856                           unsigned int ext_ref)
4857 {
4858         struct btrfs_path path;
4859         struct btrfs_inode_ref *ref;
4860         struct btrfs_inode_extref *extref;
4861         struct extent_buffer *node;
4862         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4863         u32 total;
4864         u32 cur = 0;
4865         u32 len;
4866         u32 ref_namelen;
4867         u64 ref_index;
4868         u64 parent;
4869         u64 dir_id;
4870         int slot;
4871         int ret;
4872
4873         ASSERT(index_ret);
4874
4875         btrfs_init_path(&path);
4876         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4877         if (ret) {
4878                 ret = INODE_REF_MISSING;
4879                 goto extref;
4880         }
4881
4882         node = path.nodes[0];
4883         slot = path.slots[0];
4884
4885         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4886         total = btrfs_item_size_nr(node, slot);
4887
4888         /* Iterate all entry of INODE_REF */
4889         while (cur < total) {
4890                 ret = INODE_REF_MISSING;
4891
4892                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4893                 ref_index = btrfs_inode_ref_index(node, ref);
4894                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4895                         goto next_ref;
4896
4897                 if (cur + sizeof(*ref) + ref_namelen > total ||
4898                     ref_namelen > BTRFS_NAME_LEN) {
4899                         warning("root %llu INODE %s[%llu %llu] name too long",
4900                                 root->objectid,
4901                                 key->type == BTRFS_INODE_REF_KEY ?
4902                                         "REF" : "EXTREF",
4903                                 key->objectid, key->offset);
4904
4905                         if (cur + sizeof(*ref) > total)
4906                                 break;
4907                         len = min_t(u32, total - cur - sizeof(*ref),
4908                                     BTRFS_NAME_LEN);
4909                 } else {
4910                         len = ref_namelen;
4911                 }
4912
4913                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4914                                    len);
4915
4916                 if (len != namelen || strncmp(ref_namebuf, name, len))
4917                         goto next_ref;
4918
4919                 *index_ret = ref_index;
4920                 ret = 0;
4921                 goto out;
4922 next_ref:
4923                 len = sizeof(*ref) + ref_namelen;
4924                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4925                 cur += len;
4926         }
4927
4928 extref:
4929         /* Skip if not support EXTENDED_IREF feature */
4930         if (!ext_ref)
4931                 goto out;
4932
4933         btrfs_release_path(&path);
4934         btrfs_init_path(&path);
4935
4936         dir_id = key->offset;
4937         key->type = BTRFS_INODE_EXTREF_KEY;
4938         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4939
4940         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4941         if (ret) {
4942                 ret = INODE_REF_MISSING;
4943                 goto out;
4944         }
4945
4946         node = path.nodes[0];
4947         slot = path.slots[0];
4948
4949         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4950         cur = 0;
4951         total = btrfs_item_size_nr(node, slot);
4952
4953         /* Iterate all entry of INODE_EXTREF */
4954         while (cur < total) {
4955                 ret = INODE_REF_MISSING;
4956
4957                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4958                 ref_index = btrfs_inode_extref_index(node, extref);
4959                 parent = btrfs_inode_extref_parent(node, extref);
4960                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4961                         goto next_extref;
4962
4963                 if (parent != dir_id)
4964                         goto next_extref;
4965
4966                 if (ref_namelen <= BTRFS_NAME_LEN) {
4967                         len = ref_namelen;
4968                 } else {
4969                         len = BTRFS_NAME_LEN;
4970                         warning("root %llu INODE %s[%llu %llu] name too long",
4971                                 root->objectid,
4972                                 key->type == BTRFS_INODE_REF_KEY ?
4973                                         "REF" : "EXTREF",
4974                                 key->objectid, key->offset);
4975                 }
4976                 read_extent_buffer(node, ref_namebuf,
4977                                    (unsigned long)(extref + 1), len);
4978
4979                 if (len != namelen || strncmp(ref_namebuf, name, len))
4980                         goto next_extref;
4981
4982                 *index_ret = ref_index;
4983                 ret = 0;
4984                 goto out;
4985
4986 next_extref:
4987                 len = sizeof(*extref) + ref_namelen;
4988                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4989                 cur += len;
4990
4991         }
4992 out:
4993         btrfs_release_path(&path);
4994         return ret;
4995 }
4996
4997 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4998                                u64 ino, u64 index, const char *namebuf,
4999                                int name_len, u8 filetype, int err)
5000 {
5001         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5002                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5003                       root->objectid, key->objectid, key->offset, namebuf,
5004                       filetype,
5005                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5006         }
5007
5008         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5009                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5010                       root->objectid, key->objectid, index, namebuf, filetype,
5011                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5012         }
5013
5014         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5015                 error(
5016                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5017                       root->objectid, ino, index, namebuf, filetype,
5018                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5019         }
5020
5021         if (err & INODE_REF_MISSING)
5022                 error(
5023                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5024                       root->objectid, ino, key->objectid, namebuf, filetype);
5025
5026 }
5027
5028 /*
5029  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5030  *
5031  * Returns error after repair
5032  */
5033 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5034                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5035                            int err)
5036 {
5037         int ret;
5038
5039         if (err & INODE_ITEM_MISSING) {
5040                 ret = repair_inode_item_missing(root, ino, filetype);
5041                 if (!ret)
5042                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5043         }
5044
5045         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5046                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5047                                             name_len, filetype, err);
5048                 if (!ret) {
5049                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5050                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5051                         err &= ~(INODE_REF_MISSING);
5052                 }
5053         }
5054         return err;
5055 }
5056
5057 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5058                 u64 *size_ret)
5059 {
5060         struct btrfs_key key;
5061         struct btrfs_path path;
5062         u32 len;
5063         struct btrfs_dir_item *di;
5064         int ret;
5065         int cur = 0;
5066         int total = 0;
5067
5068         ASSERT(size_ret);
5069         *size_ret = 0;
5070
5071         key.objectid = ino;
5072         key.type = type;
5073         key.offset = (u64)-1;
5074
5075         btrfs_init_path(&path);
5076         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5077         if (ret < 0) {
5078                 ret = -EIO;
5079                 goto out;
5080         }
5081         /* if found, go to spacial case */
5082         if (ret == 0)
5083                 goto special_case;
5084
5085 loop:
5086         ret = btrfs_previous_item(root, &path, ino, type);
5087
5088         if (ret) {
5089                 ret = 0;
5090                 goto out;
5091         }
5092
5093 special_case:
5094         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5095         cur = 0;
5096         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5097
5098         while (cur < total) {
5099                 len = btrfs_dir_name_len(path.nodes[0], di);
5100                 if (len > BTRFS_NAME_LEN)
5101                         len = BTRFS_NAME_LEN;
5102                 *size_ret += len;
5103
5104                 len += btrfs_dir_data_len(path.nodes[0], di);
5105                 len += sizeof(*di);
5106                 di = (struct btrfs_dir_item *)((char *)di + len);
5107                 cur += len;
5108         }
5109         goto loop;
5110
5111 out:
5112         btrfs_release_path(&path);
5113         return ret;
5114 }
5115
5116 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5117 {
5118         u64 item_size;
5119         u64 index_size;
5120         int ret;
5121
5122         ASSERT(size);
5123         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5124         if (ret)
5125                 goto out;
5126
5127         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5128         if (ret)
5129                 goto out;
5130
5131         *size = item_size + index_size;
5132
5133 out:
5134         if (ret)
5135                 error("failed to count root %llu INODE[%llu] root size",
5136                       root->objectid, ino);
5137         return ret;
5138 }
5139
5140 /*
5141  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5142  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5143  *
5144  * @root:       the root of the fs/file tree
5145  * @key:        the key of the INODE_REF/INODE_EXTREF
5146  * @path:       the path
5147  * @size:       the st_size of the INODE_ITEM
5148  * @ext_ref:    the EXTENDED_IREF feature
5149  *
5150  * Return 0 if no error occurred.
5151  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5152  */
5153 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5154                           struct btrfs_path *path, u64 *size,
5155                           unsigned int ext_ref)
5156 {
5157         struct btrfs_dir_item *di;
5158         struct btrfs_inode_item *ii;
5159         struct btrfs_key key;
5160         struct btrfs_key location;
5161         struct extent_buffer *node;
5162         int slot;
5163         char namebuf[BTRFS_NAME_LEN] = {0};
5164         u32 total;
5165         u32 cur = 0;
5166         u32 len;
5167         u32 name_len;
5168         u32 data_len;
5169         u8 filetype;
5170         u32 mode = 0;
5171         u64 index;
5172         int ret;
5173         int err;
5174         int tmp_err;
5175         int need_research = 0;
5176
5177         /*
5178          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5179          * ignore index check.
5180          */
5181         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5182                 index = di_key->offset;
5183         else
5184                 index = (u64)-1;
5185 begin:
5186         err = 0;
5187         cur = 0;
5188
5189         /* since after repair, path and the dir item may be changed */
5190         if (need_research) {
5191                 need_research = 0;
5192                 err |= DIR_COUNT_AGAIN;
5193                 btrfs_release_path(path);
5194                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5195                 /* the item was deleted, let path point the last checked item */
5196                 if (ret > 0) {
5197                         if (path->slots[0] == 0)
5198                                 btrfs_prev_leaf(root, path);
5199                         else
5200                                 path->slots[0]--;
5201                 }
5202                 if (ret)
5203                         goto out;
5204         }
5205
5206         node = path->nodes[0];
5207         slot = path->slots[0];
5208
5209         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5210         total = btrfs_item_size_nr(node, slot);
5211         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5212
5213         while (cur < total) {
5214                 data_len = btrfs_dir_data_len(node, di);
5215                 tmp_err = 0;
5216                 if (data_len)
5217                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5218                               root->objectid,
5219               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5220                               di_key->objectid, di_key->offset, data_len);
5221
5222                 name_len = btrfs_dir_name_len(node, di);
5223                 if (name_len <= BTRFS_NAME_LEN) {
5224                         len = name_len;
5225                 } else {
5226                         len = BTRFS_NAME_LEN;
5227                         warning("root %llu %s[%llu %llu] name too long",
5228                                 root->objectid,
5229                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5230                                 di_key->objectid, di_key->offset);
5231                 }
5232                 (*size) += name_len;
5233                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5234                                    len);
5235                 filetype = btrfs_dir_type(node, di);
5236
5237                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5238                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5239                         err |= -EIO;
5240                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5241                         root->objectid, di_key->objectid, di_key->offset,
5242                         namebuf, len, filetype, di_key->offset,
5243                         btrfs_name_hash(namebuf, len));
5244                 }
5245
5246                 btrfs_dir_item_key_to_cpu(node, di, &location);
5247                 /* Ignore related ROOT_ITEM check */
5248                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5249                         goto next;
5250
5251                 btrfs_release_path(path);
5252                 /* Check relative INODE_ITEM(existence/filetype) */
5253                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5254                 if (ret) {
5255                         tmp_err |= INODE_ITEM_MISSING;
5256                         goto next;
5257                 }
5258
5259                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5260                                     struct btrfs_inode_item);
5261                 mode = btrfs_inode_mode(path->nodes[0], ii);
5262                 if (imode_to_type(mode) != filetype) {
5263                         tmp_err |= INODE_ITEM_MISMATCH;
5264                         goto next;
5265                 }
5266
5267                 /* Check relative INODE_REF/INODE_EXTREF */
5268                 key.objectid = location.objectid;
5269                 key.type = BTRFS_INODE_REF_KEY;
5270                 key.offset = di_key->objectid;
5271                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5272                                           &index, ext_ref);
5273
5274                 /* check relative INDEX/ITEM */
5275                 key.objectid = di_key->objectid;
5276                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5277                         key.type = BTRFS_DIR_INDEX_KEY;
5278                         key.offset = index;
5279                 } else {
5280                         key.type = BTRFS_DIR_ITEM_KEY;
5281                         key.offset = btrfs_name_hash(namebuf, name_len);
5282                 }
5283
5284                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5285                                          name_len, filetype);
5286                 /* find_dir_item may find index */
5287                 if (key.type == BTRFS_DIR_INDEX_KEY)
5288                         index = key.offset;
5289 next:
5290
5291                 if (tmp_err && repair) {
5292                         ret = repair_dir_item(root, di_key->objectid,
5293                                               location.objectid, index,
5294                                               imode_to_type(mode), namebuf,
5295                                               name_len, tmp_err);
5296                         if (ret != tmp_err) {
5297                                 need_research = 1;
5298                                 goto begin;
5299                         }
5300                 }
5301                 btrfs_release_path(path);
5302                 print_dir_item_err(root, di_key, location.objectid, index,
5303                                    namebuf, name_len, filetype, tmp_err);
5304                 err |= tmp_err;
5305                 len = sizeof(*di) + name_len + data_len;
5306                 di = (struct btrfs_dir_item *)((char *)di + len);
5307                 cur += len;
5308
5309                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5310                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5311                               root->objectid, di_key->objectid,
5312                               di_key->offset);
5313                         break;
5314                 }
5315         }
5316 out:
5317         /* research path */
5318         btrfs_release_path(path);
5319         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5320         if (ret)
5321                 err |= ret > 0 ? -ENOENT : ret;
5322         return err;
5323 }
5324
5325 /*
5326  * Wrapper function of btrfs_punch_hole.
5327  *
5328  * Returns 0 means success.
5329  * Returns not 0 means error.
5330  */
5331 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5332                              u64 len)
5333 {
5334         struct btrfs_trans_handle *trans;
5335         int ret = 0;
5336
5337         trans = btrfs_start_transaction(root, 1);
5338         if (IS_ERR(trans))
5339                 return PTR_ERR(trans);
5340
5341         ret = btrfs_punch_hole(trans, root, ino, start, len);
5342         if (ret)
5343                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5344                       start, len, ino);
5345         else
5346                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5347                        ino);
5348
5349         btrfs_commit_transaction(trans, root);
5350         return ret;
5351 }
5352
5353 /*
5354  * Check file extent datasum/hole, update the size of the file extents,
5355  * check and update the last offset of the file extent.
5356  *
5357  * @root:       the root of fs/file tree.
5358  * @fkey:       the key of the file extent.
5359  * @nodatasum:  INODE_NODATASUM feature.
5360  * @size:       the sum of all EXTENT_DATA items size for this inode.
5361  * @end:        the offset of the last extent.
5362  *
5363  * Return 0 if no error occurred.
5364  */
5365 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5366                              struct extent_buffer *node, int slot,
5367                              unsigned int nodatasum, u64 *size, u64 *end)
5368 {
5369         struct btrfs_file_extent_item *fi;
5370         u64 disk_bytenr;
5371         u64 disk_num_bytes;
5372         u64 extent_num_bytes;
5373         u64 extent_offset;
5374         u64 csum_found;         /* In byte size, sectorsize aligned */
5375         u64 search_start;       /* Logical range start we search for csum */
5376         u64 search_len;         /* Logical range len we search for csum */
5377         unsigned int extent_type;
5378         unsigned int is_hole;
5379         int compressed = 0;
5380         int ret;
5381         int err = 0;
5382
5383         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5384
5385         /* Check inline extent */
5386         extent_type = btrfs_file_extent_type(node, fi);
5387         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5388                 struct btrfs_item *e = btrfs_item_nr(slot);
5389                 u32 item_inline_len;
5390
5391                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5392                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5393                 compressed = btrfs_file_extent_compression(node, fi);
5394                 if (extent_num_bytes == 0) {
5395                         error(
5396                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5397                                 root->objectid, fkey->objectid, fkey->offset);
5398                         err |= FILE_EXTENT_ERROR;
5399                 }
5400                 if (!compressed && extent_num_bytes != item_inline_len) {
5401                         error(
5402                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5403                                 root->objectid, fkey->objectid, fkey->offset,
5404                                 extent_num_bytes, item_inline_len);
5405                         err |= FILE_EXTENT_ERROR;
5406                 }
5407                 *end += extent_num_bytes;
5408                 *size += extent_num_bytes;
5409                 return err;
5410         }
5411
5412         /* Check extent type */
5413         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5414                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5415                 err |= FILE_EXTENT_ERROR;
5416                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5417                       root->objectid, fkey->objectid, fkey->offset);
5418                 return err;
5419         }
5420
5421         /* Check REG_EXTENT/PREALLOC_EXTENT */
5422         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5423         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5424         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5425         extent_offset = btrfs_file_extent_offset(node, fi);
5426         compressed = btrfs_file_extent_compression(node, fi);
5427         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5428
5429         /*
5430          * Check EXTENT_DATA csum
5431          *
5432          * For plain (uncompressed) extent, we should only check the range
5433          * we're referring to, as it's possible that part of prealloc extent
5434          * has been written, and has csum:
5435          *
5436          * |<--- Original large preallocated extent A ---->|
5437          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5438          *      No csum                         Has csum
5439          *
5440          * For compressed extent, we should check the whole range.
5441          */
5442         if (!compressed) {
5443                 search_start = disk_bytenr + extent_offset;
5444                 search_len = extent_num_bytes;
5445         } else {
5446                 search_start = disk_bytenr;
5447                 search_len = disk_num_bytes;
5448         }
5449         ret = count_csum_range(root, search_start, search_len, &csum_found);
5450         if (csum_found > 0 && nodatasum) {
5451                 err |= ODD_CSUM_ITEM;
5452                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5453                       root->objectid, fkey->objectid, fkey->offset);
5454         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5455                    !is_hole && (ret < 0 || csum_found < search_len)) {
5456                 err |= CSUM_ITEM_MISSING;
5457                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5458                       root->objectid, fkey->objectid, fkey->offset,
5459                       csum_found, search_len);
5460         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5461                 err |= ODD_CSUM_ITEM;
5462                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5463                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5464         }
5465
5466         /* Check EXTENT_DATA hole */
5467         if (!no_holes && *end != fkey->offset) {
5468                 if (repair)
5469                         ret = punch_extent_hole(root, fkey->objectid,
5470                                                 *end, fkey->offset - *end);
5471                 if (!repair || ret) {
5472                         err |= FILE_EXTENT_ERROR;
5473                         error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5474                               root->objectid, fkey->objectid, fkey->offset);
5475                 }
5476         }
5477
5478         *end += extent_num_bytes;
5479         if (!is_hole)
5480                 *size += extent_num_bytes;
5481
5482         return err;
5483 }
5484
5485 /*
5486  * Set inode item nbytes to @nbytes
5487  *
5488  * Returns  0     on success
5489  * Returns  != 0  on error
5490  */
5491 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5492                                       struct btrfs_path *path,
5493                                       u64 ino, u64 nbytes)
5494 {
5495         struct btrfs_trans_handle *trans;
5496         struct btrfs_inode_item *ii;
5497         struct btrfs_key key;
5498         struct btrfs_key research_key;
5499         int err = 0;
5500         int ret;
5501
5502         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5503
5504         key.objectid = ino;
5505         key.type = BTRFS_INODE_ITEM_KEY;
5506         key.offset = 0;
5507
5508         trans = btrfs_start_transaction(root, 1);
5509         if (IS_ERR(trans)) {
5510                 ret = PTR_ERR(trans);
5511                 err |= ret;
5512                 goto out;
5513         }
5514
5515         btrfs_release_path(path);
5516         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5517         if (ret > 0)
5518                 ret = -ENOENT;
5519         if (ret) {
5520                 err |= ret;
5521                 goto fail;
5522         }
5523
5524         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5525                             struct btrfs_inode_item);
5526         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5527         btrfs_mark_buffer_dirty(path->nodes[0]);
5528 fail:
5529         btrfs_commit_transaction(trans, root);
5530 out:
5531         if (ret)
5532                 error("failed to set nbytes in inode %llu root %llu",
5533                       ino, root->root_key.objectid);
5534         else
5535                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5536                        root->root_key.objectid, nbytes);
5537
5538         /* research path */
5539         btrfs_release_path(path);
5540         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5541         err |= ret;
5542
5543         return err;
5544 }
5545
5546 /*
5547  * Set directory inode isize to @isize.
5548  *
5549  * Returns 0     on success.
5550  * Returns != 0  on error.
5551  */
5552 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5553                                    struct btrfs_path *path,
5554                                    u64 ino, u64 isize)
5555 {
5556         struct btrfs_trans_handle *trans;
5557         struct btrfs_inode_item *ii;
5558         struct btrfs_key key;
5559         struct btrfs_key research_key;
5560         int ret;
5561         int err = 0;
5562
5563         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5564
5565         key.objectid = ino;
5566         key.type = BTRFS_INODE_ITEM_KEY;
5567         key.offset = 0;
5568
5569         trans = btrfs_start_transaction(root, 1);
5570         if (IS_ERR(trans)) {
5571                 ret = PTR_ERR(trans);
5572                 err |= ret;
5573                 goto out;
5574         }
5575
5576         btrfs_release_path(path);
5577         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5578         if (ret > 0)
5579                 ret = -ENOENT;
5580         if (ret) {
5581                 err |= ret;
5582                 goto fail;
5583         }
5584
5585         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5586                             struct btrfs_inode_item);
5587         btrfs_set_inode_size(path->nodes[0], ii, isize);
5588         btrfs_mark_buffer_dirty(path->nodes[0]);
5589 fail:
5590         btrfs_commit_transaction(trans, root);
5591 out:
5592         if (ret)
5593                 error("failed to set isize in inode %llu root %llu",
5594                       ino, root->root_key.objectid);
5595         else
5596                 printf("Set isize in inode %llu root %llu to %llu\n",
5597                        ino, root->root_key.objectid, isize);
5598
5599         btrfs_release_path(path);
5600         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5601         err |= ret;
5602
5603         return err;
5604 }
5605
5606 /*
5607  * Wrapper function for btrfs_add_orphan_item().
5608  *
5609  * Returns 0     on success.
5610  * Returns != 0  on error.
5611  */
5612 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5613                                            struct btrfs_path *path, u64 ino)
5614 {
5615         struct btrfs_trans_handle *trans;
5616         struct btrfs_key research_key;
5617         int ret;
5618         int err = 0;
5619
5620         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5621
5622         trans = btrfs_start_transaction(root, 1);
5623         if (IS_ERR(trans)) {
5624                 ret = PTR_ERR(trans);
5625                 err |= ret;
5626                 goto out;
5627         }
5628
5629         btrfs_release_path(path);
5630         ret = btrfs_add_orphan_item(trans, root, path, ino);
5631         err |= ret;
5632         btrfs_commit_transaction(trans, root);
5633 out:
5634         if (ret)
5635                 error("failed to add inode %llu as orphan item root %llu",
5636                       ino, root->root_key.objectid);
5637         else
5638                 printf("Added inode %llu as orphan item root %llu\n",
5639                        ino, root->root_key.objectid);
5640
5641         btrfs_release_path(path);
5642         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5643         err |= ret;
5644
5645         return err;
5646 }
5647
5648 /* Set inode_item nlink to @ref_count.
5649  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5650  *
5651  * Returns 0 on success
5652  */
5653 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5654                                       struct btrfs_path *path, u64 ino,
5655                                       const char *name, u32 namelen,
5656                                       u64 ref_count, u8 filetype, u64 *nlink)
5657 {
5658         struct btrfs_trans_handle *trans;
5659         struct btrfs_inode_item *ii;
5660         struct btrfs_key key;
5661         struct btrfs_key old_key;
5662         char namebuf[BTRFS_NAME_LEN] = {0};
5663         int name_len;
5664         int ret;
5665         int ret2;
5666
5667         /* save the key */
5668         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5669
5670         if (name && namelen) {
5671                 ASSERT(namelen <= BTRFS_NAME_LEN);
5672                 memcpy(namebuf, name, namelen);
5673                 name_len = namelen;
5674         } else {
5675                 sprintf(namebuf, "%llu", ino);
5676                 name_len = count_digits(ino);
5677                 printf("Can't find file name for inode %llu, use %s instead\n",
5678                        ino, namebuf);
5679         }
5680
5681         trans = btrfs_start_transaction(root, 1);
5682         if (IS_ERR(trans)) {
5683                 ret = PTR_ERR(trans);
5684                 goto out;
5685         }
5686
5687         btrfs_release_path(path);
5688         /* if refs is 0, put it into lostfound */
5689         if (ref_count == 0) {
5690                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5691                                               name_len, filetype, &ref_count);
5692                 if (ret)
5693                         goto fail;
5694         }
5695
5696         /* reset inode_item's nlink to ref_count */
5697         key.objectid = ino;
5698         key.type = BTRFS_INODE_ITEM_KEY;
5699         key.offset = 0;
5700
5701         btrfs_release_path(path);
5702         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5703         if (ret > 0)
5704                 ret = -ENOENT;
5705         if (ret)
5706                 goto fail;
5707
5708         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5709                             struct btrfs_inode_item);
5710         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5711         btrfs_mark_buffer_dirty(path->nodes[0]);
5712
5713         if (nlink)
5714                 *nlink = ref_count;
5715 fail:
5716         btrfs_commit_transaction(trans, root);
5717 out:
5718         if (ret)
5719                 error(
5720         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5721                        root->objectid, ino, namebuf, filetype);
5722         else
5723                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5724                        root->objectid, ino, namebuf, filetype);
5725
5726         /* research */
5727         btrfs_release_path(path);
5728         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5729         if (ret2 < 0)
5730                 return ret |= ret2;
5731         return ret;
5732 }
5733
5734 /*
5735  * Check INODE_ITEM and related ITEMs (the same inode number)
5736  * 1. check link count
5737  * 2. check inode ref/extref
5738  * 3. check dir item/index
5739  *
5740  * @ext_ref:    the EXTENDED_IREF feature
5741  *
5742  * Return 0 if no error occurred.
5743  * Return >0 for error or hit the traversal is done(by error bitmap)
5744  */
5745 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5746                             unsigned int ext_ref)
5747 {
5748         struct extent_buffer *node;
5749         struct btrfs_inode_item *ii;
5750         struct btrfs_key key;
5751         u64 inode_id;
5752         u32 mode;
5753         u64 nlink;
5754         u64 nbytes;
5755         u64 isize;
5756         u64 size = 0;
5757         u64 refs = 0;
5758         u64 extent_end = 0;
5759         u64 extent_size = 0;
5760         unsigned int dir;
5761         unsigned int nodatasum;
5762         int slot;
5763         int ret;
5764         int err = 0;
5765         char namebuf[BTRFS_NAME_LEN] = {0};
5766         u32 name_len = 0;
5767
5768         node = path->nodes[0];
5769         slot = path->slots[0];
5770
5771         btrfs_item_key_to_cpu(node, &key, slot);
5772         inode_id = key.objectid;
5773
5774         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5775                 ret = btrfs_next_item(root, path);
5776                 if (ret > 0)
5777                         err |= LAST_ITEM;
5778                 return err;
5779         }
5780
5781         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5782         isize = btrfs_inode_size(node, ii);
5783         nbytes = btrfs_inode_nbytes(node, ii);
5784         mode = btrfs_inode_mode(node, ii);
5785         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5786         nlink = btrfs_inode_nlink(node, ii);
5787         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5788
5789         while (1) {
5790                 ret = btrfs_next_item(root, path);
5791                 if (ret < 0) {
5792                         /* out will fill 'err' rusing current statistics */
5793                         goto out;
5794                 } else if (ret > 0) {
5795                         err |= LAST_ITEM;
5796                         goto out;
5797                 }
5798
5799                 node = path->nodes[0];
5800                 slot = path->slots[0];
5801                 btrfs_item_key_to_cpu(node, &key, slot);
5802                 if (key.objectid != inode_id)
5803                         goto out;
5804
5805                 switch (key.type) {
5806                 case BTRFS_INODE_REF_KEY:
5807                         ret = check_inode_ref(root, &key, path, namebuf,
5808                                               &name_len, &refs, mode);
5809                         err |= ret;
5810                         break;
5811                 case BTRFS_INODE_EXTREF_KEY:
5812                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5813                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5814                                         root->objectid, key.objectid,
5815                                         key.offset);
5816                         ret = check_inode_extref(root, &key, node, slot, &refs,
5817                                                  mode);
5818                         err |= ret;
5819                         break;
5820                 case BTRFS_DIR_ITEM_KEY:
5821                 case BTRFS_DIR_INDEX_KEY:
5822                         if (!dir) {
5823                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5824                                         root->objectid, inode_id,
5825                                         imode_to_type(mode), key.objectid,
5826                                         key.offset);
5827                         }
5828                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5829                         err |= ret;
5830                         break;
5831                 case BTRFS_EXTENT_DATA_KEY:
5832                         if (dir) {
5833                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5834                                         root->objectid, inode_id, key.objectid,
5835                                         key.offset);
5836                         }
5837                         ret = check_file_extent(root, &key, node, slot,
5838                                                 nodatasum, &extent_size,
5839                                                 &extent_end);
5840                         err |= ret;
5841                         break;
5842                 case BTRFS_XATTR_ITEM_KEY:
5843                         break;
5844                 default:
5845                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5846                               key.objectid, key.type, key.offset);
5847                 }
5848         }
5849
5850 out:
5851         /* verify INODE_ITEM nlink/isize/nbytes */
5852         if (dir) {
5853                 if (repair && (err & DIR_COUNT_AGAIN)) {
5854                         err &= ~DIR_COUNT_AGAIN;
5855                         count_dir_isize(root, inode_id, &size);
5856                 }
5857
5858                 if ((nlink != 1 || refs != 1) && repair) {
5859                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5860                                 namebuf, name_len, refs, imode_to_type(mode),
5861                                 &nlink);
5862                 }
5863
5864                 if (nlink != 1) {
5865                         err |= LINK_COUNT_ERROR;
5866                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5867                               root->objectid, inode_id, nlink);
5868                 }
5869
5870                 /*
5871                  * Just a warning, as dir inode nbytes is just an
5872                  * instructive value.
5873                  */
5874                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5875                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5876                                 root->objectid, inode_id,
5877                                 root->fs_info->nodesize);
5878                 }
5879
5880                 if (isize != size) {
5881                         if (repair)
5882                                 ret = repair_dir_isize_lowmem(root, path,
5883                                                               inode_id, size);
5884                         if (!repair || ret) {
5885                                 err |= ISIZE_ERROR;
5886                                 error(
5887                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5888                                       root->objectid, inode_id, isize, size);
5889                         }
5890                 }
5891         } else {
5892                 if (nlink != refs) {
5893                         if (repair)
5894                                 ret = repair_inode_nlinks_lowmem(root, path,
5895                                          inode_id, namebuf, name_len, refs,
5896                                          imode_to_type(mode), &nlink);
5897                         if (!repair || ret) {
5898                                 err |= LINK_COUNT_ERROR;
5899                                 error(
5900                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5901                                       root->objectid, inode_id, nlink, refs);
5902                         }
5903                 } else if (!nlink) {
5904                         if (repair)
5905                                 ret = repair_inode_orphan_item_lowmem(root,
5906                                                               path, inode_id);
5907                         if (!repair || ret) {
5908                                 err |= ORPHAN_ITEM;
5909                                 error("root %llu INODE[%llu] is orphan item",
5910                                       root->objectid, inode_id);
5911                         }
5912                 }
5913
5914                 if (!nbytes && !no_holes && extent_end < isize) {
5915                         if (repair)
5916                                 ret = punch_extent_hole(root, inode_id,
5917                                                 extent_end, isize - extent_end);
5918                         if (!repair || ret) {
5919                                 err |= NBYTES_ERROR;
5920                                 error(
5921         "root %llu INODE[%llu] size %llu should have a file extent hole",
5922                                       root->objectid, inode_id, isize);
5923                         }
5924                 }
5925
5926                 if (nbytes != extent_size) {
5927                         if (repair)
5928                                 ret = repair_inode_nbytes_lowmem(root, path,
5929                                                          inode_id, extent_size);
5930                         if (!repair || ret) {
5931                                 err |= NBYTES_ERROR;
5932                                 error(
5933         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5934                                       root->objectid, inode_id, nbytes,
5935                                       extent_size);
5936                         }
5937                 }
5938         }
5939
5940         return err;
5941 }
5942
5943 /*
5944  * Insert the missing inode item and inode ref.
5945  *
5946  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5947  * Root dir should be handled specially because root dir is the root of fs.
5948  *
5949  * returns err (>0 or 0) after repair
5950  */
5951 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5952 {
5953         struct btrfs_trans_handle *trans;
5954         struct btrfs_key key;
5955         struct btrfs_path path;
5956         int filetype = BTRFS_FT_DIR;
5957         int ret = 0;
5958
5959         btrfs_init_path(&path);
5960
5961         if (err & INODE_REF_MISSING) {
5962                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5963                 key.type = BTRFS_INODE_REF_KEY;
5964                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5965
5966                 trans = btrfs_start_transaction(root, 1);
5967                 if (IS_ERR(trans)) {
5968                         ret = PTR_ERR(trans);
5969                         goto out;
5970                 }
5971
5972                 btrfs_release_path(&path);
5973                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5974                 if (ret)
5975                         goto trans_fail;
5976
5977                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5978                                              BTRFS_FIRST_FREE_OBJECTID,
5979                                              BTRFS_FIRST_FREE_OBJECTID, 0);
5980                 if (ret)
5981                         goto trans_fail;
5982
5983                 printf("Add INODE_REF[%llu %llu] name %s\n",
5984                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5985                        "..");
5986                 err &= ~INODE_REF_MISSING;
5987 trans_fail:
5988                 if (ret)
5989                         error("fail to insert first inode's ref");
5990                 btrfs_commit_transaction(trans, root);
5991         }
5992
5993         if (err & INODE_ITEM_MISSING) {
5994                 ret = repair_inode_item_missing(root,
5995                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
5996                 if (ret)
5997                         goto out;
5998                 err &= ~INODE_ITEM_MISSING;
5999         }
6000 out:
6001         if (ret)
6002                 error("fail to repair first inode");
6003         btrfs_release_path(&path);
6004         return err;
6005 }
6006
6007 /*
6008  * check first root dir's inode_item and inode_ref
6009  *
6010  * returns 0 means no error
6011  * returns >0 means error
6012  * returns <0 means fatal error
6013  */
6014 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6015 {
6016         struct btrfs_path path;
6017         struct btrfs_key key;
6018         struct btrfs_inode_item *ii;
6019         u64 index;
6020         u32 mode;
6021         int err = 0;
6022         int ret;
6023
6024         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6025         key.type = BTRFS_INODE_ITEM_KEY;
6026         key.offset = 0;
6027
6028         /* For root being dropped, we don't need to check first inode */
6029         if (btrfs_root_refs(&root->root_item) == 0 &&
6030             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6031             BTRFS_FIRST_FREE_OBJECTID)
6032                 return 0;
6033
6034         btrfs_init_path(&path);
6035         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6036         if (ret < 0)
6037                 goto out;
6038         if (ret > 0) {
6039                 ret = 0;
6040                 err |= INODE_ITEM_MISSING;
6041         } else {
6042                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6043                                     struct btrfs_inode_item);
6044                 mode = btrfs_inode_mode(path.nodes[0], ii);
6045                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6046                         err |= INODE_ITEM_MISMATCH;
6047         }
6048
6049         /* lookup first inode ref */
6050         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6051         key.type = BTRFS_INODE_REF_KEY;
6052         /* special index value */
6053         index = 0;
6054
6055         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6056         if (ret < 0)
6057                 goto out;
6058         err |= ret;
6059
6060 out:
6061         btrfs_release_path(&path);
6062
6063         if (err && repair)
6064                 err = repair_fs_first_inode(root, err);
6065
6066         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6067                 error("root dir INODE_ITEM is %s",
6068                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6069         if (err & INODE_REF_MISSING)
6070                 error("root dir INODE_REF is missing");
6071
6072         return ret < 0 ? ret : err;
6073 }
6074
6075 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6076                                                 u64 parent, u64 root)
6077 {
6078         struct rb_node *node;
6079         struct tree_backref *back = NULL;
6080         struct tree_backref match = {
6081                 .node = {
6082                         .is_data = 0,
6083                 },
6084         };
6085
6086         if (parent) {
6087                 match.parent = parent;
6088                 match.node.full_backref = 1;
6089         } else {
6090                 match.root = root;
6091         }
6092
6093         node = rb_search(&rec->backref_tree, &match.node.node,
6094                          (rb_compare_keys)compare_extent_backref, NULL);
6095         if (node)
6096                 back = to_tree_backref(rb_node_to_extent_backref(node));
6097
6098         return back;
6099 }
6100
6101 static struct data_backref *find_data_backref(struct extent_record *rec,
6102                                                 u64 parent, u64 root,
6103                                                 u64 owner, u64 offset,
6104                                                 int found_ref,
6105                                                 u64 disk_bytenr, u64 bytes)
6106 {
6107         struct rb_node *node;
6108         struct data_backref *back = NULL;
6109         struct data_backref match = {
6110                 .node = {
6111                         .is_data = 1,
6112                 },
6113                 .owner = owner,
6114                 .offset = offset,
6115                 .bytes = bytes,
6116                 .found_ref = found_ref,
6117                 .disk_bytenr = disk_bytenr,
6118         };
6119
6120         if (parent) {
6121                 match.parent = parent;
6122                 match.node.full_backref = 1;
6123         } else {
6124                 match.root = root;
6125         }
6126
6127         node = rb_search(&rec->backref_tree, &match.node.node,
6128                          (rb_compare_keys)compare_extent_backref, NULL);
6129         if (node)
6130                 back = to_data_backref(rb_node_to_extent_backref(node));
6131
6132         return back;
6133 }
6134 /*
6135  * Iterate all item on the tree and call check_inode_item() to check.
6136  *
6137  * @root:       the root of the tree to be checked.
6138  * @ext_ref:    the EXTENDED_IREF feature
6139  *
6140  * Return 0 if no error found.
6141  * Return <0 for error.
6142  */
6143 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6144 {
6145         struct btrfs_path path;
6146         struct node_refs nrefs;
6147         struct btrfs_root_item *root_item = &root->root_item;
6148         int ret;
6149         int level;
6150         int err = 0;
6151
6152         /*
6153          * We need to manually check the first inode item(256)
6154          * As the following traversal function will only start from
6155          * the first inode item in the leaf, if inode item(256) is missing
6156          * we will just skip it forever.
6157          */
6158         ret = check_fs_first_inode(root, ext_ref);
6159         if (ret < 0)
6160                 return ret;
6161         err |= !!ret;
6162
6163         memset(&nrefs, 0, sizeof(nrefs));
6164         level = btrfs_header_level(root->node);
6165         btrfs_init_path(&path);
6166
6167         if (btrfs_root_refs(root_item) > 0 ||
6168             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6169                 path.nodes[level] = root->node;
6170                 path.slots[level] = 0;
6171                 extent_buffer_get(root->node);
6172         } else {
6173                 struct btrfs_key key;
6174
6175                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6176                 level = root_item->drop_level;
6177                 path.lowest_level = level;
6178                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6179                 if (ret < 0)
6180                         goto out;
6181                 ret = 0;
6182         }
6183
6184         while (1) {
6185                 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
6186                 err |= !!ret;
6187
6188                 /* if ret is negative, walk shall stop */
6189                 if (ret < 0) {
6190                         ret = err;
6191                         break;
6192                 }
6193
6194                 ret = walk_up_tree_v2(root, &path, &level);
6195                 if (ret != 0) {
6196                         /* Normal exit, reset ret to err */
6197                         ret = err;
6198                         break;
6199                 }
6200         }
6201
6202 out:
6203         btrfs_release_path(&path);
6204         return ret;
6205 }
6206
6207 /*
6208  * Find the relative ref for root_ref and root_backref.
6209  *
6210  * @root:       the root of the root tree.
6211  * @ref_key:    the key of the root ref.
6212  *
6213  * Return 0 if no error occurred.
6214  */
6215 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6216                           struct extent_buffer *node, int slot)
6217 {
6218         struct btrfs_path path;
6219         struct btrfs_key key;
6220         struct btrfs_root_ref *ref;
6221         struct btrfs_root_ref *backref;
6222         char ref_name[BTRFS_NAME_LEN] = {0};
6223         char backref_name[BTRFS_NAME_LEN] = {0};
6224         u64 ref_dirid;
6225         u64 ref_seq;
6226         u32 ref_namelen;
6227         u64 backref_dirid;
6228         u64 backref_seq;
6229         u32 backref_namelen;
6230         u32 len;
6231         int ret;
6232         int err = 0;
6233
6234         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6235         ref_dirid = btrfs_root_ref_dirid(node, ref);
6236         ref_seq = btrfs_root_ref_sequence(node, ref);
6237         ref_namelen = btrfs_root_ref_name_len(node, ref);
6238
6239         if (ref_namelen <= BTRFS_NAME_LEN) {
6240                 len = ref_namelen;
6241         } else {
6242                 len = BTRFS_NAME_LEN;
6243                 warning("%s[%llu %llu] ref_name too long",
6244                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6245                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6246                         ref_key->offset);
6247         }
6248         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6249
6250         /* Find relative root_ref */
6251         key.objectid = ref_key->offset;
6252         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6253         key.offset = ref_key->objectid;
6254
6255         btrfs_init_path(&path);
6256         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6257         if (ret) {
6258                 err |= ROOT_REF_MISSING;
6259                 error("%s[%llu %llu] couldn't find relative ref",
6260                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6261                       "ROOT_REF" : "ROOT_BACKREF",
6262                       ref_key->objectid, ref_key->offset);
6263                 goto out;
6264         }
6265
6266         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6267                                  struct btrfs_root_ref);
6268         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6269         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6270         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6271
6272         if (backref_namelen <= BTRFS_NAME_LEN) {
6273                 len = backref_namelen;
6274         } else {
6275                 len = BTRFS_NAME_LEN;
6276                 warning("%s[%llu %llu] ref_name too long",
6277                         key.type == BTRFS_ROOT_REF_KEY ?
6278                         "ROOT_REF" : "ROOT_BACKREF",
6279                         key.objectid, key.offset);
6280         }
6281         read_extent_buffer(path.nodes[0], backref_name,
6282                            (unsigned long)(backref + 1), len);
6283
6284         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6285             ref_namelen != backref_namelen ||
6286             strncmp(ref_name, backref_name, len)) {
6287                 err |= ROOT_REF_MISMATCH;
6288                 error("%s[%llu %llu] mismatch relative ref",
6289                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6290                       "ROOT_REF" : "ROOT_BACKREF",
6291                       ref_key->objectid, ref_key->offset);
6292         }
6293 out:
6294         btrfs_release_path(&path);
6295         return err;
6296 }
6297
6298 /*
6299  * Check all fs/file tree in low_memory mode.
6300  *
6301  * 1. for fs tree root item, call check_fs_root_v2()
6302  * 2. for fs tree root ref/backref, call check_root_ref()
6303  *
6304  * Return 0 if no error occurred.
6305  */
6306 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6307 {
6308         struct btrfs_root *tree_root = fs_info->tree_root;
6309         struct btrfs_root *cur_root = NULL;
6310         struct btrfs_path path;
6311         struct btrfs_key key;
6312         struct extent_buffer *node;
6313         unsigned int ext_ref;
6314         int slot;
6315         int ret;
6316         int err = 0;
6317
6318         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6319
6320         btrfs_init_path(&path);
6321         key.objectid = BTRFS_FS_TREE_OBJECTID;
6322         key.offset = 0;
6323         key.type = BTRFS_ROOT_ITEM_KEY;
6324
6325         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6326         if (ret < 0) {
6327                 err = ret;
6328                 goto out;
6329         } else if (ret > 0) {
6330                 err = -ENOENT;
6331                 goto out;
6332         }
6333
6334         while (1) {
6335                 node = path.nodes[0];
6336                 slot = path.slots[0];
6337                 btrfs_item_key_to_cpu(node, &key, slot);
6338                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6339                         goto out;
6340                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6341                     fs_root_objectid(key.objectid)) {
6342                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6343                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6344                                                                        &key);
6345                         } else {
6346                                 key.offset = (u64)-1;
6347                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6348                         }
6349
6350                         if (IS_ERR(cur_root)) {
6351                                 error("Fail to read fs/subvol tree: %lld",
6352                                       key.objectid);
6353                                 err = -EIO;
6354                                 goto next;
6355                         }
6356
6357                         ret = check_fs_root_v2(cur_root, ext_ref);
6358                         err |= ret;
6359
6360                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6361                                 btrfs_free_fs_root(cur_root);
6362                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6363                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6364                         ret = check_root_ref(tree_root, &key, node, slot);
6365                         err |= ret;
6366                 }
6367 next:
6368                 ret = btrfs_next_item(tree_root, &path);
6369                 if (ret > 0)
6370                         goto out;
6371                 if (ret < 0) {
6372                         err = ret;
6373                         goto out;
6374                 }
6375         }
6376
6377 out:
6378         btrfs_release_path(&path);
6379         return err;
6380 }
6381
6382 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6383                           struct cache_tree *root_cache)
6384 {
6385         int ret;
6386
6387         if (!ctx.progress_enabled)
6388                 fprintf(stderr, "checking fs roots\n");
6389         if (check_mode == CHECK_MODE_LOWMEM)
6390                 ret = check_fs_roots_v2(fs_info);
6391         else
6392                 ret = check_fs_roots(fs_info, root_cache);
6393
6394         return ret;
6395 }
6396
/*
 * Check every backref attached to an extent record and compare the summed
 * reference count with the count stored in the record.
 *
 * @rec:        extent record whose backref_tree is walked
 * @print_errs: when non-zero every problem is reported on stderr and the walk
 *              continues; when zero the function stops at the first problem
 *
 * Return 0 if no inconsistency was found, 1 otherwise.
 */
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
	struct extent_backref *back, *tmp;
	struct tree_backref *tback;
	struct data_backref *dback;
	u64 found = 0;
	int err = 0;

	rbtree_postorder_for_each_entry_safe(back, tmp,
					     &rec->backref_tree, node) {
		/* Backref was never seen in the extent tree */
		if (!back->found_extent_tree) {
			err = 1;
			if (!print_errs)
				goto out;
			if (back->is_data) {
				dback = to_data_backref(back);
				fprintf(stderr, "Data backref %llu %s %llu"
					" owner %llu offset %llu num_refs %lu"
					" not found in extent tree\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					(unsigned long)dback->num_refs);
			} else {
				tback = to_tree_backref(back);
				fprintf(stderr, "Tree backref %llu parent %llu"
					" root %llu not found in extent tree\n",
					(unsigned long long)rec->start,
					(unsigned long long)tback->parent,
					(unsigned long long)tback->root);
			}
		}
		/* Tree backref without a matching reference (found_ref unset) */
		if (!back->is_data && !back->found_ref) {
			err = 1;
			if (!print_errs)
				goto out;
			tback = to_tree_backref(back);
			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
				(unsigned long long)rec->start,
				back->full_backref ? "parent" : "root",
				back->full_backref ?
				(unsigned long long)tback->parent :
				(unsigned long long)tback->root, back);
		}
		if (back->is_data) {
			dback = to_data_backref(back);
			/* Per-backref count: references found vs. num_refs */
			if (dback->found_ref != dback->num_refs) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Incorrect local backref count"
					" on %llu %s %llu owner %llu"
					" offset %llu found %u wanted %u back %p\n",
					(unsigned long long)rec->start,
					back->full_backref ?
					"parent" : "root",
					back->full_backref ?
					(unsigned long long)dback->parent:
					(unsigned long long)dback->root,
					(unsigned long long)dback->owner,
					(unsigned long long)dback->offset,
					dback->found_ref, dback->num_refs, back);
			}
			/* The data backref must describe the same extent start */
			if (dback->disk_bytenr != rec->start) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Backref disk bytenr does not"
					" match extent record, bytenr=%llu, "
					"ref bytenr=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)dback->disk_bytenr);
			}

			/* ... and the same extent length */
			if (dback->bytes != rec->nr) {
				err = 1;
				if (!print_errs)
					goto out;
				fprintf(stderr, "Backref bytes do not match "
					"extent backref, bytenr=%llu, ref "
					"bytes=%llu, backref bytes=%llu\n",
					(unsigned long long)rec->start,
					(unsigned long long)rec->nr,
					(unsigned long long)dback->bytes);
			}
		}
		/*
		 * Accumulate the global count: a tree backref counts once, a
		 * data backref counts as many times as it was found.
		 */
		if (!back->is_data) {
			found += 1;
		} else {
			dback = to_data_backref(back);
			found += dback->found_ref;
		}
	}
	/* The sum over all backrefs must match the record's reference count */
	if (found != rec->refs) {
		err = 1;
		if (!print_errs)
			goto out;
		fprintf(stderr, "Incorrect global backref count "
			"on %llu found %llu wanted %llu\n",
			(unsigned long long)rec->start,
			(unsigned long long)found,
			(unsigned long long)rec->refs);
	}
out:
	return err;
}
6508
/*
 * rb_free_nodes() callback: release the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
6515
/*
 * Free every backref stored in @rec's backref_tree; each node is released
 * through __free_one_backref().
 */
static void free_all_extent_backrefs(struct extent_record *rec)
{
	rb_free_nodes(&rec->backref_tree, __free_one_backref);
}
6520
6521 static void free_extent_record_cache(struct cache_tree *extent_cache)
6522 {
6523         struct cache_extent *cache;
6524         struct extent_record *rec;
6525
6526         while (1) {
6527                 cache = first_cache_extent(extent_cache);
6528                 if (!cache)
6529                         break;
6530                 rec = container_of(cache, struct extent_record, cache);
6531                 remove_cache_extent(extent_cache, cache);
6532                 free_all_extent_backrefs(rec);
6533                 free(rec);
6534         }
6535 }
6536
6537 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6538                                  struct extent_record *rec)
6539 {
6540         if (rec->content_checked && rec->owner_ref_checked &&
6541             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6542             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6543             !rec->bad_full_backref && !rec->crossing_stripes &&
6544             !rec->wrong_chunk_type) {
6545                 remove_cache_extent(extent_cache, &rec->cache);
6546                 free_all_extent_backrefs(rec);
6547                 list_del_init(&rec->list);
6548                 free(rec);
6549         }
6550         return 0;
6551 }
6552
6553 static int check_owner_ref(struct btrfs_root *root,
6554                             struct extent_record *rec,
6555                             struct extent_buffer *buf)
6556 {
6557         struct extent_backref *node, *tmp;
6558         struct tree_backref *back;
6559         struct btrfs_root *ref_root;
6560         struct btrfs_key key;
6561         struct btrfs_path path;
6562         struct extent_buffer *parent;
6563         int level;
6564         int found = 0;
6565         int ret;
6566
6567         rbtree_postorder_for_each_entry_safe(node, tmp,
6568                                              &rec->backref_tree, node) {
6569                 if (node->is_data)
6570                         continue;
6571                 if (!node->found_ref)
6572                         continue;
6573                 if (node->full_backref)
6574                         continue;
6575                 back = to_tree_backref(node);
6576                 if (btrfs_header_owner(buf) == back->root)
6577                         return 0;
6578         }
6579         BUG_ON(rec->is_root);
6580
6581         /* try to find the block by search corresponding fs tree */
6582         key.objectid = btrfs_header_owner(buf);
6583         key.type = BTRFS_ROOT_ITEM_KEY;
6584         key.offset = (u64)-1;
6585
6586         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6587         if (IS_ERR(ref_root))
6588                 return 1;
6589
6590         level = btrfs_header_level(buf);
6591         if (level == 0)
6592                 btrfs_item_key_to_cpu(buf, &key, 0);
6593         else
6594                 btrfs_node_key_to_cpu(buf, &key, 0);
6595
6596         btrfs_init_path(&path);
6597         path.lowest_level = level + 1;
6598         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6599         if (ret < 0)
6600                 return 0;
6601
6602         parent = path.nodes[level + 1];
6603         if (parent && buf->start == btrfs_node_blockptr(parent,
6604                                                         path.slots[level + 1]))
6605                 found = 1;
6606
6607         btrfs_release_path(&path);
6608         return found ? 0 : 1;
6609 }
6610
6611 static int is_extent_tree_record(struct extent_record *rec)
6612 {
6613         struct extent_backref *node, *tmp;
6614         struct tree_backref *back;
6615         int is_extent = 0;
6616
6617         rbtree_postorder_for_each_entry_safe(node, tmp,
6618                                              &rec->backref_tree, node) {
6619                 if (node->is_data)
6620                         return 0;
6621                 back = to_tree_backref(node);
6622                 if (node->full_backref)
6623                         return 0;
6624                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6625                         is_extent = 1;
6626         }
6627         return is_extent;
6628 }
6629
6630
6631 static int record_bad_block_io(struct btrfs_fs_info *info,
6632                                struct cache_tree *extent_cache,
6633                                u64 start, u64 len)
6634 {
6635         struct extent_record *rec;
6636         struct cache_extent *cache;
6637         struct btrfs_key key;
6638
6639         cache = lookup_cache_extent(extent_cache, start, len);
6640         if (!cache)
6641                 return 0;
6642
6643         rec = container_of(cache, struct extent_record, cache);
6644         if (!is_extent_tree_record(rec))
6645                 return 0;
6646
6647         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6648         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6649 }
6650
6651 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6652                        struct extent_buffer *buf, int slot)
6653 {
6654         if (btrfs_header_level(buf)) {
6655                 struct btrfs_key_ptr ptr1, ptr2;
6656
6657                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6658                                    sizeof(struct btrfs_key_ptr));
6659                 read_extent_buffer(buf, &ptr2,
6660                                    btrfs_node_key_ptr_offset(slot + 1),
6661                                    sizeof(struct btrfs_key_ptr));
6662                 write_extent_buffer(buf, &ptr1,
6663                                     btrfs_node_key_ptr_offset(slot + 1),
6664                                     sizeof(struct btrfs_key_ptr));
6665                 write_extent_buffer(buf, &ptr2,
6666                                     btrfs_node_key_ptr_offset(slot),
6667                                     sizeof(struct btrfs_key_ptr));
6668                 if (slot == 0) {
6669                         struct btrfs_disk_key key;
6670                         btrfs_node_key(buf, &key, 0);
6671                         btrfs_fixup_low_keys(root, path, &key,
6672                                              btrfs_header_level(buf) + 1);
6673                 }
6674         } else {
6675                 struct btrfs_item *item1, *item2;
6676                 struct btrfs_key k1, k2;
6677                 char *item1_data, *item2_data;
6678                 u32 item1_offset, item2_offset, item1_size, item2_size;
6679
6680                 item1 = btrfs_item_nr(slot);
6681                 item2 = btrfs_item_nr(slot + 1);
6682                 btrfs_item_key_to_cpu(buf, &k1, slot);
6683                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6684                 item1_offset = btrfs_item_offset(buf, item1);
6685                 item2_offset = btrfs_item_offset(buf, item2);
6686                 item1_size = btrfs_item_size(buf, item1);
6687                 item2_size = btrfs_item_size(buf, item2);
6688
6689                 item1_data = malloc(item1_size);
6690                 if (!item1_data)
6691                         return -ENOMEM;
6692                 item2_data = malloc(item2_size);
6693                 if (!item2_data) {
6694                         free(item1_data);
6695                         return -ENOMEM;
6696                 }
6697
6698                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6699                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6700
6701                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6702                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6703                 free(item1_data);
6704                 free(item2_data);
6705
6706                 btrfs_set_item_offset(buf, item1, item2_offset);
6707                 btrfs_set_item_offset(buf, item2, item1_offset);
6708                 btrfs_set_item_size(buf, item1, item2_size);
6709                 btrfs_set_item_size(buf, item2, item1_size);
6710
6711                 path->slots[0] = slot;
6712                 btrfs_set_item_key_unsafe(root, path, &k2);
6713                 path->slots[0] = slot + 1;
6714                 btrfs_set_item_key_unsafe(root, path, &k1);
6715         }
6716         return 0;
6717 }
6718
6719 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6720 {
6721         struct extent_buffer *buf;
6722         struct btrfs_key k1, k2;
6723         int i;
6724         int level = path->lowest_level;
6725         int ret = -EIO;
6726
6727         buf = path->nodes[level];
6728         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6729                 if (level) {
6730                         btrfs_node_key_to_cpu(buf, &k1, i);
6731                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6732                 } else {
6733                         btrfs_item_key_to_cpu(buf, &k1, i);
6734                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6735                 }
6736                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6737                         continue;
6738                 ret = swap_values(root, path, buf, i);
6739                 if (ret)
6740                         break;
6741                 btrfs_mark_buffer_dirty(buf);
6742                 i = 0;
6743         }
6744         return ret;
6745 }
6746
6747 static int delete_bogus_item(struct btrfs_root *root,
6748                              struct btrfs_path *path,
6749                              struct extent_buffer *buf, int slot)
6750 {
6751         struct btrfs_key key;
6752         int nritems = btrfs_header_nritems(buf);
6753
6754         btrfs_item_key_to_cpu(buf, &key, slot);
6755
6756         /* These are all the keys we can deal with missing. */
6757         if (key.type != BTRFS_DIR_INDEX_KEY &&
6758             key.type != BTRFS_EXTENT_ITEM_KEY &&
6759             key.type != BTRFS_METADATA_ITEM_KEY &&
6760             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6761             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6762                 return -1;
6763
6764         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6765                (unsigned long long)key.objectid, key.type,
6766                (unsigned long long)key.offset, slot, buf->start);
6767         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6768                               btrfs_item_nr_offset(slot + 1),
6769                               sizeof(struct btrfs_item) *
6770                               (nritems - slot - 1));
6771         btrfs_set_header_nritems(buf, nritems - 1);
6772         if (slot == 0) {
6773                 struct btrfs_disk_key disk_key;
6774
6775                 btrfs_item_key(buf, &disk_key, 0);
6776                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6777         }
6778         btrfs_mark_buffer_dirty(buf);
6779         return 0;
6780 }
6781
6782 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6783 {
6784         struct extent_buffer *buf;
6785         int i;
6786         int ret = 0;
6787
6788         /* We should only get this for leaves */
6789         BUG_ON(path->lowest_level);
6790         buf = path->nodes[0];
6791 again:
6792         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6793                 unsigned int shift = 0, offset;
6794
6795                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6796                     BTRFS_LEAF_DATA_SIZE(root)) {
6797                         if (btrfs_item_end_nr(buf, i) >
6798                             BTRFS_LEAF_DATA_SIZE(root)) {
6799                                 ret = delete_bogus_item(root, path, buf, i);
6800                                 if (!ret)
6801                                         goto again;
6802                                 fprintf(stderr, "item is off the end of the "
6803                                         "leaf, can't fix\n");
6804                                 ret = -EIO;
6805                                 break;
6806                         }
6807                         shift = BTRFS_LEAF_DATA_SIZE(root) -
6808                                 btrfs_item_end_nr(buf, i);
6809                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6810                            btrfs_item_offset_nr(buf, i - 1)) {
6811                         if (btrfs_item_end_nr(buf, i) >
6812                             btrfs_item_offset_nr(buf, i - 1)) {
6813                                 ret = delete_bogus_item(root, path, buf, i);
6814                                 if (!ret)
6815                                         goto again;
6816                                 fprintf(stderr, "items overlap, can't fix\n");
6817                                 ret = -EIO;
6818                                 break;
6819                         }
6820                         shift = btrfs_item_offset_nr(buf, i - 1) -
6821                                 btrfs_item_end_nr(buf, i);
6822                 }
6823                 if (!shift)
6824                         continue;
6825
6826                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6827                        i, shift, (unsigned long long)buf->start);
6828                 offset = btrfs_item_offset_nr(buf, i);
6829                 memmove_extent_buffer(buf,
6830                                       btrfs_leaf_data(buf) + offset + shift,
6831                                       btrfs_leaf_data(buf) + offset,
6832                                       btrfs_item_size_nr(buf, i));
6833                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6834                                       offset + shift);
6835                 btrfs_mark_buffer_dirty(buf);
6836         }
6837
6838         /*
6839          * We may have moved things, in which case we want to exit so we don't
6840          * write those changes out.  Once we have proper abort functionality in
6841          * progs this can be changed to something nicer.
6842          */
6843         BUG_ON(ret);
6844         return ret;
6845 }
6846
6847 /*
6848  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6849  * then just return -EIO.
6850  */
6851 static int try_to_fix_bad_block(struct btrfs_root *root,
6852                                 struct extent_buffer *buf,
6853                                 enum btrfs_tree_block_status status)
6854 {
6855         struct btrfs_trans_handle *trans;
6856         struct ulist *roots;
6857         struct ulist_node *node;
6858         struct btrfs_root *search_root;
6859         struct btrfs_path path;
6860         struct ulist_iterator iter;
6861         struct btrfs_key root_key, key;
6862         int ret;
6863
6864         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6865             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6866                 return -EIO;
6867
6868         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6869         if (ret)
6870                 return -EIO;
6871
6872         btrfs_init_path(&path);
6873         ULIST_ITER_INIT(&iter);
6874         while ((node = ulist_next(roots, &iter))) {
6875                 root_key.objectid = node->val;
6876                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6877                 root_key.offset = (u64)-1;
6878
6879                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6880                 if (IS_ERR(root)) {
6881                         ret = -EIO;
6882                         break;
6883                 }
6884
6885
6886                 trans = btrfs_start_transaction(search_root, 0);
6887                 if (IS_ERR(trans)) {
6888                         ret = PTR_ERR(trans);
6889                         break;
6890                 }
6891
6892                 path.lowest_level = btrfs_header_level(buf);
6893                 path.skip_check_block = 1;
6894                 if (path.lowest_level)
6895                         btrfs_node_key_to_cpu(buf, &key, 0);
6896                 else
6897                         btrfs_item_key_to_cpu(buf, &key, 0);
6898                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6899                 if (ret) {
6900                         ret = -EIO;
6901                         btrfs_commit_transaction(trans, search_root);
6902                         break;
6903                 }
6904                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6905                         ret = fix_key_order(search_root, &path);
6906                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6907                         ret = fix_item_offset(search_root, &path);
6908                 if (ret) {
6909                         btrfs_commit_transaction(trans, search_root);
6910                         break;
6911                 }
6912                 btrfs_release_path(&path);
6913                 btrfs_commit_transaction(trans, search_root);
6914         }
6915         ulist_free(roots);
6916         btrfs_release_path(&path);
6917         return ret;
6918 }
6919
6920 static int check_block(struct btrfs_root *root,
6921                        struct cache_tree *extent_cache,
6922                        struct extent_buffer *buf, u64 flags)
6923 {
6924         struct extent_record *rec;
6925         struct cache_extent *cache;
6926         struct btrfs_key key;
6927         enum btrfs_tree_block_status status;
6928         int ret = 0;
6929         int level;
6930
6931         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6932         if (!cache)
6933                 return 1;
6934         rec = container_of(cache, struct extent_record, cache);
6935         rec->generation = btrfs_header_generation(buf);
6936
6937         level = btrfs_header_level(buf);
6938         if (btrfs_header_nritems(buf) > 0) {
6939
6940                 if (level == 0)
6941                         btrfs_item_key_to_cpu(buf, &key, 0);
6942                 else
6943                         btrfs_node_key_to_cpu(buf, &key, 0);
6944
6945                 rec->info_objectid = key.objectid;
6946         }
6947         rec->info_level = level;
6948
6949         if (btrfs_is_leaf(buf))
6950                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6951         else
6952                 status = btrfs_check_node(root, &rec->parent_key, buf);
6953
6954         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6955                 if (repair)
6956                         status = try_to_fix_bad_block(root, buf, status);
6957                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6958                         ret = -EIO;
6959                         fprintf(stderr, "bad block %llu\n",
6960                                 (unsigned long long)buf->start);
6961                 } else {
6962                         /*
6963                          * Signal to callers we need to start the scan over
6964                          * again since we'll have cowed blocks.
6965                          */
6966                         ret = -EAGAIN;
6967                 }
6968         } else {
6969                 rec->content_checked = 1;
6970                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6971                         rec->owner_ref_checked = 1;
6972                 else {
6973                         ret = check_owner_ref(root, rec, buf);
6974                         if (!ret)
6975                                 rec->owner_ref_checked = 1;
6976                 }
6977         }
6978         if (!ret)
6979                 maybe_free_extent_rec(extent_cache, rec);
6980         return ret;
6981 }
6982
/*
 * Compiled out.  Linear lookup of a tree backref in a record's backref list:
 * with a non-zero @parent it matches full backrefs by parent, otherwise it
 * matches non-full backrefs by @root.  Returns the match or NULL.
 *
 * NOTE(review): this walks rec->backrefs as a list, while the live code in
 * this file iterates rec->backref_tree; presumably stale after a list ->
 * rbtree conversion - confirm before re-enabling.
 */
#if 0
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *cur = rec->backrefs.next;
	struct extent_backref *node;
	struct tree_backref *back;

	while(cur != &rec->backrefs) {
		node = to_extent_backref(cur);
		cur = cur->next;
		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (!node->full_backref)
				continue;
			if (parent == back->parent)
				return back;
		} else {
			if (node->full_backref)
				continue;
			if (back->root == root)
				return back;
		}
	}
	return NULL;
}
#endif
7012
7013 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7014                                                 u64 parent, u64 root)
7015 {
7016         struct tree_backref *ref = malloc(sizeof(*ref));
7017
7018         if (!ref)
7019                 return NULL;
7020         memset(&ref->node, 0, sizeof(ref->node));
7021         if (parent > 0) {
7022                 ref->parent = parent;
7023                 ref->node.full_backref = 1;
7024         } else {
7025                 ref->root = root;
7026                 ref->node.full_backref = 0;
7027         }
7028
7029         return ref;
7030 }
7031
#if 0
/*
 * Disabled (compiled out) list-based lookup of a data backref on @rec.
 *
 * Non-data backrefs are skipped.  With a non-zero @parent only full backrefs
 * are candidates and the one whose parent equals @parent is returned.  With
 * @parent == 0 only non-full backrefs are candidates; one matches when its
 * root, owner and offset equal the arguments.  If both @found_ref and the
 * candidate's found_ref are set, the candidate is additionally required to
 * have the same @bytes and @disk_bytenr.  Returns NULL when nothing matches.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;
	struct list_head *next;

	for (pos = rec->backrefs.next; pos != &rec->backrefs; pos = next) {
		struct extent_backref *ref = to_extent_backref(pos);
		struct data_backref *dref;

		next = pos->next;
		if (!ref->is_data)
			continue;

		dref = to_data_backref(ref);
		if (parent > 0) {
			if (ref->full_backref && dref->parent == parent)
				return dref;
			continue;
		}

		if (ref->full_backref)
			continue;
		if (dref->root != root || dref->owner != owner ||
		    dref->offset != offset)
			continue;
		/* an already-found ref must also agree on size and location */
		if (found_ref && ref->found_ref &&
		    (dref->bytes != bytes || dref->disk_bytenr != disk_bytenr))
			continue;
		return dref;
	}
	return NULL;
}
#endif
7070
7071 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7072                                                 u64 parent, u64 root,
7073                                                 u64 owner, u64 offset,
7074                                                 u64 max_size)
7075 {
7076         struct data_backref *ref = malloc(sizeof(*ref));
7077
7078         if (!ref)
7079                 return NULL;
7080         memset(&ref->node, 0, sizeof(ref->node));
7081         ref->node.is_data = 1;
7082
7083         if (parent > 0) {
7084                 ref->parent = parent;
7085                 ref->owner = 0;
7086                 ref->offset = 0;
7087                 ref->node.full_backref = 1;
7088         } else {
7089                 ref->root = root;
7090                 ref->owner = owner;
7091                 ref->offset = offset;
7092                 ref->node.full_backref = 0;
7093         }
7094         ref->bytes = max_size;
7095         ref->found_ref = 0;
7096         ref->num_refs = 0;
7097         if (max_size > rec->max_size)
7098                 rec->max_size = max_size;
7099         return ref;
7100 }
7101
7102 /* Check if the type of extent matches with its chunk */
7103 static void check_extent_type(struct extent_record *rec)
7104 {
7105         struct btrfs_block_group_cache *bg_cache;
7106
7107         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7108         if (!bg_cache)
7109                 return;
7110
7111         /* data extent, check chunk directly*/
7112         if (!rec->metadata) {
7113                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7114                         rec->wrong_chunk_type = 1;
7115                 return;
7116         }
7117
7118         /* metadata extent, check the obvious case first */
7119         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7120                                  BTRFS_BLOCK_GROUP_METADATA))) {
7121                 rec->wrong_chunk_type = 1;
7122                 return;
7123         }
7124
7125         /*
7126          * Check SYSTEM extent, as it's also marked as metadata, we can only
7127          * make sure it's a SYSTEM extent by its backref
7128          */
7129         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7130                 struct extent_backref *node;
7131                 struct tree_backref *tback;
7132                 u64 bg_type;
7133
7134                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7135                 if (node->is_data) {
7136                         /* tree block shouldn't have data backref */
7137                         rec->wrong_chunk_type = 1;
7138                         return;
7139                 }
7140                 tback = container_of(node, struct tree_backref, node);
7141
7142                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7143                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7144                 else
7145                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7146                 if (!(bg_cache->flags & bg_type))
7147                         rec->wrong_chunk_type = 1;
7148         }
7149 }
7150
7151 /*
7152  * Allocate a new extent record, fill default values from @tmpl and insert int
7153  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7154  * the cache, otherwise it fails.
7155  */
7156 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7157                 struct extent_record *tmpl)
7158 {
7159         struct extent_record *rec;
7160         int ret = 0;
7161
7162         BUG_ON(tmpl->max_size == 0);
7163         rec = malloc(sizeof(*rec));
7164         if (!rec)
7165                 return -ENOMEM;
7166         rec->start = tmpl->start;
7167         rec->max_size = tmpl->max_size;
7168         rec->nr = max(tmpl->nr, tmpl->max_size);
7169         rec->found_rec = tmpl->found_rec;
7170         rec->content_checked = tmpl->content_checked;
7171         rec->owner_ref_checked = tmpl->owner_ref_checked;
7172         rec->num_duplicates = 0;
7173         rec->metadata = tmpl->metadata;
7174         rec->flag_block_full_backref = FLAG_UNSET;
7175         rec->bad_full_backref = 0;
7176         rec->crossing_stripes = 0;
7177         rec->wrong_chunk_type = 0;
7178         rec->is_root = tmpl->is_root;
7179         rec->refs = tmpl->refs;
7180         rec->extent_item_refs = tmpl->extent_item_refs;
7181         rec->parent_generation = tmpl->parent_generation;
7182         INIT_LIST_HEAD(&rec->backrefs);
7183         INIT_LIST_HEAD(&rec->dups);
7184         INIT_LIST_HEAD(&rec->list);
7185         rec->backref_tree = RB_ROOT;
7186         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7187         rec->cache.start = tmpl->start;
7188         rec->cache.size = tmpl->nr;
7189         ret = insert_cache_extent(extent_cache, &rec->cache);
7190         if (ret) {
7191                 free(rec);
7192                 return ret;
7193         }
7194         bytes_used += rec->nr;
7195
7196         if (tmpl->metadata)
7197                 rec->crossing_stripes = check_crossing_stripes(global_info,
7198                                 rec->start, global_info->nodesize);
7199         check_extent_type(rec);
7200         return ret;
7201 }
7202
7203 /*
7204  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7205  * some are hints:
7206  * - refs              - if found, increase refs
7207  * - is_root           - if found, set
7208  * - content_checked   - if found, set
7209  * - owner_ref_checked - if found, set
7210  *
7211  * If not found, create a new one, initialize and insert.
7212  */
7213 static int add_extent_rec(struct cache_tree *extent_cache,
7214                 struct extent_record *tmpl)
7215 {
7216         struct extent_record *rec;
7217         struct cache_extent *cache;
7218         int ret = 0;
7219         int dup = 0;
7220
7221         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7222         if (cache) {
7223                 rec = container_of(cache, struct extent_record, cache);
7224                 if (tmpl->refs)
7225                         rec->refs++;
7226                 if (rec->nr == 1)
7227                         rec->nr = max(tmpl->nr, tmpl->max_size);
7228
7229                 /*
7230                  * We need to make sure to reset nr to whatever the extent
7231                  * record says was the real size, this way we can compare it to
7232                  * the backrefs.
7233                  */
7234                 if (tmpl->found_rec) {
7235                         if (tmpl->start != rec->start || rec->found_rec) {
7236                                 struct extent_record *tmp;
7237
7238                                 dup = 1;
7239                                 if (list_empty(&rec->list))
7240                                         list_add_tail(&rec->list,
7241                                                       &duplicate_extents);
7242
7243                                 /*
7244                                  * We have to do this song and dance in case we
7245                                  * find an extent record that falls inside of
7246                                  * our current extent record but does not have
7247                                  * the same objectid.
7248                                  */
7249                                 tmp = malloc(sizeof(*tmp));
7250                                 if (!tmp)
7251                                         return -ENOMEM;
7252                                 tmp->start = tmpl->start;
7253                                 tmp->max_size = tmpl->max_size;
7254                                 tmp->nr = tmpl->nr;
7255                                 tmp->found_rec = 1;
7256                                 tmp->metadata = tmpl->metadata;
7257                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7258                                 INIT_LIST_HEAD(&tmp->list);
7259                                 list_add_tail(&tmp->list, &rec->dups);
7260                                 rec->num_duplicates++;
7261                         } else {
7262                                 rec->nr = tmpl->nr;
7263                                 rec->found_rec = 1;
7264                         }
7265                 }
7266
7267                 if (tmpl->extent_item_refs && !dup) {
7268                         if (rec->extent_item_refs) {
7269                                 fprintf(stderr, "block %llu rec "
7270                                         "extent_item_refs %llu, passed %llu\n",
7271                                         (unsigned long long)tmpl->start,
7272                                         (unsigned long long)
7273                                                         rec->extent_item_refs,
7274                                         (unsigned long long)tmpl->extent_item_refs);
7275                         }
7276                         rec->extent_item_refs = tmpl->extent_item_refs;
7277                 }
7278                 if (tmpl->is_root)
7279                         rec->is_root = 1;
7280                 if (tmpl->content_checked)
7281                         rec->content_checked = 1;
7282                 if (tmpl->owner_ref_checked)
7283                         rec->owner_ref_checked = 1;
7284                 memcpy(&rec->parent_key, &tmpl->parent_key,
7285                                 sizeof(tmpl->parent_key));
7286                 if (tmpl->parent_generation)
7287                         rec->parent_generation = tmpl->parent_generation;
7288                 if (rec->max_size < tmpl->max_size)
7289                         rec->max_size = tmpl->max_size;
7290
7291                 /*
7292                  * A metadata extent can't cross stripe_len boundary, otherwise
7293                  * kernel scrub won't be able to handle it.
7294                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7295                  * it.
7296                  */
7297                 if (tmpl->metadata)
7298                         rec->crossing_stripes = check_crossing_stripes(
7299                                         global_info, rec->start,
7300                                         global_info->nodesize);
7301                 check_extent_type(rec);
7302                 maybe_free_extent_rec(extent_cache, rec);
7303                 return ret;
7304         }
7305
7306         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7307
7308         return ret;
7309 }
7310
7311 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7312                             u64 parent, u64 root, int found_ref)
7313 {
7314         struct extent_record *rec;
7315         struct tree_backref *back;
7316         struct cache_extent *cache;
7317         int ret;
7318         bool insert = false;
7319
7320         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7321         if (!cache) {
7322                 struct extent_record tmpl;
7323
7324                 memset(&tmpl, 0, sizeof(tmpl));
7325                 tmpl.start = bytenr;
7326                 tmpl.nr = 1;
7327                 tmpl.metadata = 1;
7328                 tmpl.max_size = 1;
7329
7330                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7331                 if (ret)
7332                         return ret;
7333
7334                 /* really a bug in cache_extent implement now */
7335                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7336                 if (!cache)
7337                         return -ENOENT;
7338         }
7339
7340         rec = container_of(cache, struct extent_record, cache);
7341         if (rec->start != bytenr) {
7342                 /*
7343                  * Several cause, from unaligned bytenr to over lapping extents
7344                  */
7345                 return -EEXIST;
7346         }
7347
7348         back = find_tree_backref(rec, parent, root);
7349         if (!back) {
7350                 back = alloc_tree_backref(rec, parent, root);
7351                 if (!back)
7352                         return -ENOMEM;
7353                 insert = true;
7354         }
7355
7356         if (found_ref) {
7357                 if (back->node.found_ref) {
7358                         fprintf(stderr, "Extent back ref already exists "
7359                                 "for %llu parent %llu root %llu \n",
7360                                 (unsigned long long)bytenr,
7361                                 (unsigned long long)parent,
7362                                 (unsigned long long)root);
7363                 }
7364                 back->node.found_ref = 1;
7365         } else {
7366                 if (back->node.found_extent_tree) {
7367                         fprintf(stderr, "Extent back ref already exists "
7368                                 "for %llu parent %llu root %llu \n",
7369                                 (unsigned long long)bytenr,
7370                                 (unsigned long long)parent,
7371                                 (unsigned long long)root);
7372                 }
7373                 back->node.found_extent_tree = 1;
7374         }
7375         if (insert)
7376                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7377                         compare_extent_backref));
7378         check_extent_type(rec);
7379         maybe_free_extent_rec(extent_cache, rec);
7380         return 0;
7381 }
7382
7383 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7384                             u64 parent, u64 root, u64 owner, u64 offset,
7385                             u32 num_refs, int found_ref, u64 max_size)
7386 {
7387         struct extent_record *rec;
7388         struct data_backref *back;
7389         struct cache_extent *cache;
7390         int ret;
7391         bool insert = false;
7392
7393         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7394         if (!cache) {
7395                 struct extent_record tmpl;
7396
7397                 memset(&tmpl, 0, sizeof(tmpl));
7398                 tmpl.start = bytenr;
7399                 tmpl.nr = 1;
7400                 tmpl.max_size = max_size;
7401
7402                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7403                 if (ret)
7404                         return ret;
7405
7406                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7407                 if (!cache)
7408                         abort();
7409         }
7410
7411         rec = container_of(cache, struct extent_record, cache);
7412         if (rec->max_size < max_size)
7413                 rec->max_size = max_size;
7414
7415         /*
7416          * If found_ref is set then max_size is the real size and must match the
7417          * existing refs.  So if we have already found a ref then we need to
7418          * make sure that this ref matches the existing one, otherwise we need
7419          * to add a new backref so we can notice that the backrefs don't match
7420          * and we need to figure out who is telling the truth.  This is to
7421          * account for that awful fsync bug I introduced where we'd end up with
7422          * a btrfs_file_extent_item that would have its length include multiple
7423          * prealloc extents or point inside of a prealloc extent.
7424          */
7425         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7426                                  bytenr, max_size);
7427         if (!back) {
7428                 back = alloc_data_backref(rec, parent, root, owner, offset,
7429                                           max_size);
7430                 BUG_ON(!back);
7431                 insert = true;
7432         }
7433
7434         if (found_ref) {
7435                 BUG_ON(num_refs != 1);
7436                 if (back->node.found_ref)
7437                         BUG_ON(back->bytes != max_size);
7438                 back->node.found_ref = 1;
7439                 back->found_ref += 1;
7440                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7441                         back->bytes = max_size;
7442                         back->disk_bytenr = bytenr;
7443
7444                         /* Need to reinsert if not already in the tree */
7445                         if (!insert) {
7446                                 rb_erase(&back->node.node, &rec->backref_tree);
7447                                 insert = true;
7448                         }
7449                 }
7450                 rec->refs += 1;
7451                 rec->content_checked = 1;
7452                 rec->owner_ref_checked = 1;
7453         } else {
7454                 if (back->node.found_extent_tree) {
7455                         fprintf(stderr, "Extent back ref already exists "
7456                                 "for %llu parent %llu root %llu "
7457                                 "owner %llu offset %llu num_refs %lu\n",
7458                                 (unsigned long long)bytenr,
7459                                 (unsigned long long)parent,
7460                                 (unsigned long long)root,
7461                                 (unsigned long long)owner,
7462                                 (unsigned long long)offset,
7463                                 (unsigned long)num_refs);
7464                 }
7465                 back->num_refs = num_refs;
7466                 back->node.found_extent_tree = 1;
7467         }
7468         if (insert)
7469                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7470                         compare_extent_backref));
7471
7472         maybe_free_extent_rec(extent_cache, rec);
7473         return 0;
7474 }
7475
7476 static int add_pending(struct cache_tree *pending,
7477                        struct cache_tree *seen, u64 bytenr, u32 size)
7478 {
7479         int ret;
7480         ret = add_cache_extent(seen, bytenr, size);
7481         if (ret)
7482                 return ret;
7483         add_cache_extent(pending, bytenr, size);
7484         return 0;
7485 }
7486
7487 static int pick_next_pending(struct cache_tree *pending,
7488                         struct cache_tree *reada,
7489                         struct cache_tree *nodes,
7490                         u64 last, struct block_info *bits, int bits_nr,
7491                         int *reada_bits)
7492 {
7493         unsigned long node_start = last;
7494         struct cache_extent *cache;
7495         int ret;
7496
7497         cache = search_cache_extent(reada, 0);
7498         if (cache) {
7499                 bits[0].start = cache->start;
7500                 bits[0].size = cache->size;
7501                 *reada_bits = 1;
7502                 return 1;
7503         }
7504         *reada_bits = 0;
7505         if (node_start > 32768)
7506                 node_start -= 32768;
7507
7508         cache = search_cache_extent(nodes, node_start);
7509         if (!cache)
7510                 cache = search_cache_extent(nodes, 0);
7511
7512         if (!cache) {
7513                  cache = search_cache_extent(pending, 0);
7514                  if (!cache)
7515                          return 0;
7516                  ret = 0;
7517                  do {
7518                          bits[ret].start = cache->start;
7519                          bits[ret].size = cache->size;
7520                          cache = next_cache_extent(cache);
7521                          ret++;
7522                  } while (cache && ret < bits_nr);
7523                  return ret;
7524         }
7525
7526         ret = 0;
7527         do {
7528                 bits[ret].start = cache->start;
7529                 bits[ret].size = cache->size;
7530                 cache = next_cache_extent(cache);
7531                 ret++;
7532         } while (cache && ret < bits_nr);
7533
7534         if (bits_nr - ret > 8) {
7535                 u64 lookup = bits[0].start + bits[0].size;
7536                 struct cache_extent *next;
7537                 next = search_cache_extent(pending, lookup);
7538                 while(next) {
7539                         if (next->start - lookup > 32768)
7540                                 break;
7541                         bits[ret].start = next->start;
7542                         bits[ret].size = next->size;
7543                         lookup = next->start + next->size;
7544                         ret++;
7545                         if (ret == bits_nr)
7546                                 break;
7547                         next = next_cache_extent(next);
7548                         if (!next)
7549                                 break;
7550                 }
7551         }
7552         return ret;
7553 }
7554
7555 static void free_chunk_record(struct cache_extent *cache)
7556 {
7557         struct chunk_record *rec;
7558
7559         rec = container_of(cache, struct chunk_record, cache);
7560         list_del_init(&rec->list);
7561         list_del_init(&rec->dextents);
7562         free(rec);
7563 }
7564
7565 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7566 {
7567         cache_tree_free_extents(chunk_cache, free_chunk_record);
7568 }
7569
7570 static void free_device_record(struct rb_node *node)
7571 {
7572         struct device_record *rec;
7573
7574         rec = container_of(node, struct device_record, node);
7575         free(rec);
7576 }
7577
7578 FREE_RB_BASED_TREE(device_cache, free_device_record);
7579
7580 int insert_block_group_record(struct block_group_tree *tree,
7581                               struct block_group_record *bg_rec)
7582 {
7583         int ret;
7584
7585         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7586         if (ret)
7587                 return ret;
7588
7589         list_add_tail(&bg_rec->list, &tree->block_groups);
7590         return 0;
7591 }
7592
7593 static void free_block_group_record(struct cache_extent *cache)
7594 {
7595         struct block_group_record *rec;
7596
7597         rec = container_of(cache, struct block_group_record, cache);
7598         list_del_init(&rec->list);
7599         free(rec);
7600 }
7601
7602 void free_block_group_tree(struct block_group_tree *tree)
7603 {
7604         cache_tree_free_extents(&tree->tree, free_block_group_record);
7605 }
7606
7607 int insert_device_extent_record(struct device_extent_tree *tree,
7608                                 struct device_extent_record *de_rec)
7609 {
7610         int ret;
7611
7612         /*
7613          * Device extent is a bit different from the other extents, because
7614          * the extents which belong to the different devices may have the
7615          * same start and size, so we need use the special extent cache
7616          * search/insert functions.
7617          */
7618         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7619         if (ret)
7620                 return ret;
7621
7622         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7623         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7624         return 0;
7625 }
7626
7627 static void free_device_extent_record(struct cache_extent *cache)
7628 {
7629         struct device_extent_record *rec;
7630
7631         rec = container_of(cache, struct device_extent_record, cache);
7632         if (!list_empty(&rec->chunk_list))
7633                 list_del_init(&rec->chunk_list);
7634         if (!list_empty(&rec->device_list))
7635                 list_del_init(&rec->device_list);
7636         free(rec);
7637 }
7638
7639 void free_device_extent_tree(struct device_extent_tree *tree)
7640 {
7641         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7642 }
7643
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref in
 * @extent_cache.  The item key supplies the extent bytenr (objectid) and the
 * parent (offset).  A ref objectid below BTRFS_FIRST_FREE_OBJECTID becomes a
 * tree backref, anything else a data backref carrying the item's ref count.
 *
 * NOTE(review): the data backref is added with max_size == 0, while
 * add_extent_rec_nolookup() BUG_ONs on a zero max_size when the extent
 * record does not exist yet - confirm this cannot happen for v0 refs.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
7664
7665 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7666                                             struct btrfs_key *key,
7667                                             int slot)
7668 {
7669         struct btrfs_chunk *ptr;
7670         struct chunk_record *rec;
7671         int num_stripes, i;
7672
7673         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7674         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7675
7676         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7677         if (!rec) {
7678                 fprintf(stderr, "memory allocation failed\n");
7679                 exit(-1);
7680         }
7681
7682         INIT_LIST_HEAD(&rec->list);
7683         INIT_LIST_HEAD(&rec->dextents);
7684         rec->bg_rec = NULL;
7685
7686         rec->cache.start = key->offset;
7687         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7688
7689         rec->generation = btrfs_header_generation(leaf);
7690
7691         rec->objectid = key->objectid;
7692         rec->type = key->type;
7693         rec->offset = key->offset;
7694
7695         rec->length = rec->cache.size;
7696         rec->owner = btrfs_chunk_owner(leaf, ptr);
7697         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7698         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7699         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7700         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7701         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7702         rec->num_stripes = num_stripes;
7703         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7704
7705         for (i = 0; i < rec->num_stripes; ++i) {
7706                 rec->stripes[i].devid =
7707                         btrfs_stripe_devid_nr(leaf, ptr, i);
7708                 rec->stripes[i].offset =
7709                         btrfs_stripe_offset_nr(leaf, ptr, i);
7710                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7711                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7712                                 BTRFS_UUID_SIZE);
7713         }
7714
7715         return rec;
7716 }
7717
7718 static int process_chunk_item(struct cache_tree *chunk_cache,
7719                               struct btrfs_key *key, struct extent_buffer *eb,
7720                               int slot)
7721 {
7722         struct chunk_record *rec;
7723         struct btrfs_chunk *chunk;
7724         int ret = 0;
7725
7726         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7727         /*
7728          * Do extra check for this chunk item,
7729          *
7730          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7731          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7732          * and owner<->key_type check.
7733          */
7734         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7735                                       key->offset);
7736         if (ret < 0) {
7737                 error("chunk(%llu, %llu) is not valid, ignore it",
7738                       key->offset, btrfs_chunk_length(eb, chunk));
7739                 return 0;
7740         }
7741         rec = btrfs_new_chunk_record(eb, key, slot);
7742         ret = insert_cache_extent(chunk_cache, &rec->cache);
7743         if (ret) {
7744                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7745                         rec->offset, rec->length);
7746                 free(rec);
7747         }
7748
7749         return ret;
7750 }
7751
7752 static int process_device_item(struct rb_root *dev_cache,
7753                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7754 {
7755         struct btrfs_dev_item *ptr;
7756         struct device_record *rec;
7757         int ret = 0;
7758
7759         ptr = btrfs_item_ptr(eb,
7760                 slot, struct btrfs_dev_item);
7761
7762         rec = malloc(sizeof(*rec));
7763         if (!rec) {
7764                 fprintf(stderr, "memory allocation failed\n");
7765                 return -ENOMEM;
7766         }
7767
7768         rec->devid = key->offset;
7769         rec->generation = btrfs_header_generation(eb);
7770
7771         rec->objectid = key->objectid;
7772         rec->type = key->type;
7773         rec->offset = key->offset;
7774
7775         rec->devid = btrfs_device_id(eb, ptr);
7776         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7777         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7778
7779         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7780         if (ret) {
7781                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7782                 free(rec);
7783         }
7784
7785         return ret;
7786 }
7787
7788 struct block_group_record *
7789 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7790                              int slot)
7791 {
7792         struct btrfs_block_group_item *ptr;
7793         struct block_group_record *rec;
7794
7795         rec = calloc(1, sizeof(*rec));
7796         if (!rec) {
7797                 fprintf(stderr, "memory allocation failed\n");
7798                 exit(-1);
7799         }
7800
7801         rec->cache.start = key->objectid;
7802         rec->cache.size = key->offset;
7803
7804         rec->generation = btrfs_header_generation(leaf);
7805
7806         rec->objectid = key->objectid;
7807         rec->type = key->type;
7808         rec->offset = key->offset;
7809
7810         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7811         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7812
7813         INIT_LIST_HEAD(&rec->list);
7814
7815         return rec;
7816 }
7817
7818 static int process_block_group_item(struct block_group_tree *block_group_cache,
7819                                     struct btrfs_key *key,
7820                                     struct extent_buffer *eb, int slot)
7821 {
7822         struct block_group_record *rec;
7823         int ret = 0;
7824
7825         rec = btrfs_new_block_group_record(eb, key, slot);
7826         ret = insert_block_group_record(block_group_cache, rec);
7827         if (ret) {
7828                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7829                         rec->objectid, rec->offset);
7830                 free(rec);
7831         }
7832
7833         return ret;
7834 }
7835
7836 struct device_extent_record *
7837 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7838                                struct btrfs_key *key, int slot)
7839 {
7840         struct device_extent_record *rec;
7841         struct btrfs_dev_extent *ptr;
7842
7843         rec = calloc(1, sizeof(*rec));
7844         if (!rec) {
7845                 fprintf(stderr, "memory allocation failed\n");
7846                 exit(-1);
7847         }
7848
7849         rec->cache.objectid = key->objectid;
7850         rec->cache.start = key->offset;
7851
7852         rec->generation = btrfs_header_generation(leaf);
7853
7854         rec->objectid = key->objectid;
7855         rec->type = key->type;
7856         rec->offset = key->offset;
7857
7858         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7859         rec->chunk_objecteid =
7860                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7861         rec->chunk_offset =
7862                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7863         rec->length = btrfs_dev_extent_length(leaf, ptr);
7864         rec->cache.size = rec->length;
7865
7866         INIT_LIST_HEAD(&rec->chunk_list);
7867         INIT_LIST_HEAD(&rec->device_list);
7868
7869         return rec;
7870 }
7871
7872 static int
7873 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7874                            struct btrfs_key *key, struct extent_buffer *eb,
7875                            int slot)
7876 {
7877         struct device_extent_record *rec;
7878         int ret;
7879
7880         rec = btrfs_new_device_extent_record(eb, key, slot);
7881         ret = insert_device_extent_record(dev_extent_cache, rec);
7882         if (ret) {
7883                 fprintf(stderr,
7884                         "Device extent[%llu, %llu, %llu] existed.\n",
7885                         rec->objectid, rec->offset, rec->length);
7886                 free(rec);
7887         }
7888
7889         return ret;
7890 }
7891
7892 static int process_extent_item(struct btrfs_root *root,
7893                                struct cache_tree *extent_cache,
7894                                struct extent_buffer *eb, int slot)
7895 {
7896         struct btrfs_extent_item *ei;
7897         struct btrfs_extent_inline_ref *iref;
7898         struct btrfs_extent_data_ref *dref;
7899         struct btrfs_shared_data_ref *sref;
7900         struct btrfs_key key;
7901         struct extent_record tmpl;
7902         unsigned long end;
7903         unsigned long ptr;
7904         int ret;
7905         int type;
7906         u32 item_size = btrfs_item_size_nr(eb, slot);
7907         u64 refs = 0;
7908         u64 offset;
7909         u64 num_bytes;
7910         int metadata = 0;
7911
7912         btrfs_item_key_to_cpu(eb, &key, slot);
7913
7914         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7915                 metadata = 1;
7916                 num_bytes = root->fs_info->nodesize;
7917         } else {
7918                 num_bytes = key.offset;
7919         }
7920
7921         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7922                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7923                       key.objectid, root->fs_info->sectorsize);
7924                 return -EIO;
7925         }
7926         if (item_size < sizeof(*ei)) {
7927 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7928                 struct btrfs_extent_item_v0 *ei0;
7929                 BUG_ON(item_size != sizeof(*ei0));
7930                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7931                 refs = btrfs_extent_refs_v0(eb, ei0);
7932 #else
7933                 BUG();
7934 #endif
7935                 memset(&tmpl, 0, sizeof(tmpl));
7936                 tmpl.start = key.objectid;
7937                 tmpl.nr = num_bytes;
7938                 tmpl.extent_item_refs = refs;
7939                 tmpl.metadata = metadata;
7940                 tmpl.found_rec = 1;
7941                 tmpl.max_size = num_bytes;
7942
7943                 return add_extent_rec(extent_cache, &tmpl);
7944         }
7945
7946         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7947         refs = btrfs_extent_refs(eb, ei);
7948         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7949                 metadata = 1;
7950         else
7951                 metadata = 0;
7952         if (metadata && num_bytes != root->fs_info->nodesize) {
7953                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7954                       num_bytes, root->fs_info->nodesize);
7955                 return -EIO;
7956         }
7957         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7958                 error("ignore invalid data extent, length %llu is not aligned to %u",
7959                       num_bytes, root->fs_info->sectorsize);
7960                 return -EIO;
7961         }
7962
7963         memset(&tmpl, 0, sizeof(tmpl));
7964         tmpl.start = key.objectid;
7965         tmpl.nr = num_bytes;
7966         tmpl.extent_item_refs = refs;
7967         tmpl.metadata = metadata;
7968         tmpl.found_rec = 1;
7969         tmpl.max_size = num_bytes;
7970         add_extent_rec(extent_cache, &tmpl);
7971
7972         ptr = (unsigned long)(ei + 1);
7973         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7974             key.type == BTRFS_EXTENT_ITEM_KEY)
7975                 ptr += sizeof(struct btrfs_tree_block_info);
7976
7977         end = (unsigned long)ei + item_size;
7978         while (ptr < end) {
7979                 iref = (struct btrfs_extent_inline_ref *)ptr;
7980                 type = btrfs_extent_inline_ref_type(eb, iref);
7981                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7982                 switch (type) {
7983                 case BTRFS_TREE_BLOCK_REF_KEY:
7984                         ret = add_tree_backref(extent_cache, key.objectid,
7985                                         0, offset, 0);
7986                         if (ret < 0)
7987                                 error(
7988                         "add_tree_backref failed (extent items tree block): %s",
7989                                       strerror(-ret));
7990                         break;
7991                 case BTRFS_SHARED_BLOCK_REF_KEY:
7992                         ret = add_tree_backref(extent_cache, key.objectid,
7993                                         offset, 0, 0);
7994                         if (ret < 0)
7995                                 error(
7996                         "add_tree_backref failed (extent items shared block): %s",
7997                                       strerror(-ret));
7998                         break;
7999                 case BTRFS_EXTENT_DATA_REF_KEY:
8000                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8001                         add_data_backref(extent_cache, key.objectid, 0,
8002                                         btrfs_extent_data_ref_root(eb, dref),
8003                                         btrfs_extent_data_ref_objectid(eb,
8004                                                                        dref),
8005                                         btrfs_extent_data_ref_offset(eb, dref),
8006                                         btrfs_extent_data_ref_count(eb, dref),
8007                                         0, num_bytes);
8008                         break;
8009                 case BTRFS_SHARED_DATA_REF_KEY:
8010                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8011                         add_data_backref(extent_cache, key.objectid, offset,
8012                                         0, 0, 0,
8013                                         btrfs_shared_data_ref_count(eb, sref),
8014                                         0, num_bytes);
8015                         break;
8016                 default:
8017                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8018                                 key.objectid, key.type, num_bytes);
8019                         goto out;
8020                 }
8021                 ptr += btrfs_extent_inline_ref_size(type);
8022         }
8023         WARN_ON(ptr > end);
8024 out:
8025         return 0;
8026 }
8027
8028 static int check_cache_range(struct btrfs_root *root,
8029                              struct btrfs_block_group_cache *cache,
8030                              u64 offset, u64 bytes)
8031 {
8032         struct btrfs_free_space *entry;
8033         u64 *logical;
8034         u64 bytenr;
8035         int stripe_len;
8036         int i, nr, ret;
8037
8038         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8039                 bytenr = btrfs_sb_offset(i);
8040                 ret = btrfs_rmap_block(root->fs_info,
8041                                        cache->key.objectid, bytenr, 0,
8042                                        &logical, &nr, &stripe_len);
8043                 if (ret)
8044                         return ret;
8045
8046                 while (nr--) {
8047                         if (logical[nr] + stripe_len <= offset)
8048                                 continue;
8049                         if (offset + bytes <= logical[nr])
8050                                 continue;
8051                         if (logical[nr] == offset) {
8052                                 if (stripe_len >= bytes) {
8053                                         free(logical);
8054                                         return 0;
8055                                 }
8056                                 bytes -= stripe_len;
8057                                 offset += stripe_len;
8058                         } else if (logical[nr] < offset) {
8059                                 if (logical[nr] + stripe_len >=
8060                                     offset + bytes) {
8061                                         free(logical);
8062                                         return 0;
8063                                 }
8064                                 bytes = (offset + bytes) -
8065                                         (logical[nr] + stripe_len);
8066                                 offset = logical[nr] + stripe_len;
8067                         } else {
8068                                 /*
8069                                  * Could be tricky, the super may land in the
8070                                  * middle of the area we're checking.  First
8071                                  * check the easiest case, it's at the end.
8072                                  */
8073                                 if (logical[nr] + stripe_len >=
8074                                     bytes + offset) {
8075                                         bytes = logical[nr] - offset;
8076                                         continue;
8077                                 }
8078
8079                                 /* Check the left side */
8080                                 ret = check_cache_range(root, cache,
8081                                                         offset,
8082                                                         logical[nr] - offset);
8083                                 if (ret) {
8084                                         free(logical);
8085                                         return ret;
8086                                 }
8087
8088                                 /* Now we continue with the right side */
8089                                 bytes = (offset + bytes) -
8090                                         (logical[nr] + stripe_len);
8091                                 offset = logical[nr] + stripe_len;
8092                         }
8093                 }
8094
8095                 free(logical);
8096         }
8097
8098         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8099         if (!entry) {
8100                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8101                         offset, offset+bytes);
8102                 return -EINVAL;
8103         }
8104
8105         if (entry->offset != offset) {
8106                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8107                         entry->offset);
8108                 return -EINVAL;
8109         }
8110
8111         if (entry->bytes != bytes) {
8112                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8113                         bytes, entry->bytes, offset);
8114                 return -EINVAL;
8115         }
8116
8117         unlink_free_space(cache->free_space_ctl, entry);
8118         free(entry);
8119         return 0;
8120 }
8121
8122 static int verify_space_cache(struct btrfs_root *root,
8123                               struct btrfs_block_group_cache *cache)
8124 {
8125         struct btrfs_path path;
8126         struct extent_buffer *leaf;
8127         struct btrfs_key key;
8128         u64 last;
8129         int ret = 0;
8130
8131         root = root->fs_info->extent_root;
8132
8133         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8134
8135         btrfs_init_path(&path);
8136         key.objectid = last;
8137         key.offset = 0;
8138         key.type = BTRFS_EXTENT_ITEM_KEY;
8139         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8140         if (ret < 0)
8141                 goto out;
8142         ret = 0;
8143         while (1) {
8144                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8145                         ret = btrfs_next_leaf(root, &path);
8146                         if (ret < 0)
8147                                 goto out;
8148                         if (ret > 0) {
8149                                 ret = 0;
8150                                 break;
8151                         }
8152                 }
8153                 leaf = path.nodes[0];
8154                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8155                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8156                         break;
8157                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8158                     key.type != BTRFS_METADATA_ITEM_KEY) {
8159                         path.slots[0]++;
8160                         continue;
8161                 }
8162
8163                 if (last == key.objectid) {
8164                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8165                                 last = key.objectid + key.offset;
8166                         else
8167                                 last = key.objectid + root->fs_info->nodesize;
8168                         path.slots[0]++;
8169                         continue;
8170                 }
8171
8172                 ret = check_cache_range(root, cache, last,
8173                                         key.objectid - last);
8174                 if (ret)
8175                         break;
8176                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8177                         last = key.objectid + key.offset;
8178                 else
8179                         last = key.objectid + root->fs_info->nodesize;
8180                 path.slots[0]++;
8181         }
8182
8183         if (last < cache->key.objectid + cache->key.offset)
8184                 ret = check_cache_range(root, cache, last,
8185                                         cache->key.objectid +
8186                                         cache->key.offset - last);
8187
8188 out:
8189         btrfs_release_path(&path);
8190
8191         if (!ret &&
8192             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8193                 fprintf(stderr, "There are still entries left in the space "
8194                         "cache\n");
8195                 ret = -EINVAL;
8196         }
8197
8198         return ret;
8199 }
8200
8201 static int check_space_cache(struct btrfs_root *root)
8202 {
8203         struct btrfs_block_group_cache *cache;
8204         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8205         int ret;
8206         int error = 0;
8207
8208         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8209             btrfs_super_generation(root->fs_info->super_copy) !=
8210             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8211                 printf("cache and super generation don't match, space cache "
8212                        "will be invalidated\n");
8213                 return 0;
8214         }
8215
8216         if (ctx.progress_enabled) {
8217                 ctx.tp = TASK_FREE_SPACE;
8218                 task_start(ctx.info);
8219         }
8220
8221         while (1) {
8222                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8223                 if (!cache)
8224                         break;
8225
8226                 start = cache->key.objectid + cache->key.offset;
8227                 if (!cache->free_space_ctl) {
8228                         if (btrfs_init_free_space_ctl(cache,
8229                                                 root->fs_info->sectorsize)) {
8230                                 ret = -ENOMEM;
8231                                 break;
8232                         }
8233                 } else {
8234                         btrfs_remove_free_space_cache(cache);
8235                 }
8236
8237                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8238                         ret = exclude_super_stripes(root, cache);
8239                         if (ret) {
8240                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8241                                         strerror(-ret));
8242                                 error++;
8243                                 continue;
8244                         }
8245                         ret = load_free_space_tree(root->fs_info, cache);
8246                         free_excluded_extents(root, cache);
8247                         if (ret < 0) {
8248                                 fprintf(stderr, "could not load free space tree: %s\n",
8249                                         strerror(-ret));
8250                                 error++;
8251                                 continue;
8252                         }
8253                         error += ret;
8254                 } else {
8255                         ret = load_free_space_cache(root->fs_info, cache);
8256                         if (!ret)
8257                                 continue;
8258                 }
8259
8260                 ret = verify_space_cache(root, cache);
8261                 if (ret) {
8262                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8263                                 cache->key.objectid);
8264                         error++;
8265                 }
8266         }
8267
8268         task_stop(ctx.info);
8269
8270         return error ? -EINVAL : 0;
8271 }
8272
8273 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8274                         u64 num_bytes, unsigned long leaf_offset,
8275                         struct extent_buffer *eb) {
8276
8277         struct btrfs_fs_info *fs_info = root->fs_info;
8278         u64 offset = 0;
8279         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8280         char *data;
8281         unsigned long csum_offset;
8282         u32 csum;
8283         u32 csum_expected;
8284         u64 read_len;
8285         u64 data_checked = 0;
8286         u64 tmp;
8287         int ret = 0;
8288         int mirror;
8289         int num_copies;
8290
8291         if (num_bytes % fs_info->sectorsize)
8292                 return -EINVAL;
8293
8294         data = malloc(num_bytes);
8295         if (!data)
8296                 return -ENOMEM;
8297
8298         while (offset < num_bytes) {
8299                 mirror = 0;
8300 again:
8301                 read_len = num_bytes - offset;
8302                 /* read as much space once a time */
8303                 ret = read_extent_data(fs_info, data + offset,
8304                                 bytenr + offset, &read_len, mirror);
8305                 if (ret)
8306                         goto out;
8307                 data_checked = 0;
8308                 /* verify every 4k data's checksum */
8309                 while (data_checked < read_len) {
8310                         csum = ~(u32)0;
8311                         tmp = offset + data_checked;
8312
8313                         csum = btrfs_csum_data((char *)data + tmp,
8314                                                csum, fs_info->sectorsize);
8315                         btrfs_csum_final(csum, (u8 *)&csum);
8316
8317                         csum_offset = leaf_offset +
8318                                  tmp / fs_info->sectorsize * csum_size;
8319                         read_extent_buffer(eb, (char *)&csum_expected,
8320                                            csum_offset, csum_size);
8321                         /* try another mirror */
8322                         if (csum != csum_expected) {
8323                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8324                                                 mirror, bytenr + tmp,
8325                                                 csum, csum_expected);
8326                                 num_copies = btrfs_num_copies(root->fs_info,
8327                                                 bytenr, num_bytes);
8328                                 if (mirror < num_copies - 1) {
8329                                         mirror += 1;
8330                                         goto again;
8331                                 }
8332                         }
8333                         data_checked += fs_info->sectorsize;
8334                 }
8335                 offset += read_len;
8336         }
8337 out:
8338         free(data);
8339         return ret;
8340 }
8341
8342 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8343                                u64 num_bytes)
8344 {
8345         struct btrfs_path path;
8346         struct extent_buffer *leaf;
8347         struct btrfs_key key;
8348         int ret;
8349
8350         btrfs_init_path(&path);
8351         key.objectid = bytenr;
8352         key.type = BTRFS_EXTENT_ITEM_KEY;
8353         key.offset = (u64)-1;
8354
8355 again:
8356         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8357                                 0, 0);
8358         if (ret < 0) {
8359                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8360                 btrfs_release_path(&path);
8361                 return ret;
8362         } else if (ret) {
8363                 if (path.slots[0] > 0) {
8364                         path.slots[0]--;
8365                 } else {
8366                         ret = btrfs_prev_leaf(root, &path);
8367                         if (ret < 0) {
8368                                 goto out;
8369                         } else if (ret > 0) {
8370                                 ret = 0;
8371                                 goto out;
8372                         }
8373                 }
8374         }
8375
8376         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8377
8378         /*
8379          * Block group items come before extent items if they have the same
8380          * bytenr, so walk back one more just in case.  Dear future traveller,
8381          * first congrats on mastering time travel.  Now if it's not too much
8382          * trouble could you go back to 2006 and tell Chris to make the
8383          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8384          * EXTENT_ITEM_KEY please?
8385          */
8386         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8387                 if (path.slots[0] > 0) {
8388                         path.slots[0]--;
8389                 } else {
8390                         ret = btrfs_prev_leaf(root, &path);
8391                         if (ret < 0) {
8392                                 goto out;
8393                         } else if (ret > 0) {
8394                                 ret = 0;
8395                                 goto out;
8396                         }
8397                 }
8398                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8399         }
8400
8401         while (num_bytes) {
8402                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8403                         ret = btrfs_next_leaf(root, &path);
8404                         if (ret < 0) {
8405                                 fprintf(stderr, "Error going to next leaf "
8406                                         "%d\n", ret);
8407                                 btrfs_release_path(&path);
8408                                 return ret;
8409                         } else if (ret) {
8410                                 break;
8411                         }
8412                 }
8413                 leaf = path.nodes[0];
8414                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8415                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8416                         path.slots[0]++;
8417                         continue;
8418                 }
8419                 if (key.objectid + key.offset < bytenr) {
8420                         path.slots[0]++;
8421                         continue;
8422                 }
8423                 if (key.objectid > bytenr + num_bytes)
8424                         break;
8425
8426                 if (key.objectid == bytenr) {
8427                         if (key.offset >= num_bytes) {
8428                                 num_bytes = 0;
8429                                 break;
8430                         }
8431                         num_bytes -= key.offset;
8432                         bytenr += key.offset;
8433                 } else if (key.objectid < bytenr) {
8434                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8435                                 num_bytes = 0;
8436                                 break;
8437                         }
8438                         num_bytes = (bytenr + num_bytes) -
8439                                 (key.objectid + key.offset);
8440                         bytenr = key.objectid + key.offset;
8441                 } else {
8442                         if (key.objectid + key.offset < bytenr + num_bytes) {
8443                                 u64 new_start = key.objectid + key.offset;
8444                                 u64 new_bytes = bytenr + num_bytes - new_start;
8445
8446                                 /*
8447                                  * Weird case, the extent is in the middle of
8448                                  * our range, we'll have to search one side
8449                                  * and then the other.  Not sure if this happens
8450                                  * in real life, but no harm in coding it up
8451                                  * anyway just in case.
8452                                  */
8453                                 btrfs_release_path(&path);
8454                                 ret = check_extent_exists(root, new_start,
8455                                                           new_bytes);
8456                                 if (ret) {
8457                                         fprintf(stderr, "Right section didn't "
8458                                                 "have a record\n");
8459                                         break;
8460                                 }
8461                                 num_bytes = key.objectid - bytenr;
8462                                 goto again;
8463                         }
8464                         num_bytes = key.objectid - bytenr;
8465                 }
8466                 path.slots[0]++;
8467         }
8468         ret = 0;
8469
8470 out:
8471         if (num_bytes && !ret) {
8472                 fprintf(stderr, "There are no extents for csum range "
8473                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8474                 ret = 1;
8475         }
8476
8477         btrfs_release_path(&path);
8478         return ret;
8479 }
8480
8481 static int check_csums(struct btrfs_root *root)
8482 {
8483         struct btrfs_path path;
8484         struct extent_buffer *leaf;
8485         struct btrfs_key key;
8486         u64 offset = 0, num_bytes = 0;
8487         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8488         int errors = 0;
8489         int ret;
8490         u64 data_len;
8491         unsigned long leaf_offset;
8492
8493         root = root->fs_info->csum_root;
8494         if (!extent_buffer_uptodate(root->node)) {
8495                 fprintf(stderr, "No valid csum tree found\n");
8496                 return -ENOENT;
8497         }
8498
8499         btrfs_init_path(&path);
8500         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8501         key.type = BTRFS_EXTENT_CSUM_KEY;
8502         key.offset = 0;
8503         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8504         if (ret < 0) {
8505                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8506                 btrfs_release_path(&path);
8507                 return ret;
8508         }
8509
8510         if (ret > 0 && path.slots[0])
8511                 path.slots[0]--;
8512         ret = 0;
8513
8514         while (1) {
8515                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8516                         ret = btrfs_next_leaf(root, &path);
8517                         if (ret < 0) {
8518                                 fprintf(stderr, "Error going to next leaf "
8519                                         "%d\n", ret);
8520                                 break;
8521                         }
8522                         if (ret)
8523                                 break;
8524                 }
8525                 leaf = path.nodes[0];
8526
8527                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8528                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8529                         path.slots[0]++;
8530                         continue;
8531                 }
8532
8533                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8534                               csum_size) * root->fs_info->sectorsize;
8535                 if (!check_data_csum)
8536                         goto skip_csum_check;
8537                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8538                 ret = check_extent_csums(root, key.offset, data_len,
8539                                          leaf_offset, leaf);
8540                 if (ret)
8541                         break;
8542 skip_csum_check:
8543                 if (!num_bytes) {
8544                         offset = key.offset;
8545                 } else if (key.offset != offset + num_bytes) {
8546                         ret = check_extent_exists(root, offset, num_bytes);
8547                         if (ret) {
8548                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8549                                         "there is no extent record\n",
8550                                         offset, offset+num_bytes);
8551                                 errors++;
8552                         }
8553                         offset = key.offset;
8554                         num_bytes = 0;
8555                 }
8556                 num_bytes += data_len;
8557                 path.slots[0]++;
8558         }
8559
8560         btrfs_release_path(&path);
8561         return errors;
8562 }
8563
8564 static int is_dropped_key(struct btrfs_key *key,
8565                           struct btrfs_key *drop_key) {
8566         if (key->objectid < drop_key->objectid)
8567                 return 1;
8568         else if (key->objectid == drop_key->objectid) {
8569                 if (key->type < drop_key->type)
8570                         return 1;
8571                 else if (key->type == drop_key->type) {
8572                         if (key->offset < drop_key->offset)
8573                                 return 1;
8574                 }
8575         }
8576         return 0;
8577 }
8578
8579 /*
8580  * Here are the rules for FULL_BACKREF.
8581  *
8582  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8583  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8584  *      FULL_BACKREF set.
8585  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8586  *    if it happened after the relocation occurred since we'll have dropped the
8587  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8588  *    have no real way to know for sure.
8589  *
8590  * We process the blocks one root at a time, and we start from the lowest root
8591  * objectid and go to the highest.  So we can just lookup the owner backref for
8592  * the record and if we don't find it then we know it doesn't exist and we have
8593  * a FULL BACKREF.
8594  *
8595  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8596  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8597  * be set or not and then we can check later once we've gathered all the refs.
8598  */
8599 static int calc_extent_flag(struct cache_tree *extent_cache,
8600                            struct extent_buffer *buf,
8601                            struct root_item_record *ri,
8602                            u64 *flags)
8603 {
8604         struct extent_record *rec;
8605         struct cache_extent *cache;
8606         struct tree_backref *tback;
8607         u64 owner = 0;
8608
8609         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8610         /* we have added this extent before */
8611         if (!cache)
8612                 return -ENOENT;
8613
8614         rec = container_of(cache, struct extent_record, cache);
8615
8616         /*
8617          * Except file/reloc tree, we can not have
8618          * FULL BACKREF MODE
8619          */
8620         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8621                 goto normal;
8622         /*
8623          * root node
8624          */
8625         if (buf->start == ri->bytenr)
8626                 goto normal;
8627
8628         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8629                 goto full_backref;
8630
8631         owner = btrfs_header_owner(buf);
8632         if (owner == ri->objectid)
8633                 goto normal;
8634
8635         tback = find_tree_backref(rec, 0, owner);
8636         if (!tback)
8637                 goto full_backref;
8638 normal:
8639         *flags = 0;
8640         if (rec->flag_block_full_backref != FLAG_UNSET &&
8641             rec->flag_block_full_backref != 0)
8642                 rec->bad_full_backref = 1;
8643         return 0;
8644 full_backref:
8645         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8646         if (rec->flag_block_full_backref != FLAG_UNSET &&
8647             rec->flag_block_full_backref != 1)
8648                 rec->bad_full_backref = 1;
8649         return 0;
8650 }
8651
8652 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8653 {
8654         fprintf(stderr, "Invalid key type(");
8655         print_key_type(stderr, 0, key_type);
8656         fprintf(stderr, ") found in root(");
8657         print_objectid(stderr, rootid, 0);
8658         fprintf(stderr, ")\n");
8659 }
8660
8661 /*
8662  * Check if the key is valid with its extent buffer.
8663  *
8664  * This is a early check in case invalid key exists in a extent buffer
8665  * This is not comprehensive yet, but should prevent wrong key/item passed
8666  * further
8667  */
8668 static int check_type_with_root(u64 rootid, u8 key_type)
8669 {
8670         switch (key_type) {
8671         /* Only valid in chunk tree */
8672         case BTRFS_DEV_ITEM_KEY:
8673         case BTRFS_CHUNK_ITEM_KEY:
8674                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8675                         goto err;
8676                 break;
8677         /* valid in csum and log tree */
8678         case BTRFS_CSUM_TREE_OBJECTID:
8679                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8680                       is_fstree(rootid)))
8681                         goto err;
8682                 break;
8683         case BTRFS_EXTENT_ITEM_KEY:
8684         case BTRFS_METADATA_ITEM_KEY:
8685         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8686                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8687                         goto err;
8688                 break;
8689         case BTRFS_ROOT_ITEM_KEY:
8690                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8691                         goto err;
8692                 break;
8693         case BTRFS_DEV_EXTENT_KEY:
8694                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8695                         goto err;
8696                 break;
8697         }
8698         return 0;
8699 err:
8700         report_mismatch_key_root(key_type, rootid);
8701         return -EINVAL;
8702 }
8703
8704 static int run_next_block(struct btrfs_root *root,
8705                           struct block_info *bits,
8706                           int bits_nr,
8707                           u64 *last,
8708                           struct cache_tree *pending,
8709                           struct cache_tree *seen,
8710                           struct cache_tree *reada,
8711                           struct cache_tree *nodes,
8712                           struct cache_tree *extent_cache,
8713                           struct cache_tree *chunk_cache,
8714                           struct rb_root *dev_cache,
8715                           struct block_group_tree *block_group_cache,
8716                           struct device_extent_tree *dev_extent_cache,
8717                           struct root_item_record *ri)
8718 {
8719         struct btrfs_fs_info *fs_info = root->fs_info;
8720         struct extent_buffer *buf;
8721         struct extent_record *rec = NULL;
8722         u64 bytenr;
8723         u32 size;
8724         u64 parent;
8725         u64 owner;
8726         u64 flags;
8727         u64 ptr;
8728         u64 gen = 0;
8729         int ret = 0;
8730         int i;
8731         int nritems;
8732         struct btrfs_key key;
8733         struct cache_extent *cache;
8734         int reada_bits;
8735
8736         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8737                                     bits_nr, &reada_bits);
8738         if (nritems == 0)
8739                 return 1;
8740
8741         if (!reada_bits) {
8742                 for(i = 0; i < nritems; i++) {
8743                         ret = add_cache_extent(reada, bits[i].start,
8744                                                bits[i].size);
8745                         if (ret == -EEXIST)
8746                                 continue;
8747
8748                         /* fixme, get the parent transid */
8749                         readahead_tree_block(fs_info, bits[i].start, 0);
8750                 }
8751         }
8752         *last = bits[0].start;
8753         bytenr = bits[0].start;
8754         size = bits[0].size;
8755
8756         cache = lookup_cache_extent(pending, bytenr, size);
8757         if (cache) {
8758                 remove_cache_extent(pending, cache);
8759                 free(cache);
8760         }
8761         cache = lookup_cache_extent(reada, bytenr, size);
8762         if (cache) {
8763                 remove_cache_extent(reada, cache);
8764                 free(cache);
8765         }
8766         cache = lookup_cache_extent(nodes, bytenr, size);
8767         if (cache) {
8768                 remove_cache_extent(nodes, cache);
8769                 free(cache);
8770         }
8771         cache = lookup_cache_extent(extent_cache, bytenr, size);
8772         if (cache) {
8773                 rec = container_of(cache, struct extent_record, cache);
8774                 gen = rec->parent_generation;
8775         }
8776
8777         /* fixme, get the real parent transid */
8778         buf = read_tree_block(root->fs_info, bytenr, gen);
8779         if (!extent_buffer_uptodate(buf)) {
8780                 record_bad_block_io(root->fs_info,
8781                                     extent_cache, bytenr, size);
8782                 goto out;
8783         }
8784
8785         nritems = btrfs_header_nritems(buf);
8786
8787         flags = 0;
8788         if (!init_extent_tree) {
8789                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8790                                        btrfs_header_level(buf), 1, NULL,
8791                                        &flags);
8792                 if (ret < 0) {
8793                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8794                         if (ret < 0) {
8795                                 fprintf(stderr, "Couldn't calc extent flags\n");
8796                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8797                         }
8798                 }
8799         } else {
8800                 flags = 0;
8801                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8802                 if (ret < 0) {
8803                         fprintf(stderr, "Couldn't calc extent flags\n");
8804                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8805                 }
8806         }
8807
8808         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8809                 if (ri != NULL &&
8810                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8811                     ri->objectid == btrfs_header_owner(buf)) {
8812                         /*
8813                          * Ok we got to this block from it's original owner and
8814                          * we have FULL_BACKREF set.  Relocation can leave
8815                          * converted blocks over so this is altogether possible,
8816                          * however it's not possible if the generation > the
8817                          * last snapshot, so check for this case.
8818                          */
8819                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8820                             btrfs_header_generation(buf) > ri->last_snapshot) {
8821                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8822                                 rec->bad_full_backref = 1;
8823                         }
8824                 }
8825         } else {
8826                 if (ri != NULL &&
8827                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8828                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8829                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8830                         rec->bad_full_backref = 1;
8831                 }
8832         }
8833
8834         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8835                 rec->flag_block_full_backref = 1;
8836                 parent = bytenr;
8837                 owner = 0;
8838         } else {
8839                 rec->flag_block_full_backref = 0;
8840                 parent = 0;
8841                 owner = btrfs_header_owner(buf);
8842         }
8843
8844         ret = check_block(root, extent_cache, buf, flags);
8845         if (ret)
8846                 goto out;
8847
8848         if (btrfs_is_leaf(buf)) {
8849                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8850                 for (i = 0; i < nritems; i++) {
8851                         struct btrfs_file_extent_item *fi;
8852                         btrfs_item_key_to_cpu(buf, &key, i);
8853                         /*
8854                          * Check key type against the leaf owner.
8855                          * Could filter quite a lot of early error if
8856                          * owner is correct
8857                          */
8858                         if (check_type_with_root(btrfs_header_owner(buf),
8859                                                  key.type)) {
8860                                 fprintf(stderr, "ignoring invalid key\n");
8861                                 continue;
8862                         }
8863                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8864                                 process_extent_item(root, extent_cache, buf,
8865                                                     i);
8866                                 continue;
8867                         }
8868                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8869                                 process_extent_item(root, extent_cache, buf,
8870                                                     i);
8871                                 continue;
8872                         }
8873                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8874                                 total_csum_bytes +=
8875                                         btrfs_item_size_nr(buf, i);
8876                                 continue;
8877                         }
8878                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8879                                 process_chunk_item(chunk_cache, &key, buf, i);
8880                                 continue;
8881                         }
8882                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8883                                 process_device_item(dev_cache, &key, buf, i);
8884                                 continue;
8885                         }
8886                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8887                                 process_block_group_item(block_group_cache,
8888                                         &key, buf, i);
8889                                 continue;
8890                         }
8891                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8892                                 process_device_extent_item(dev_extent_cache,
8893                                         &key, buf, i);
8894                                 continue;
8895
8896                         }
8897                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8898 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8899                                 process_extent_ref_v0(extent_cache, buf, i);
8900 #else
8901                                 BUG();
8902 #endif
8903                                 continue;
8904                         }
8905
8906                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8907                                 ret = add_tree_backref(extent_cache,
8908                                                 key.objectid, 0, key.offset, 0);
8909                                 if (ret < 0)
8910                                         error(
8911                                 "add_tree_backref failed (leaf tree block): %s",
8912                                               strerror(-ret));
8913                                 continue;
8914                         }
8915                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8916                                 ret = add_tree_backref(extent_cache,
8917                                                 key.objectid, key.offset, 0, 0);
8918                                 if (ret < 0)
8919                                         error(
8920                                 "add_tree_backref failed (leaf shared block): %s",
8921                                               strerror(-ret));
8922                                 continue;
8923                         }
8924                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8925                                 struct btrfs_extent_data_ref *ref;
8926                                 ref = btrfs_item_ptr(buf, i,
8927                                                 struct btrfs_extent_data_ref);
8928                                 add_data_backref(extent_cache,
8929                                         key.objectid, 0,
8930                                         btrfs_extent_data_ref_root(buf, ref),
8931                                         btrfs_extent_data_ref_objectid(buf,
8932                                                                        ref),
8933                                         btrfs_extent_data_ref_offset(buf, ref),
8934                                         btrfs_extent_data_ref_count(buf, ref),
8935                                         0, root->fs_info->sectorsize);
8936                                 continue;
8937                         }
8938                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8939                                 struct btrfs_shared_data_ref *ref;
8940                                 ref = btrfs_item_ptr(buf, i,
8941                                                 struct btrfs_shared_data_ref);
8942                                 add_data_backref(extent_cache,
8943                                         key.objectid, key.offset, 0, 0, 0,
8944                                         btrfs_shared_data_ref_count(buf, ref),
8945                                         0, root->fs_info->sectorsize);
8946                                 continue;
8947                         }
8948                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8949                                 struct bad_item *bad;
8950
8951                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8952                                         continue;
8953                                 if (!owner)
8954                                         continue;
8955                                 bad = malloc(sizeof(struct bad_item));
8956                                 if (!bad)
8957                                         continue;
8958                                 INIT_LIST_HEAD(&bad->list);
8959                                 memcpy(&bad->key, &key,
8960                                        sizeof(struct btrfs_key));
8961                                 bad->root_id = owner;
8962                                 list_add_tail(&bad->list, &delete_items);
8963                                 continue;
8964                         }
8965                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8966                                 continue;
8967                         fi = btrfs_item_ptr(buf, i,
8968                                             struct btrfs_file_extent_item);
8969                         if (btrfs_file_extent_type(buf, fi) ==
8970                             BTRFS_FILE_EXTENT_INLINE)
8971                                 continue;
8972                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8973                                 continue;
8974
8975                         data_bytes_allocated +=
8976                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8977                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8978                                 abort();
8979                         }
8980                         data_bytes_referenced +=
8981                                 btrfs_file_extent_num_bytes(buf, fi);
8982                         add_data_backref(extent_cache,
8983                                 btrfs_file_extent_disk_bytenr(buf, fi),
8984                                 parent, owner, key.objectid, key.offset -
8985                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8986                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8987                 }
8988         } else {
8989                 int level;
8990                 struct btrfs_key first_key;
8991
8992                 first_key.objectid = 0;
8993
8994                 if (nritems > 0)
8995                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8996                 level = btrfs_header_level(buf);
8997                 for (i = 0; i < nritems; i++) {
8998                         struct extent_record tmpl;
8999
9000                         ptr = btrfs_node_blockptr(buf, i);
9001                         size = root->fs_info->nodesize;
9002                         btrfs_node_key_to_cpu(buf, &key, i);
9003                         if (ri != NULL) {
9004                                 if ((level == ri->drop_level)
9005                                     && is_dropped_key(&key, &ri->drop_key)) {
9006                                         continue;
9007                                 }
9008                         }
9009
9010                         memset(&tmpl, 0, sizeof(tmpl));
9011                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9012                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9013                         tmpl.start = ptr;
9014                         tmpl.nr = size;
9015                         tmpl.refs = 1;
9016                         tmpl.metadata = 1;
9017                         tmpl.max_size = size;
9018                         ret = add_extent_rec(extent_cache, &tmpl);
9019                         if (ret < 0)
9020                                 goto out;
9021
9022                         ret = add_tree_backref(extent_cache, ptr, parent,
9023                                         owner, 1);
9024                         if (ret < 0) {
9025                                 error(
9026                                 "add_tree_backref failed (non-leaf block): %s",
9027                                       strerror(-ret));
9028                                 continue;
9029                         }
9030
9031                         if (level > 1) {
9032                                 add_pending(nodes, seen, ptr, size);
9033                         } else {
9034                                 add_pending(pending, seen, ptr, size);
9035                         }
9036                 }
9037                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9038                                       nritems) * sizeof(struct btrfs_key_ptr);
9039         }
9040         total_btree_bytes += buf->len;
9041         if (fs_root_objectid(btrfs_header_owner(buf)))
9042                 total_fs_tree_bytes += buf->len;
9043         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9044                 total_extent_tree_bytes += buf->len;
9045 out:
9046         free_extent_buffer(buf);
9047         return ret;
9048 }
9049
9050 static int add_root_to_pending(struct extent_buffer *buf,
9051                                struct cache_tree *extent_cache,
9052                                struct cache_tree *pending,
9053                                struct cache_tree *seen,
9054                                struct cache_tree *nodes,
9055                                u64 objectid)
9056 {
9057         struct extent_record tmpl;
9058         int ret;
9059
9060         if (btrfs_header_level(buf) > 0)
9061                 add_pending(nodes, seen, buf->start, buf->len);
9062         else
9063                 add_pending(pending, seen, buf->start, buf->len);
9064
9065         memset(&tmpl, 0, sizeof(tmpl));
9066         tmpl.start = buf->start;
9067         tmpl.nr = buf->len;
9068         tmpl.is_root = 1;
9069         tmpl.refs = 1;
9070         tmpl.metadata = 1;
9071         tmpl.max_size = buf->len;
9072         add_extent_rec(extent_cache, &tmpl);
9073
9074         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9075             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9076                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9077                                 0, 1);
9078         else
9079                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9080                                 1);
9081         return ret;
9082 }
9083
9084 /* as we fix the tree, we might be deleting blocks that
9085  * we're tracking for repair.  This hook makes sure we
9086  * remove any backrefs for blocks as we are fixing them.
9087  */
9088 static int free_extent_hook(struct btrfs_trans_handle *trans,
9089                             struct btrfs_root *root,
9090                             u64 bytenr, u64 num_bytes, u64 parent,
9091                             u64 root_objectid, u64 owner, u64 offset,
9092                             int refs_to_drop)
9093 {
9094         struct extent_record *rec;
9095         struct cache_extent *cache;
9096         int is_data;
9097         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9098
9099         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9100         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9101         if (!cache)
9102                 return 0;
9103
9104         rec = container_of(cache, struct extent_record, cache);
9105         if (is_data) {
9106                 struct data_backref *back;
9107                 back = find_data_backref(rec, parent, root_objectid, owner,
9108                                          offset, 1, bytenr, num_bytes);
9109                 if (!back)
9110                         goto out;
9111                 if (back->node.found_ref) {
9112                         back->found_ref -= refs_to_drop;
9113                         if (rec->refs)
9114                                 rec->refs -= refs_to_drop;
9115                 }
9116                 if (back->node.found_extent_tree) {
9117                         back->num_refs -= refs_to_drop;
9118                         if (rec->extent_item_refs)
9119                                 rec->extent_item_refs -= refs_to_drop;
9120                 }
9121                 if (back->found_ref == 0)
9122                         back->node.found_ref = 0;
9123                 if (back->num_refs == 0)
9124                         back->node.found_extent_tree = 0;
9125
9126                 if (!back->node.found_extent_tree && back->node.found_ref) {
9127                         rb_erase(&back->node.node, &rec->backref_tree);
9128                         free(back);
9129                 }
9130         } else {
9131                 struct tree_backref *back;
9132                 back = find_tree_backref(rec, parent, root_objectid);
9133                 if (!back)
9134                         goto out;
9135                 if (back->node.found_ref) {
9136                         if (rec->refs)
9137                                 rec->refs--;
9138                         back->node.found_ref = 0;
9139                 }
9140                 if (back->node.found_extent_tree) {
9141                         if (rec->extent_item_refs)
9142                                 rec->extent_item_refs--;
9143                         back->node.found_extent_tree = 0;
9144                 }
9145                 if (!back->node.found_extent_tree && back->node.found_ref) {
9146                         rb_erase(&back->node.node, &rec->backref_tree);
9147                         free(back);
9148                 }
9149         }
9150         maybe_free_extent_rec(extent_cache, rec);
9151 out:
9152         return 0;
9153 }
9154
9155 static int delete_extent_records(struct btrfs_trans_handle *trans,
9156                                  struct btrfs_root *root,
9157                                  struct btrfs_path *path,
9158                                  u64 bytenr)
9159 {
9160         struct btrfs_key key;
9161         struct btrfs_key found_key;
9162         struct extent_buffer *leaf;
9163         int ret;
9164         int slot;
9165
9166
9167         key.objectid = bytenr;
9168         key.type = (u8)-1;
9169         key.offset = (u64)-1;
9170
9171         while(1) {
9172                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9173                                         &key, path, 0, 1);
9174                 if (ret < 0)
9175                         break;
9176
9177                 if (ret > 0) {
9178                         ret = 0;
9179                         if (path->slots[0] == 0)
9180                                 break;
9181                         path->slots[0]--;
9182                 }
9183                 ret = 0;
9184
9185                 leaf = path->nodes[0];
9186                 slot = path->slots[0];
9187
9188                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9189                 if (found_key.objectid != bytenr)
9190                         break;
9191
9192                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9193                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9194                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9195                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9196                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9197                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9198                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9199                         btrfs_release_path(path);
9200                         if (found_key.type == 0) {
9201                                 if (found_key.offset == 0)
9202                                         break;
9203                                 key.offset = found_key.offset - 1;
9204                                 key.type = found_key.type;
9205                         }
9206                         key.type = found_key.type - 1;
9207                         key.offset = (u64)-1;
9208                         continue;
9209                 }
9210
9211                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9212                         found_key.objectid, found_key.type, found_key.offset);
9213
9214                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9215                 if (ret)
9216                         break;
9217                 btrfs_release_path(path);
9218
9219                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9220                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9221                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9222                                 found_key.offset : root->fs_info->nodesize;
9223
9224                         ret = btrfs_update_block_group(trans, root, bytenr,
9225                                                        bytes, 0, 0);
9226                         if (ret)
9227                                 break;
9228                 }
9229         }
9230
9231         btrfs_release_path(path);
9232         return ret;
9233 }
9234
9235 /*
9236  * for a single backref, this will allocate a new extent
9237  * and add the backref to it.
9238  */
9239 static int record_extent(struct btrfs_trans_handle *trans,
9240                          struct btrfs_fs_info *info,
9241                          struct btrfs_path *path,
9242                          struct extent_record *rec,
9243                          struct extent_backref *back,
9244                          int allocated, u64 flags)
9245 {
9246         int ret = 0;
9247         struct btrfs_root *extent_root = info->extent_root;
9248         struct extent_buffer *leaf;
9249         struct btrfs_key ins_key;
9250         struct btrfs_extent_item *ei;
9251         struct data_backref *dback;
9252         struct btrfs_tree_block_info *bi;
9253
9254         if (!back->is_data)
9255                 rec->max_size = max_t(u64, rec->max_size,
9256                                     info->nodesize);
9257
9258         if (!allocated) {
9259                 u32 item_size = sizeof(*ei);
9260
9261                 if (!back->is_data)
9262                         item_size += sizeof(*bi);
9263
9264                 ins_key.objectid = rec->start;
9265                 ins_key.offset = rec->max_size;
9266                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9267
9268                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9269                                         &ins_key, item_size);
9270                 if (ret)
9271                         goto fail;
9272
9273                 leaf = path->nodes[0];
9274                 ei = btrfs_item_ptr(leaf, path->slots[0],
9275                                     struct btrfs_extent_item);
9276
9277                 btrfs_set_extent_refs(leaf, ei, 0);
9278                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9279
9280                 if (back->is_data) {
9281                         btrfs_set_extent_flags(leaf, ei,
9282                                                BTRFS_EXTENT_FLAG_DATA);
9283                 } else {
9284                         struct btrfs_disk_key copy_key;;
9285
9286                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9287                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9288                                              sizeof(*bi));
9289
9290                         btrfs_set_disk_key_objectid(&copy_key,
9291                                                     rec->info_objectid);
9292                         btrfs_set_disk_key_type(&copy_key, 0);
9293                         btrfs_set_disk_key_offset(&copy_key, 0);
9294
9295                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9296                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9297
9298                         btrfs_set_extent_flags(leaf, ei,
9299                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9300                 }
9301
9302                 btrfs_mark_buffer_dirty(leaf);
9303                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9304                                                rec->max_size, 1, 0);
9305                 if (ret)
9306                         goto fail;
9307                 btrfs_release_path(path);
9308         }
9309
9310         if (back->is_data) {
9311                 u64 parent;
9312                 int i;
9313
9314                 dback = to_data_backref(back);
9315                 if (back->full_backref)
9316                         parent = dback->parent;
9317                 else
9318                         parent = 0;
9319
9320                 for (i = 0; i < dback->found_ref; i++) {
9321                         /* if parent != 0, we're doing a full backref
9322                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9323                          * just makes the backref allocator create a data
9324                          * backref
9325                          */
9326                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9327                                                    rec->start, rec->max_size,
9328                                                    parent,
9329                                                    dback->root,
9330                                                    parent ?
9331                                                    BTRFS_FIRST_FREE_OBJECTID :
9332                                                    dback->owner,
9333                                                    dback->offset);
9334                         if (ret)
9335                                 break;
9336                 }
9337                 fprintf(stderr, "adding new data backref"
9338                                 " on %llu %s %llu owner %llu"
9339                                 " offset %llu found %d\n",
9340                                 (unsigned long long)rec->start,
9341                                 back->full_backref ?
9342                                 "parent" : "root",
9343                                 back->full_backref ?
9344                                 (unsigned long long)parent :
9345                                 (unsigned long long)dback->root,
9346                                 (unsigned long long)dback->owner,
9347                                 (unsigned long long)dback->offset,
9348                                 dback->found_ref);
9349         } else {
9350                 u64 parent;
9351                 struct tree_backref *tback;
9352
9353                 tback = to_tree_backref(back);
9354                 if (back->full_backref)
9355                         parent = tback->parent;
9356                 else
9357                         parent = 0;
9358
9359                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9360                                            rec->start, rec->max_size,
9361                                            parent, tback->root, 0, 0);
9362                 fprintf(stderr, "adding new tree backref on "
9363                         "start %llu len %llu parent %llu root %llu\n",
9364                         rec->start, rec->max_size, parent, tback->root);
9365         }
9366 fail:
9367         btrfs_release_path(path);
9368         return ret;
9369 }
9370
9371 static struct extent_entry *find_entry(struct list_head *entries,
9372                                        u64 bytenr, u64 bytes)
9373 {
9374         struct extent_entry *entry = NULL;
9375
9376         list_for_each_entry(entry, entries, list) {
9377                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9378                         return entry;
9379         }
9380
9381         return NULL;
9382 }
9383
9384 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9385 {
9386         struct extent_entry *entry, *best = NULL, *prev = NULL;
9387
9388         list_for_each_entry(entry, entries, list) {
9389                 /*
9390                  * If there are as many broken entries as entries then we know
9391                  * not to trust this particular entry.
9392                  */
9393                 if (entry->broken == entry->count)
9394                         continue;
9395
9396                 /*
9397                  * Special case, when there are only two entries and 'best' is
9398                  * the first one
9399                  */
9400                 if (!prev) {
9401                         best = entry;
9402                         prev = entry;
9403                         continue;
9404                 }
9405
9406                 /*
9407                  * If our current entry == best then we can't be sure our best
9408                  * is really the best, so we need to keep searching.
9409                  */
9410                 if (best && best->count == entry->count) {
9411                         prev = entry;
9412                         best = NULL;
9413                         continue;
9414                 }
9415
9416                 /* Prev == entry, not good enough, have to keep searching */
9417                 if (!prev->broken && prev->count == entry->count)
9418                         continue;
9419
9420                 if (!best)
9421                         best = (prev->count > entry->count) ? prev : entry;
9422                 else if (best->count < entry->count)
9423                         best = entry;
9424                 prev = entry;
9425         }
9426
9427         return best;
9428 }
9429
9430 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9431                       struct data_backref *dback, struct extent_entry *entry)
9432 {
9433         struct btrfs_trans_handle *trans;
9434         struct btrfs_root *root;
9435         struct btrfs_file_extent_item *fi;
9436         struct extent_buffer *leaf;
9437         struct btrfs_key key;
9438         u64 bytenr, bytes;
9439         int ret, err;
9440
9441         key.objectid = dback->root;
9442         key.type = BTRFS_ROOT_ITEM_KEY;
9443         key.offset = (u64)-1;
9444         root = btrfs_read_fs_root(info, &key);
9445         if (IS_ERR(root)) {
9446                 fprintf(stderr, "Couldn't find root for our ref\n");
9447                 return -EINVAL;
9448         }
9449
9450         /*
9451          * The backref points to the original offset of the extent if it was
9452          * split, so we need to search down to the offset we have and then walk
9453          * forward until we find the backref we're looking for.
9454          */
9455         key.objectid = dback->owner;
9456         key.type = BTRFS_EXTENT_DATA_KEY;
9457         key.offset = dback->offset;
9458         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9459         if (ret < 0) {
9460                 fprintf(stderr, "Error looking up ref %d\n", ret);
9461                 return ret;
9462         }
9463
9464         while (1) {
9465                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9466                         ret = btrfs_next_leaf(root, path);
9467                         if (ret) {
9468                                 fprintf(stderr, "Couldn't find our ref, next\n");
9469                                 return -EINVAL;
9470                         }
9471                 }
9472                 leaf = path->nodes[0];
9473                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9474                 if (key.objectid != dback->owner ||
9475                     key.type != BTRFS_EXTENT_DATA_KEY) {
9476                         fprintf(stderr, "Couldn't find our ref, search\n");
9477                         return -EINVAL;
9478                 }
9479                 fi = btrfs_item_ptr(leaf, path->slots[0],
9480                                     struct btrfs_file_extent_item);
9481                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9482                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9483
9484                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9485                         break;
9486                 path->slots[0]++;
9487         }
9488
9489         btrfs_release_path(path);
9490
9491         trans = btrfs_start_transaction(root, 1);
9492         if (IS_ERR(trans))
9493                 return PTR_ERR(trans);
9494
9495         /*
9496          * Ok we have the key of the file extent we want to fix, now we can cow
9497          * down to the thing and fix it.
9498          */
9499         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9500         if (ret < 0) {
9501                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9502                         key.objectid, key.type, key.offset, ret);
9503                 goto out;
9504         }
9505         if (ret > 0) {
9506                 fprintf(stderr, "Well that's odd, we just found this key "
9507                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9508                         key.offset);
9509                 ret = -EINVAL;
9510                 goto out;
9511         }
9512         leaf = path->nodes[0];
9513         fi = btrfs_item_ptr(leaf, path->slots[0],
9514                             struct btrfs_file_extent_item);
9515
9516         if (btrfs_file_extent_compression(leaf, fi) &&
9517             dback->disk_bytenr != entry->bytenr) {
9518                 fprintf(stderr, "Ref doesn't match the record start and is "
9519                         "compressed, please take a btrfs-image of this file "
9520                         "system and send it to a btrfs developer so they can "
9521                         "complete this functionality for bytenr %Lu\n",
9522                         dback->disk_bytenr);
9523                 ret = -EINVAL;
9524                 goto out;
9525         }
9526
9527         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9528                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9529         } else if (dback->disk_bytenr > entry->bytenr) {
9530                 u64 off_diff, offset;
9531
9532                 off_diff = dback->disk_bytenr - entry->bytenr;
9533                 offset = btrfs_file_extent_offset(leaf, fi);
9534                 if (dback->disk_bytenr + offset +
9535                     btrfs_file_extent_num_bytes(leaf, fi) >
9536                     entry->bytenr + entry->bytes) {
9537                         fprintf(stderr, "Ref is past the entry end, please "
9538                                 "take a btrfs-image of this file system and "
9539                                 "send it to a btrfs developer, ref %Lu\n",
9540                                 dback->disk_bytenr);
9541                         ret = -EINVAL;
9542                         goto out;
9543                 }
9544                 offset += off_diff;
9545                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9546                 btrfs_set_file_extent_offset(leaf, fi, offset);
9547         } else if (dback->disk_bytenr < entry->bytenr) {
9548                 u64 offset;
9549
9550                 offset = btrfs_file_extent_offset(leaf, fi);
9551                 if (dback->disk_bytenr + offset < entry->bytenr) {
9552                         fprintf(stderr, "Ref is before the entry start, please"
9553                                 " take a btrfs-image of this file system and "
9554                                 "send it to a btrfs developer, ref %Lu\n",
9555                                 dback->disk_bytenr);
9556                         ret = -EINVAL;
9557                         goto out;
9558                 }
9559
9560                 offset += dback->disk_bytenr;
9561                 offset -= entry->bytenr;
9562                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9563                 btrfs_set_file_extent_offset(leaf, fi, offset);
9564         }
9565
9566         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9567
9568         /*
9569          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9570          * only do this if we aren't using compression, otherwise it's a
9571          * trickier case.
9572          */
9573         if (!btrfs_file_extent_compression(leaf, fi))
9574                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9575         else
9576                 printf("ram bytes may be wrong?\n");
9577         btrfs_mark_buffer_dirty(leaf);
9578 out:
9579         err = btrfs_commit_transaction(trans, root);
9580         btrfs_release_path(path);
9581         return ret ? ret : err;
9582 }
9583
9584 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9585                            struct extent_record *rec)
9586 {
9587         struct extent_backref *back, *tmp;
9588         struct data_backref *dback;
9589         struct extent_entry *entry, *best = NULL;
9590         LIST_HEAD(entries);
9591         int nr_entries = 0;
9592         int broken_entries = 0;
9593         int ret = 0;
9594         short mismatch = 0;
9595
9596         /*
9597          * Metadata is easy and the backrefs should always agree on bytenr and
9598          * size, if not we've got bigger issues.
9599          */
9600         if (rec->metadata)
9601                 return 0;
9602
9603         rbtree_postorder_for_each_entry_safe(back, tmp,
9604                                              &rec->backref_tree, node) {
9605                 if (back->full_backref || !back->is_data)
9606                         continue;
9607
9608                 dback = to_data_backref(back);
9609
9610                 /*
9611                  * We only pay attention to backrefs that we found a real
9612                  * backref for.
9613                  */
9614                 if (dback->found_ref == 0)
9615                         continue;
9616
9617                 /*
9618                  * For now we only catch when the bytes don't match, not the
9619                  * bytenr.  We can easily do this at the same time, but I want
9620                  * to have a fs image to test on before we just add repair
9621                  * functionality willy-nilly so we know we won't screw up the
9622                  * repair.
9623                  */
9624
9625                 entry = find_entry(&entries, dback->disk_bytenr,
9626                                    dback->bytes);
9627                 if (!entry) {
9628                         entry = malloc(sizeof(struct extent_entry));
9629                         if (!entry) {
9630                                 ret = -ENOMEM;
9631                                 goto out;
9632                         }
9633                         memset(entry, 0, sizeof(*entry));
9634                         entry->bytenr = dback->disk_bytenr;
9635                         entry->bytes = dback->bytes;
9636                         list_add_tail(&entry->list, &entries);
9637                         nr_entries++;
9638                 }
9639
9640                 /*
9641                  * If we only have on entry we may think the entries agree when
9642                  * in reality they don't so we have to do some extra checking.
9643                  */
9644                 if (dback->disk_bytenr != rec->start ||
9645                     dback->bytes != rec->nr || back->broken)
9646                         mismatch = 1;
9647
9648                 if (back->broken) {
9649                         entry->broken++;
9650                         broken_entries++;
9651                 }
9652
9653                 entry->count++;
9654         }
9655
9656         /* Yay all the backrefs agree, carry on good sir */
9657         if (nr_entries <= 1 && !mismatch)
9658                 goto out;
9659
9660         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9661                 "%Lu\n", rec->start);
9662
9663         /*
9664          * First we want to see if the backrefs can agree amongst themselves who
9665          * is right, so figure out which one of the entries has the highest
9666          * count.
9667          */
9668         best = find_most_right_entry(&entries);
9669
9670         /*
9671          * Ok so we may have an even split between what the backrefs think, so
9672          * this is where we use the extent ref to see what it thinks.
9673          */
9674         if (!best) {
9675                 entry = find_entry(&entries, rec->start, rec->nr);
9676                 if (!entry && (!broken_entries || !rec->found_rec)) {
9677                         fprintf(stderr, "Backrefs don't agree with each other "
9678                                 "and extent record doesn't agree with anybody,"
9679                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9680                                 rec->start, rec->nr);
9681                         ret = -EINVAL;
9682                         goto out;
9683                 } else if (!entry) {
9684                         /*
9685                          * Ok our backrefs were broken, we'll assume this is the
9686                          * correct value and add an entry for this range.
9687                          */
9688                         entry = malloc(sizeof(struct extent_entry));
9689                         if (!entry) {
9690                                 ret = -ENOMEM;
9691                                 goto out;
9692                         }
9693                         memset(entry, 0, sizeof(*entry));
9694                         entry->bytenr = rec->start;
9695                         entry->bytes = rec->nr;
9696                         list_add_tail(&entry->list, &entries);
9697                         nr_entries++;
9698                 }
9699                 entry->count++;
9700                 best = find_most_right_entry(&entries);
9701                 if (!best) {
9702                         fprintf(stderr, "Backrefs and extent record evenly "
9703                                 "split on who is right, this is going to "
9704                                 "require user input to fix bytenr %Lu bytes "
9705                                 "%Lu\n", rec->start, rec->nr);
9706                         ret = -EINVAL;
9707                         goto out;
9708                 }
9709         }
9710
9711         /*
9712          * I don't think this can happen currently as we'll abort() if we catch
9713          * this case higher up, but in case somebody removes that we still can't
9714          * deal with it properly here yet, so just bail out of that's the case.
9715          */
9716         if (best->bytenr != rec->start) {
9717                 fprintf(stderr, "Extent start and backref starts don't match, "
9718                         "please use btrfs-image on this file system and send "
9719                         "it to a btrfs developer so they can make fsck fix "
9720                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9721                         rec->start, rec->nr);
9722                 ret = -EINVAL;
9723                 goto out;
9724         }
9725
9726         /*
9727          * Ok great we all agreed on an extent record, let's go find the real
9728          * references and fix up the ones that don't match.
9729          */
9730         rbtree_postorder_for_each_entry_safe(back, tmp,
9731                                              &rec->backref_tree, node) {
9732                 if (back->full_backref || !back->is_data)
9733                         continue;
9734
9735                 dback = to_data_backref(back);
9736
9737                 /*
9738                  * Still ignoring backrefs that don't have a real ref attached
9739                  * to them.
9740                  */
9741                 if (dback->found_ref == 0)
9742                         continue;
9743
9744                 if (dback->bytes == best->bytes &&
9745                     dback->disk_bytenr == best->bytenr)
9746                         continue;
9747
9748                 ret = repair_ref(info, path, dback, best);
9749                 if (ret)
9750                         goto out;
9751         }
9752
9753         /*
9754          * Ok we messed with the actual refs, which means we need to drop our
9755          * entire cache and go back and rescan.  I know this is a huge pain and
9756          * adds a lot of extra work, but it's the only way to be safe.  Once all
9757          * the backrefs agree we may not need to do anything to the extent
9758          * record itself.
9759          */
9760         ret = -EAGAIN;
9761 out:
9762         while (!list_empty(&entries)) {
9763                 entry = list_entry(entries.next, struct extent_entry, list);
9764                 list_del_init(&entry->list);
9765                 free(entry);
9766         }
9767         return ret;
9768 }
9769
9770 static int process_duplicates(struct cache_tree *extent_cache,
9771                               struct extent_record *rec)
9772 {
9773         struct extent_record *good, *tmp;
9774         struct cache_extent *cache;
9775         int ret;
9776
9777         /*
9778          * If we found a extent record for this extent then return, or if we
9779          * have more than one duplicate we are likely going to need to delete
9780          * something.
9781          */
9782         if (rec->found_rec || rec->num_duplicates > 1)
9783                 return 0;
9784
9785         /* Shouldn't happen but just in case */
9786         BUG_ON(!rec->num_duplicates);
9787
9788         /*
9789          * So this happens if we end up with a backref that doesn't match the
9790          * actual extent entry.  So either the backref is bad or the extent
9791          * entry is bad.  Either way we want to have the extent_record actually
9792          * reflect what we found in the extent_tree, so we need to take the
9793          * duplicate out and use that as the extent_record since the only way we
9794          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9795          */
9796         remove_cache_extent(extent_cache, &rec->cache);
9797
9798         good = to_extent_record(rec->dups.next);
9799         list_del_init(&good->list);
9800         INIT_LIST_HEAD(&good->backrefs);
9801         INIT_LIST_HEAD(&good->dups);
9802         good->cache.start = good->start;
9803         good->cache.size = good->nr;
9804         good->content_checked = 0;
9805         good->owner_ref_checked = 0;
9806         good->num_duplicates = 0;
9807         good->refs = rec->refs;
9808         list_splice_init(&rec->backrefs, &good->backrefs);
9809         while (1) {
9810                 cache = lookup_cache_extent(extent_cache, good->start,
9811                                             good->nr);
9812                 if (!cache)
9813                         break;
9814                 tmp = container_of(cache, struct extent_record, cache);
9815
9816                 /*
9817                  * If we find another overlapping extent and it's found_rec is
9818                  * set then it's a duplicate and we need to try and delete
9819                  * something.
9820                  */
9821                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9822                         if (list_empty(&good->list))
9823                                 list_add_tail(&good->list,
9824                                               &duplicate_extents);
9825                         good->num_duplicates += tmp->num_duplicates + 1;
9826                         list_splice_init(&tmp->dups, &good->dups);
9827                         list_del_init(&tmp->list);
9828                         list_add_tail(&tmp->list, &good->dups);
9829                         remove_cache_extent(extent_cache, &tmp->cache);
9830                         continue;
9831                 }
9832
9833                 /*
9834                  * Ok we have another non extent item backed extent rec, so lets
9835                  * just add it to this extent and carry on like we did above.
9836                  */
9837                 good->refs += tmp->refs;
9838                 list_splice_init(&tmp->backrefs, &good->backrefs);
9839                 remove_cache_extent(extent_cache, &tmp->cache);
9840                 free(tmp);
9841         }
9842         ret = insert_cache_extent(extent_cache, &good->cache);
9843         BUG_ON(ret);
9844         free(rec);
9845         return good->num_duplicates ? 0 : 1;
9846 }
9847
9848 static int delete_duplicate_records(struct btrfs_root *root,
9849                                     struct extent_record *rec)
9850 {
9851         struct btrfs_trans_handle *trans;
9852         LIST_HEAD(delete_list);
9853         struct btrfs_path path;
9854         struct extent_record *tmp, *good, *n;
9855         int nr_del = 0;
9856         int ret = 0, err;
9857         struct btrfs_key key;
9858
9859         btrfs_init_path(&path);
9860
9861         good = rec;
9862         /* Find the record that covers all of the duplicates. */
9863         list_for_each_entry(tmp, &rec->dups, list) {
9864                 if (good->start < tmp->start)
9865                         continue;
9866                 if (good->nr > tmp->nr)
9867                         continue;
9868
9869                 if (tmp->start + tmp->nr < good->start + good->nr) {
9870                         fprintf(stderr, "Ok we have overlapping extents that "
9871                                 "aren't completely covered by each other, this "
9872                                 "is going to require more careful thought.  "
9873                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9874                                 tmp->start, tmp->nr, good->start, good->nr);
9875                         abort();
9876                 }
9877                 good = tmp;
9878         }
9879
9880         if (good != rec)
9881                 list_add_tail(&rec->list, &delete_list);
9882
9883         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9884                 if (tmp == good)
9885                         continue;
9886                 list_move_tail(&tmp->list, &delete_list);
9887         }
9888
9889         root = root->fs_info->extent_root;
9890         trans = btrfs_start_transaction(root, 1);
9891         if (IS_ERR(trans)) {
9892                 ret = PTR_ERR(trans);
9893                 goto out;
9894         }
9895
9896         list_for_each_entry(tmp, &delete_list, list) {
9897                 if (tmp->found_rec == 0)
9898                         continue;
9899                 key.objectid = tmp->start;
9900                 key.type = BTRFS_EXTENT_ITEM_KEY;
9901                 key.offset = tmp->nr;
9902
9903                 /* Shouldn't happen but just in case */
9904                 if (tmp->metadata) {
9905                         fprintf(stderr, "Well this shouldn't happen, extent "
9906                                 "record overlaps but is metadata? "
9907                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9908                         abort();
9909                 }
9910
9911                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9912                 if (ret) {
9913                         if (ret > 0)
9914                                 ret = -EINVAL;
9915                         break;
9916                 }
9917                 ret = btrfs_del_item(trans, root, &path);
9918                 if (ret)
9919                         break;
9920                 btrfs_release_path(&path);
9921                 nr_del++;
9922         }
9923         err = btrfs_commit_transaction(trans, root);
9924         if (err && !ret)
9925                 ret = err;
9926 out:
9927         while (!list_empty(&delete_list)) {
9928                 tmp = to_extent_record(delete_list.next);
9929                 list_del_init(&tmp->list);
9930                 if (tmp == rec)
9931                         continue;
9932                 free(tmp);
9933         }
9934
9935         while (!list_empty(&rec->dups)) {
9936                 tmp = to_extent_record(rec->dups.next);
9937                 list_del_init(&tmp->list);
9938                 free(tmp);
9939         }
9940
9941         btrfs_release_path(&path);
9942
9943         if (!ret && !nr_del)
9944                 rec->num_duplicates = 0;
9945
9946         return ret ? ret : nr_del;
9947 }
9948
9949 static int find_possible_backrefs(struct btrfs_fs_info *info,
9950                                   struct btrfs_path *path,
9951                                   struct cache_tree *extent_cache,
9952                                   struct extent_record *rec)
9953 {
9954         struct btrfs_root *root;
9955         struct extent_backref *back, *tmp;
9956         struct data_backref *dback;
9957         struct cache_extent *cache;
9958         struct btrfs_file_extent_item *fi;
9959         struct btrfs_key key;
9960         u64 bytenr, bytes;
9961         int ret;
9962
9963         rbtree_postorder_for_each_entry_safe(back, tmp,
9964                                              &rec->backref_tree, node) {
9965                 /* Don't care about full backrefs (poor unloved backrefs) */
9966                 if (back->full_backref || !back->is_data)
9967                         continue;
9968
9969                 dback = to_data_backref(back);
9970
9971                 /* We found this one, we don't need to do a lookup */
9972                 if (dback->found_ref)
9973                         continue;
9974
9975                 key.objectid = dback->root;
9976                 key.type = BTRFS_ROOT_ITEM_KEY;
9977                 key.offset = (u64)-1;
9978
9979                 root = btrfs_read_fs_root(info, &key);
9980
9981                 /* No root, definitely a bad ref, skip */
9982                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9983                         continue;
9984                 /* Other err, exit */
9985                 if (IS_ERR(root))
9986                         return PTR_ERR(root);
9987
9988                 key.objectid = dback->owner;
9989                 key.type = BTRFS_EXTENT_DATA_KEY;
9990                 key.offset = dback->offset;
9991                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9992                 if (ret) {
9993                         btrfs_release_path(path);
9994                         if (ret < 0)
9995                                 return ret;
9996                         /* Didn't find it, we can carry on */
9997                         ret = 0;
9998                         continue;
9999                 }
10000
10001                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10002                                     struct btrfs_file_extent_item);
10003                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10004                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10005                 btrfs_release_path(path);
10006                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10007                 if (cache) {
10008                         struct extent_record *tmp;
10009                         tmp = container_of(cache, struct extent_record, cache);
10010
10011                         /*
10012                          * If we found an extent record for the bytenr for this
10013                          * particular backref then we can't add it to our
10014                          * current extent record.  We only want to add backrefs
10015                          * that don't have a corresponding extent item in the
10016                          * extent tree since they likely belong to this record
10017                          * and we need to fix it if it doesn't match bytenrs.
10018                          */
10019                         if  (tmp->found_rec)
10020                                 continue;
10021                 }
10022
10023                 dback->found_ref += 1;
10024                 dback->disk_bytenr = bytenr;
10025                 dback->bytes = bytes;
10026
10027                 /*
10028                  * Set this so the verify backref code knows not to trust the
10029                  * values in this backref.
10030                  */
10031                 back->broken = 1;
10032         }
10033
10034         return 0;
10035 }
10036
10037 /*
10038  * Record orphan data ref into corresponding root.
10039  *
10040  * Return 0 if the extent item contains data ref and recorded.
10041  * Return 1 if the extent item contains no useful data ref
10042  *   On that case, it may contains only shared_dataref or metadata backref
10043  *   or the file extent exists(this should be handled by the extent bytenr
10044  *   recovery routine)
10045  * Return <0 if something goes wrong.
10046  */
10047 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10048                                       struct extent_record *rec)
10049 {
10050         struct btrfs_key key;
10051         struct btrfs_root *dest_root;
10052         struct extent_backref *back, *tmp;
10053         struct data_backref *dback;
10054         struct orphan_data_extent *orphan;
10055         struct btrfs_path path;
10056         int recorded_data_ref = 0;
10057         int ret = 0;
10058
10059         if (rec->metadata)
10060                 return 1;
10061         btrfs_init_path(&path);
10062         rbtree_postorder_for_each_entry_safe(back, tmp,
10063                                              &rec->backref_tree, node) {
10064                 if (back->full_backref || !back->is_data ||
10065                     !back->found_extent_tree)
10066                         continue;
10067                 dback = to_data_backref(back);
10068                 if (dback->found_ref)
10069                         continue;
10070                 key.objectid = dback->root;
10071                 key.type = BTRFS_ROOT_ITEM_KEY;
10072                 key.offset = (u64)-1;
10073
10074                 dest_root = btrfs_read_fs_root(fs_info, &key);
10075
10076                 /* For non-exist root we just skip it */
10077                 if (IS_ERR(dest_root) || !dest_root)
10078                         continue;
10079
10080                 key.objectid = dback->owner;
10081                 key.type = BTRFS_EXTENT_DATA_KEY;
10082                 key.offset = dback->offset;
10083
10084                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10085                 btrfs_release_path(&path);
10086                 /*
10087                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10088                  * we need to record it for inode/file extent rebuild.
10089                  * For ret > 0, we record it only for file extent rebuild.
10090                  * For ret == 0, the file extent exists but only bytenr
10091                  * mismatch, let the original bytenr fix routine to handle,
10092                  * don't record it.
10093                  */
10094                 if (ret == 0)
10095                         continue;
10096                 ret = 0;
10097                 orphan = malloc(sizeof(*orphan));
10098                 if (!orphan) {
10099                         ret = -ENOMEM;
10100                         goto out;
10101                 }
10102                 INIT_LIST_HEAD(&orphan->list);
10103                 orphan->root = dback->root;
10104                 orphan->objectid = dback->owner;
10105                 orphan->offset = dback->offset;
10106                 orphan->disk_bytenr = rec->cache.start;
10107                 orphan->disk_len = rec->cache.size;
10108                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10109                 recorded_data_ref = 1;
10110         }
10111 out:
10112         btrfs_release_path(&path);
10113         if (!ret)
10114                 return !recorded_data_ref;
10115         else
10116                 return ret;
10117 }
10118
10119 /*
10120  * when an incorrect extent item is found, this will delete
10121  * all of the existing entries for it and recreate them
10122  * based on what the tree scan found.
10123  */
10124 static int fixup_extent_refs(struct btrfs_fs_info *info,
10125                              struct cache_tree *extent_cache,
10126                              struct extent_record *rec)
10127 {
10128         struct btrfs_trans_handle *trans = NULL;
10129         int ret;
10130         struct btrfs_path path;
10131         struct cache_extent *cache;
10132         struct extent_backref *back, *tmp;
10133         int allocated = 0;
10134         u64 flags = 0;
10135
10136         if (rec->flag_block_full_backref)
10137                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10138
10139         btrfs_init_path(&path);
10140         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10141                 /*
10142                  * Sometimes the backrefs themselves are so broken they don't
10143                  * get attached to any meaningful rec, so first go back and
10144                  * check any of our backrefs that we couldn't find and throw
10145                  * them into the list if we find the backref so that
10146                  * verify_backrefs can figure out what to do.
10147                  */
10148                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10149                 if (ret < 0)
10150                         goto out;
10151         }
10152
10153         /* step one, make sure all of the backrefs agree */
10154         ret = verify_backrefs(info, &path, rec);
10155         if (ret < 0)
10156                 goto out;
10157
10158         trans = btrfs_start_transaction(info->extent_root, 1);
10159         if (IS_ERR(trans)) {
10160                 ret = PTR_ERR(trans);
10161                 goto out;
10162         }
10163
10164         /* step two, delete all the existing records */
10165         ret = delete_extent_records(trans, info->extent_root, &path,
10166                                     rec->start);
10167
10168         if (ret < 0)
10169                 goto out;
10170
10171         /* was this block corrupt?  If so, don't add references to it */
10172         cache = lookup_cache_extent(info->corrupt_blocks,
10173                                     rec->start, rec->max_size);
10174         if (cache) {
10175                 ret = 0;
10176                 goto out;
10177         }
10178
10179         /* step three, recreate all the refs we did find */
10180         rbtree_postorder_for_each_entry_safe(back, tmp,
10181                                              &rec->backref_tree, node) {
10182                 /*
10183                  * if we didn't find any references, don't create a
10184                  * new extent record
10185                  */
10186                 if (!back->found_ref)
10187                         continue;
10188
10189                 rec->bad_full_backref = 0;
10190                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10191                 allocated = 1;
10192
10193                 if (ret)
10194                         goto out;
10195         }
10196 out:
10197         if (trans) {
10198                 int err = btrfs_commit_transaction(trans, info->extent_root);
10199                 if (!ret)
10200                         ret = err;
10201         }
10202
10203         if (!ret)
10204                 fprintf(stderr, "Repaired extent references for %llu\n",
10205                                 (unsigned long long)rec->start);
10206
10207         btrfs_release_path(&path);
10208         return ret;
10209 }
10210
10211 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10212                               struct extent_record *rec)
10213 {
10214         struct btrfs_trans_handle *trans;
10215         struct btrfs_root *root = fs_info->extent_root;
10216         struct btrfs_path path;
10217         struct btrfs_extent_item *ei;
10218         struct btrfs_key key;
10219         u64 flags;
10220         int ret = 0;
10221
10222         key.objectid = rec->start;
10223         if (rec->metadata) {
10224                 key.type = BTRFS_METADATA_ITEM_KEY;
10225                 key.offset = rec->info_level;
10226         } else {
10227                 key.type = BTRFS_EXTENT_ITEM_KEY;
10228                 key.offset = rec->max_size;
10229         }
10230
10231         trans = btrfs_start_transaction(root, 0);
10232         if (IS_ERR(trans))
10233                 return PTR_ERR(trans);
10234
10235         btrfs_init_path(&path);
10236         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10237         if (ret < 0) {
10238                 btrfs_release_path(&path);
10239                 btrfs_commit_transaction(trans, root);
10240                 return ret;
10241         } else if (ret) {
10242                 fprintf(stderr, "Didn't find extent for %llu\n",
10243                         (unsigned long long)rec->start);
10244                 btrfs_release_path(&path);
10245                 btrfs_commit_transaction(trans, root);
10246                 return -ENOENT;
10247         }
10248
10249         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10250                             struct btrfs_extent_item);
10251         flags = btrfs_extent_flags(path.nodes[0], ei);
10252         if (rec->flag_block_full_backref) {
10253                 fprintf(stderr, "setting full backref on %llu\n",
10254                         (unsigned long long)key.objectid);
10255                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10256         } else {
10257                 fprintf(stderr, "clearing full backref on %llu\n",
10258                         (unsigned long long)key.objectid);
10259                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10260         }
10261         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10262         btrfs_mark_buffer_dirty(path.nodes[0]);
10263         btrfs_release_path(&path);
10264         ret = btrfs_commit_transaction(trans, root);
10265         if (!ret)
10266                 fprintf(stderr, "Repaired extent flags for %llu\n",
10267                                 (unsigned long long)rec->start);
10268
10269         return ret;
10270 }
10271
10272 /* right now we only prune from the extent allocation tree */
10273 static int prune_one_block(struct btrfs_trans_handle *trans,
10274                            struct btrfs_fs_info *info,
10275                            struct btrfs_corrupt_block *corrupt)
10276 {
10277         int ret;
10278         struct btrfs_path path;
10279         struct extent_buffer *eb;
10280         u64 found;
10281         int slot;
10282         int nritems;
10283         int level = corrupt->level + 1;
10284
10285         btrfs_init_path(&path);
10286 again:
10287         /* we want to stop at the parent to our busted block */
10288         path.lowest_level = level;
10289
10290         ret = btrfs_search_slot(trans, info->extent_root,
10291                                 &corrupt->key, &path, -1, 1);
10292
10293         if (ret < 0)
10294                 goto out;
10295
10296         eb = path.nodes[level];
10297         if (!eb) {
10298                 ret = -ENOENT;
10299                 goto out;
10300         }
10301
10302         /*
10303          * hopefully the search gave us the block we want to prune,
10304          * lets try that first
10305          */
10306         slot = path.slots[level];
10307         found =  btrfs_node_blockptr(eb, slot);
10308         if (found == corrupt->cache.start)
10309                 goto del_ptr;
10310
10311         nritems = btrfs_header_nritems(eb);
10312
10313         /* the search failed, lets scan this node and hope we find it */
10314         for (slot = 0; slot < nritems; slot++) {
10315                 found =  btrfs_node_blockptr(eb, slot);
10316                 if (found == corrupt->cache.start)
10317                         goto del_ptr;
10318         }
10319         /*
10320          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10321          * to this block
10322          */
10323         if (eb == info->extent_root->node) {
10324                 ret = -ENOENT;
10325                 goto out;
10326         } else {
10327                 level++;
10328                 btrfs_release_path(&path);
10329                 goto again;
10330         }
10331
10332 del_ptr:
10333         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10334         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10335
10336 out:
10337         btrfs_release_path(&path);
10338         return ret;
10339 }
10340
10341 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10342 {
10343         struct btrfs_trans_handle *trans = NULL;
10344         struct cache_extent *cache;
10345         struct btrfs_corrupt_block *corrupt;
10346
10347         while (1) {
10348                 cache = search_cache_extent(info->corrupt_blocks, 0);
10349                 if (!cache)
10350                         break;
10351                 if (!trans) {
10352                         trans = btrfs_start_transaction(info->extent_root, 1);
10353                         if (IS_ERR(trans))
10354                                 return PTR_ERR(trans);
10355                 }
10356                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10357                 prune_one_block(trans, info, corrupt);
10358                 remove_cache_extent(info->corrupt_blocks, cache);
10359         }
10360         if (trans)
10361                 return btrfs_commit_transaction(trans, info->extent_root);
10362         return 0;
10363 }
10364
10365 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10366 {
10367         struct btrfs_block_group_cache *cache;
10368         u64 start, end;
10369         int ret;
10370
10371         while (1) {
10372                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10373                                             &start, &end, EXTENT_DIRTY);
10374                 if (ret)
10375                         break;
10376                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10377         }
10378
10379         start = 0;
10380         while (1) {
10381                 cache = btrfs_lookup_first_block_group(fs_info, start);
10382                 if (!cache)
10383                         break;
10384                 if (cache->cached)
10385                         cache->cached = 0;
10386                 start = cache->key.objectid + cache->key.offset;
10387         }
10388 }
10389
10390 static int check_extent_refs(struct btrfs_root *root,
10391                              struct cache_tree *extent_cache)
10392 {
10393         struct extent_record *rec;
10394         struct cache_extent *cache;
10395         int ret = 0;
10396         int had_dups = 0;
10397
10398         if (repair) {
10399                 /*
10400                  * if we're doing a repair, we have to make sure
10401                  * we don't allocate from the problem extents.
10402                  * In the worst case, this will be all the
10403                  * extents in the FS
10404                  */
10405                 cache = search_cache_extent(extent_cache, 0);
10406                 while(cache) {
10407                         rec = container_of(cache, struct extent_record, cache);
10408                         set_extent_dirty(root->fs_info->excluded_extents,
10409                                          rec->start,
10410                                          rec->start + rec->max_size - 1);
10411                         cache = next_cache_extent(cache);
10412                 }
10413
10414                 /* pin down all the corrupted blocks too */
10415                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10416                 while(cache) {
10417                         set_extent_dirty(root->fs_info->excluded_extents,
10418                                          cache->start,
10419                                          cache->start + cache->size - 1);
10420                         cache = next_cache_extent(cache);
10421                 }
10422                 prune_corrupt_blocks(root->fs_info);
10423                 reset_cached_block_groups(root->fs_info);
10424         }
10425
10426         reset_cached_block_groups(root->fs_info);
10427
10428         /*
10429          * We need to delete any duplicate entries we find first otherwise we
10430          * could mess up the extent tree when we have backrefs that actually
10431          * belong to a different extent item and not the weird duplicate one.
10432          */
10433         while (repair && !list_empty(&duplicate_extents)) {
10434                 rec = to_extent_record(duplicate_extents.next);
10435                 list_del_init(&rec->list);
10436
10437                 /* Sometimes we can find a backref before we find an actual
10438                  * extent, so we need to process it a little bit to see if there
10439                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10440                  * if this is a backref screwup.  If we need to delete stuff
10441                  * process_duplicates() will return 0, otherwise it will return
10442                  * 1 and we
10443                  */
10444                 if (process_duplicates(extent_cache, rec))
10445                         continue;
10446                 ret = delete_duplicate_records(root, rec);
10447                 if (ret < 0)
10448                         return ret;
10449                 /*
10450                  * delete_duplicate_records will return the number of entries
10451                  * deleted, so if it's greater than 0 then we know we actually
10452                  * did something and we need to remove.
10453                  */
10454                 if (ret)
10455                         had_dups = 1;
10456         }
10457
10458         if (had_dups)
10459                 return -EAGAIN;
10460
10461         while(1) {
10462                 int cur_err = 0;
10463                 int fix = 0;
10464
10465                 cache = search_cache_extent(extent_cache, 0);
10466                 if (!cache)
10467                         break;
10468                 rec = container_of(cache, struct extent_record, cache);
10469                 if (rec->num_duplicates) {
10470                         fprintf(stderr, "extent item %llu has multiple extent "
10471                                 "items\n", (unsigned long long)rec->start);
10472                         cur_err = 1;
10473                 }
10474
10475                 if (rec->refs != rec->extent_item_refs) {
10476                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10477                                 (unsigned long long)rec->start,
10478                                 (unsigned long long)rec->nr);
10479                         fprintf(stderr, "extent item %llu, found %llu\n",
10480                                 (unsigned long long)rec->extent_item_refs,
10481                                 (unsigned long long)rec->refs);
10482                         ret = record_orphan_data_extents(root->fs_info, rec);
10483                         if (ret < 0)
10484                                 goto repair_abort;
10485                         fix = ret;
10486                         cur_err = 1;
10487                 }
10488                 if (all_backpointers_checked(rec, 1)) {
10489                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10490                                 (unsigned long long)rec->start,
10491                                 (unsigned long long)rec->nr);
10492                         fix = 1;
10493                         cur_err = 1;
10494                 }
10495                 if (!rec->owner_ref_checked) {
10496                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10497                                 (unsigned long long)rec->start,
10498                                 (unsigned long long)rec->nr);
10499                         fix = 1;
10500                         cur_err = 1;
10501                 }
10502
10503                 if (repair && fix) {
10504                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10505                         if (ret)
10506                                 goto repair_abort;
10507                 }
10508
10509
10510                 if (rec->bad_full_backref) {
10511                         fprintf(stderr, "bad full backref, on [%llu]\n",
10512                                 (unsigned long long)rec->start);
10513                         if (repair) {
10514                                 ret = fixup_extent_flags(root->fs_info, rec);
10515                                 if (ret)
10516                                         goto repair_abort;
10517                                 fix = 1;
10518                         }
10519                         cur_err = 1;
10520                 }
10521                 /*
10522                  * Although it's not a extent ref's problem, we reuse this
10523                  * routine for error reporting.
10524                  * No repair function yet.
10525                  */
10526                 if (rec->crossing_stripes) {
10527                         fprintf(stderr,
10528                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10529                                 rec->start, rec->start + rec->max_size);
10530                         cur_err = 1;
10531                 }
10532
10533                 if (rec->wrong_chunk_type) {
10534                         fprintf(stderr,
10535                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10536                                 rec->start, rec->start + rec->max_size);
10537                         cur_err = 1;
10538                 }
10539
10540                 remove_cache_extent(extent_cache, cache);
10541                 free_all_extent_backrefs(rec);
10542                 if (!init_extent_tree && repair && (!cur_err || fix))
10543                         clear_extent_dirty(root->fs_info->excluded_extents,
10544                                            rec->start,
10545                                            rec->start + rec->max_size - 1);
10546                 free(rec);
10547         }
10548 repair_abort:
10549         if (repair) {
10550                 if (ret && ret != -EAGAIN) {
10551                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10552                         exit(1);
10553                 } else if (!ret) {
10554                         struct btrfs_trans_handle *trans;
10555
10556                         root = root->fs_info->extent_root;
10557                         trans = btrfs_start_transaction(root, 1);
10558                         if (IS_ERR(trans)) {
10559                                 ret = PTR_ERR(trans);
10560                                 goto repair_abort;
10561                         }
10562
10563                         ret = btrfs_fix_block_accounting(trans, root);
10564                         if (ret)
10565                                 goto repair_abort;
10566                         ret = btrfs_commit_transaction(trans, root);
10567                         if (ret)
10568                                 goto repair_abort;
10569                 }
10570                 return ret;
10571         }
10572         return 0;
10573 }
10574
10575 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10576 {
10577         u64 stripe_size;
10578
10579         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10580                 stripe_size = length;
10581                 stripe_size /= num_stripes;
10582         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10583                 stripe_size = length * 2;
10584                 stripe_size /= num_stripes;
10585         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10586                 stripe_size = length;
10587                 stripe_size /= (num_stripes - 1);
10588         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10589                 stripe_size = length;
10590                 stripe_size /= (num_stripes - 2);
10591         } else {
10592                 stripe_size = length;
10593         }
10594         return stripe_size;
10595 }
10596
/*
 * Check a chunk against its block group and device extent references:
 * Return 0 if all refs seem valid.
 * Return 1 if only part of the refs seem valid and a later check is needed
 * to rebuild the missing ones (e.g. a missing block group, which requires
 * searching the extent tree to rebuild it).
 * Return -1 if essential refs are missing and cannot be rebuilt.
 */
10604 static int check_chunk_refs(struct chunk_record *chunk_rec,
10605                             struct block_group_tree *block_group_cache,
10606                             struct device_extent_tree *dev_extent_cache,
10607                             int silent)
10608 {
10609         struct cache_extent *block_group_item;
10610         struct block_group_record *block_group_rec;
10611         struct cache_extent *dev_extent_item;
10612         struct device_extent_record *dev_extent_rec;
10613         u64 devid;
10614         u64 offset;
10615         u64 length;
10616         int metadump_v2 = 0;
10617         int i;
10618         int ret = 0;
10619
10620         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10621                                                chunk_rec->offset,
10622                                                chunk_rec->length);
10623         if (block_group_item) {
10624                 block_group_rec = container_of(block_group_item,
10625                                                struct block_group_record,
10626                                                cache);
10627                 if (chunk_rec->length != block_group_rec->offset ||
10628                     chunk_rec->offset != block_group_rec->objectid ||
10629                     (!metadump_v2 &&
10630                      chunk_rec->type_flags != block_group_rec->flags)) {
10631                         if (!silent)
10632                                 fprintf(stderr,
10633                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10634                                         chunk_rec->objectid,
10635                                         chunk_rec->type,
10636                                         chunk_rec->offset,
10637                                         chunk_rec->length,
10638                                         chunk_rec->offset,
10639                                         chunk_rec->type_flags,
10640                                         block_group_rec->objectid,
10641                                         block_group_rec->type,
10642                                         block_group_rec->offset,
10643                                         block_group_rec->offset,
10644                                         block_group_rec->objectid,
10645                                         block_group_rec->flags);
10646                         ret = -1;
10647                 } else {
10648                         list_del_init(&block_group_rec->list);
10649                         chunk_rec->bg_rec = block_group_rec;
10650                 }
10651         } else {
10652                 if (!silent)
10653                         fprintf(stderr,
10654                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10655                                 chunk_rec->objectid,
10656                                 chunk_rec->type,
10657                                 chunk_rec->offset,
10658                                 chunk_rec->length,
10659                                 chunk_rec->offset,
10660                                 chunk_rec->type_flags);
10661                 ret = 1;
10662         }
10663
10664         if (metadump_v2)
10665                 return ret;
10666
10667         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10668                                     chunk_rec->num_stripes);
10669         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10670                 devid = chunk_rec->stripes[i].devid;
10671                 offset = chunk_rec->stripes[i].offset;
10672                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10673                                                        devid, offset, length);
10674                 if (dev_extent_item) {
10675                         dev_extent_rec = container_of(dev_extent_item,
10676                                                 struct device_extent_record,
10677                                                 cache);
10678                         if (dev_extent_rec->objectid != devid ||
10679                             dev_extent_rec->offset != offset ||
10680                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10681                             dev_extent_rec->length != length) {
10682                                 if (!silent)
10683                                         fprintf(stderr,
10684                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10685                                                 chunk_rec->objectid,
10686                                                 chunk_rec->type,
10687                                                 chunk_rec->offset,
10688                                                 chunk_rec->stripes[i].devid,
10689                                                 chunk_rec->stripes[i].offset,
10690                                                 dev_extent_rec->objectid,
10691                                                 dev_extent_rec->offset,
10692                                                 dev_extent_rec->length);
10693                                 ret = -1;
10694                         } else {
10695                                 list_move(&dev_extent_rec->chunk_list,
10696                                           &chunk_rec->dextents);
10697                         }
10698                 } else {
10699                         if (!silent)
10700                                 fprintf(stderr,
10701                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10702                                         chunk_rec->objectid,
10703                                         chunk_rec->type,
10704                                         chunk_rec->offset,
10705                                         chunk_rec->stripes[i].devid,
10706                                         chunk_rec->stripes[i].offset);
10707                         ret = -1;
10708                 }
10709         }
10710         return ret;
10711 }
10712
10713 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10714 int check_chunks(struct cache_tree *chunk_cache,
10715                  struct block_group_tree *block_group_cache,
10716                  struct device_extent_tree *dev_extent_cache,
10717                  struct list_head *good, struct list_head *bad,
10718                  struct list_head *rebuild, int silent)
10719 {
10720         struct cache_extent *chunk_item;
10721         struct chunk_record *chunk_rec;
10722         struct block_group_record *bg_rec;
10723         struct device_extent_record *dext_rec;
10724         int err;
10725         int ret = 0;
10726
10727         chunk_item = first_cache_extent(chunk_cache);
10728         while (chunk_item) {
10729                 chunk_rec = container_of(chunk_item, struct chunk_record,
10730                                          cache);
10731                 err = check_chunk_refs(chunk_rec, block_group_cache,
10732                                        dev_extent_cache, silent);
10733                 if (err < 0)
10734                         ret = err;
10735                 if (err == 0 && good)
10736                         list_add_tail(&chunk_rec->list, good);
10737                 if (err > 0 && rebuild)
10738                         list_add_tail(&chunk_rec->list, rebuild);
10739                 if (err < 0 && bad)
10740                         list_add_tail(&chunk_rec->list, bad);
10741                 chunk_item = next_cache_extent(chunk_item);
10742         }
10743
10744         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10745                 if (!silent)
10746                         fprintf(stderr,
10747                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10748                                 bg_rec->objectid,
10749                                 bg_rec->offset,
10750                                 bg_rec->flags);
10751                 if (!ret)
10752                         ret = 1;
10753         }
10754
10755         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10756                             chunk_list) {
10757                 if (!silent)
10758                         fprintf(stderr,
10759                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10760                                 dext_rec->objectid,
10761                                 dext_rec->offset,
10762                                 dext_rec->length);
10763                 if (!ret)
10764                         ret = 1;
10765         }
10766         return ret;
10767 }
10768
10769
10770 static int check_device_used(struct device_record *dev_rec,
10771                              struct device_extent_tree *dext_cache)
10772 {
10773         struct cache_extent *cache;
10774         struct device_extent_record *dev_extent_rec;
10775         u64 total_byte = 0;
10776
10777         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10778         while (cache) {
10779                 dev_extent_rec = container_of(cache,
10780                                               struct device_extent_record,
10781                                               cache);
10782                 if (dev_extent_rec->objectid != dev_rec->devid)
10783                         break;
10784
10785                 list_del_init(&dev_extent_rec->device_list);
10786                 total_byte += dev_extent_rec->length;
10787                 cache = next_cache_extent(cache);
10788         }
10789
10790         if (total_byte != dev_rec->byte_used) {
10791                 fprintf(stderr,
10792                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10793                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10794                         dev_rec->type, dev_rec->offset);
10795                 return -1;
10796         } else {
10797                 return 0;
10798         }
10799 }
10800
10801 /* check btrfs_dev_item -> btrfs_dev_extent */
10802 static int check_devices(struct rb_root *dev_cache,
10803                          struct device_extent_tree *dev_extent_cache)
10804 {
10805         struct rb_node *dev_node;
10806         struct device_record *dev_rec;
10807         struct device_extent_record *dext_rec;
10808         int err;
10809         int ret = 0;
10810
10811         dev_node = rb_first(dev_cache);
10812         while (dev_node) {
10813                 dev_rec = container_of(dev_node, struct device_record, node);
10814                 err = check_device_used(dev_rec, dev_extent_cache);
10815                 if (err)
10816                         ret = err;
10817
10818                 dev_node = rb_next(dev_node);
10819         }
10820         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10821                             device_list) {
10822                 fprintf(stderr,
10823                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10824                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10825                 if (!ret)
10826                         ret = 1;
10827         }
10828         return ret;
10829 }
10830
10831 static int add_root_item_to_list(struct list_head *head,
10832                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10833                                   u8 level, u8 drop_level,
10834                                   struct btrfs_key *drop_key)
10835 {
10836
10837         struct root_item_record *ri_rec;
10838         ri_rec = malloc(sizeof(*ri_rec));
10839         if (!ri_rec)
10840                 return -ENOMEM;
10841         ri_rec->bytenr = bytenr;
10842         ri_rec->objectid = objectid;
10843         ri_rec->level = level;
10844         ri_rec->drop_level = drop_level;
10845         ri_rec->last_snapshot = last_snapshot;
10846         if (drop_key)
10847                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10848         list_add_tail(&ri_rec->list, head);
10849
10850         return 0;
10851 }
10852
10853 static void free_root_item_list(struct list_head *list)
10854 {
10855         struct root_item_record *ri_rec;
10856
10857         while (!list_empty(list)) {
10858                 ri_rec = list_first_entry(list, struct root_item_record,
10859                                           list);
10860                 list_del_init(&ri_rec->list);
10861                 free(ri_rec);
10862         }
10863 }
10864
10865 static int deal_root_from_list(struct list_head *list,
10866                                struct btrfs_root *root,
10867                                struct block_info *bits,
10868                                int bits_nr,
10869                                struct cache_tree *pending,
10870                                struct cache_tree *seen,
10871                                struct cache_tree *reada,
10872                                struct cache_tree *nodes,
10873                                struct cache_tree *extent_cache,
10874                                struct cache_tree *chunk_cache,
10875                                struct rb_root *dev_cache,
10876                                struct block_group_tree *block_group_cache,
10877                                struct device_extent_tree *dev_extent_cache)
10878 {
10879         int ret = 0;
10880         u64 last;
10881
10882         while (!list_empty(list)) {
10883                 struct root_item_record *rec;
10884                 struct extent_buffer *buf;
10885                 rec = list_entry(list->next,
10886                                  struct root_item_record, list);
10887                 last = 0;
10888                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10889                 if (!extent_buffer_uptodate(buf)) {
10890                         free_extent_buffer(buf);
10891                         ret = -EIO;
10892                         break;
10893                 }
10894                 ret = add_root_to_pending(buf, extent_cache, pending,
10895                                     seen, nodes, rec->objectid);
10896                 if (ret < 0)
10897                         break;
10898                 /*
10899                  * To rebuild extent tree, we need deal with snapshot
10900                  * one by one, otherwise we deal with node firstly which
10901                  * can maximize readahead.
10902                  */
10903                 while (1) {
10904                         ret = run_next_block(root, bits, bits_nr, &last,
10905                                              pending, seen, reada, nodes,
10906                                              extent_cache, chunk_cache,
10907                                              dev_cache, block_group_cache,
10908                                              dev_extent_cache, rec);
10909                         if (ret != 0)
10910                                 break;
10911                 }
10912                 free_extent_buffer(buf);
10913                 list_del(&rec->list);
10914                 free(rec);
10915                 if (ret < 0)
10916                         break;
10917         }
10918         while (ret >= 0) {
10919                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10920                                      reada, nodes, extent_cache, chunk_cache,
10921                                      dev_cache, block_group_cache,
10922                                      dev_extent_cache, NULL);
10923                 if (ret != 0) {
10924                         if (ret > 0)
10925                                 ret = 0;
10926                         break;
10927                 }
10928         }
10929         return ret;
10930 }
10931
10932 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10933 {
10934         struct rb_root dev_cache;
10935         struct cache_tree chunk_cache;
10936         struct block_group_tree block_group_cache;
10937         struct device_extent_tree dev_extent_cache;
10938         struct cache_tree extent_cache;
10939         struct cache_tree seen;
10940         struct cache_tree pending;
10941         struct cache_tree reada;
10942         struct cache_tree nodes;
10943         struct extent_io_tree excluded_extents;
10944         struct cache_tree corrupt_blocks;
10945         struct btrfs_path path;
10946         struct btrfs_key key;
10947         struct btrfs_key found_key;
10948         int ret, err = 0;
10949         struct block_info *bits;
10950         int bits_nr;
10951         struct extent_buffer *leaf;
10952         int slot;
10953         struct btrfs_root_item ri;
10954         struct list_head dropping_trees;
10955         struct list_head normal_trees;
10956         struct btrfs_root *root1;
10957         struct btrfs_root *root;
10958         u64 objectid;
10959         u8 level;
10960
10961         root = fs_info->fs_root;
10962         dev_cache = RB_ROOT;
10963         cache_tree_init(&chunk_cache);
10964         block_group_tree_init(&block_group_cache);
10965         device_extent_tree_init(&dev_extent_cache);
10966
10967         cache_tree_init(&extent_cache);
10968         cache_tree_init(&seen);
10969         cache_tree_init(&pending);
10970         cache_tree_init(&nodes);
10971         cache_tree_init(&reada);
10972         cache_tree_init(&corrupt_blocks);
10973         extent_io_tree_init(&excluded_extents);
10974         INIT_LIST_HEAD(&dropping_trees);
10975         INIT_LIST_HEAD(&normal_trees);
10976
10977         if (repair) {
10978                 fs_info->excluded_extents = &excluded_extents;
10979                 fs_info->fsck_extent_cache = &extent_cache;
10980                 fs_info->free_extent_hook = free_extent_hook;
10981                 fs_info->corrupt_blocks = &corrupt_blocks;
10982         }
10983
10984         bits_nr = 1024;
10985         bits = malloc(bits_nr * sizeof(struct block_info));
10986         if (!bits) {
10987                 perror("malloc");
10988                 exit(1);
10989         }
10990
10991         if (ctx.progress_enabled) {
10992                 ctx.tp = TASK_EXTENTS;
10993                 task_start(ctx.info);
10994         }
10995
10996 again:
10997         root1 = fs_info->tree_root;
10998         level = btrfs_header_level(root1->node);
10999         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11000                                     root1->node->start, 0, level, 0, NULL);
11001         if (ret < 0)
11002                 goto out;
11003         root1 = fs_info->chunk_root;
11004         level = btrfs_header_level(root1->node);
11005         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11006                                     root1->node->start, 0, level, 0, NULL);
11007         if (ret < 0)
11008                 goto out;
11009         btrfs_init_path(&path);
11010         key.offset = 0;
11011         key.objectid = 0;
11012         key.type = BTRFS_ROOT_ITEM_KEY;
11013         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11014         if (ret < 0)
11015                 goto out;
11016         while(1) {
11017                 leaf = path.nodes[0];
11018                 slot = path.slots[0];
11019                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11020                         ret = btrfs_next_leaf(root, &path);
11021                         if (ret != 0)
11022                                 break;
11023                         leaf = path.nodes[0];
11024                         slot = path.slots[0];
11025                 }
11026                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11027                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11028                         unsigned long offset;
11029                         u64 last_snapshot;
11030
11031                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11032                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11033                         last_snapshot = btrfs_root_last_snapshot(&ri);
11034                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11035                                 level = btrfs_root_level(&ri);
11036                                 ret = add_root_item_to_list(&normal_trees,
11037                                                 found_key.objectid,
11038                                                 btrfs_root_bytenr(&ri),
11039                                                 last_snapshot, level,
11040                                                 0, NULL);
11041                                 if (ret < 0)
11042                                         goto out;
11043                         } else {
11044                                 level = btrfs_root_level(&ri);
11045                                 objectid = found_key.objectid;
11046                                 btrfs_disk_key_to_cpu(&found_key,
11047                                                       &ri.drop_progress);
11048                                 ret = add_root_item_to_list(&dropping_trees,
11049                                                 objectid,
11050                                                 btrfs_root_bytenr(&ri),
11051                                                 last_snapshot, level,
11052                                                 ri.drop_level, &found_key);
11053                                 if (ret < 0)
11054                                         goto out;
11055                         }
11056                 }
11057                 path.slots[0]++;
11058         }
11059         btrfs_release_path(&path);
11060
11061         /*
11062          * check_block can return -EAGAIN if it fixes something, please keep
11063          * this in mind when dealing with return values from these functions, if
11064          * we get -EAGAIN we want to fall through and restart the loop.
11065          */
11066         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11067                                   &seen, &reada, &nodes, &extent_cache,
11068                                   &chunk_cache, &dev_cache, &block_group_cache,
11069                                   &dev_extent_cache);
11070         if (ret < 0) {
11071                 if (ret == -EAGAIN)
11072                         goto loop;
11073                 goto out;
11074         }
11075         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11076                                   &pending, &seen, &reada, &nodes,
11077                                   &extent_cache, &chunk_cache, &dev_cache,
11078                                   &block_group_cache, &dev_extent_cache);
11079         if (ret < 0) {
11080                 if (ret == -EAGAIN)
11081                         goto loop;
11082                 goto out;
11083         }
11084
11085         ret = check_chunks(&chunk_cache, &block_group_cache,
11086                            &dev_extent_cache, NULL, NULL, NULL, 0);
11087         if (ret) {
11088                 if (ret == -EAGAIN)
11089                         goto loop;
11090                 err = ret;
11091         }
11092
11093         ret = check_extent_refs(root, &extent_cache);
11094         if (ret < 0) {
11095                 if (ret == -EAGAIN)
11096                         goto loop;
11097                 goto out;
11098         }
11099
11100         ret = check_devices(&dev_cache, &dev_extent_cache);
11101         if (ret && err)
11102                 ret = err;
11103
11104 out:
11105         task_stop(ctx.info);
11106         if (repair) {
11107                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11108                 extent_io_tree_cleanup(&excluded_extents);
11109                 fs_info->fsck_extent_cache = NULL;
11110                 fs_info->free_extent_hook = NULL;
11111                 fs_info->corrupt_blocks = NULL;
11112                 fs_info->excluded_extents = NULL;
11113         }
11114         free(bits);
11115         free_chunk_cache_tree(&chunk_cache);
11116         free_device_cache_tree(&dev_cache);
11117         free_block_group_tree(&block_group_cache);
11118         free_device_extent_tree(&dev_extent_cache);
11119         free_extent_cache_tree(&seen);
11120         free_extent_cache_tree(&pending);
11121         free_extent_cache_tree(&reada);
11122         free_extent_cache_tree(&nodes);
11123         free_root_item_list(&normal_trees);
11124         free_root_item_list(&dropping_trees);
11125         return ret;
11126 loop:
11127         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11128         free_extent_cache_tree(&seen);
11129         free_extent_cache_tree(&pending);
11130         free_extent_cache_tree(&reada);
11131         free_extent_cache_tree(&nodes);
11132         free_chunk_cache_tree(&chunk_cache);
11133         free_block_group_tree(&block_group_cache);
11134         free_device_cache_tree(&dev_cache);
11135         free_device_extent_tree(&dev_extent_cache);
11136         free_extent_record_cache(&extent_cache);
11137         free_root_item_list(&normal_trees);
11138         free_root_item_list(&dropping_trees);
11139         extent_io_tree_cleanup(&excluded_extents);
11140         goto again;
11141 }
11142
11143 /*
11144  * Check backrefs of a tree block given by @bytenr or @eb.
11145  *
11146  * @root:       the root containing the @bytenr or @eb
11147  * @eb:         tree block extent buffer, can be NULL
11148  * @bytenr:     bytenr of the tree block to search
11149  * @level:      tree level of the tree block
11150  * @owner:      owner of the tree block
11151  *
11152  * Return >0 for any error found and output error message
11153  * Return 0 for no error found
11154  */
11155 static int check_tree_block_ref(struct btrfs_root *root,
11156                                 struct extent_buffer *eb, u64 bytenr,
11157                                 int level, u64 owner)
11158 {
11159         struct btrfs_key key;
11160         struct btrfs_root *extent_root = root->fs_info->extent_root;
11161         struct btrfs_path path;
11162         struct btrfs_extent_item *ei;
11163         struct btrfs_extent_inline_ref *iref;
11164         struct extent_buffer *leaf;
11165         unsigned long end;
11166         unsigned long ptr;
11167         int slot;
11168         int skinny_level;
11169         int type;
11170         u32 nodesize = root->fs_info->nodesize;
11171         u32 item_size;
11172         u64 offset;
11173         int tree_reloc_root = 0;
11174         int found_ref = 0;
11175         int err = 0;
11176         int ret;
11177
11178         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11179             btrfs_header_bytenr(root->node) == bytenr)
11180                 tree_reloc_root = 1;
11181
11182         btrfs_init_path(&path);
11183         key.objectid = bytenr;
11184         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11185                 key.type = BTRFS_METADATA_ITEM_KEY;
11186         else
11187                 key.type = BTRFS_EXTENT_ITEM_KEY;
11188         key.offset = (u64)-1;
11189
11190         /* Search for the backref in extent tree */
11191         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11192         if (ret < 0) {
11193                 err |= BACKREF_MISSING;
11194                 goto out;
11195         }
11196         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11197         if (ret) {
11198                 err |= BACKREF_MISSING;
11199                 goto out;
11200         }
11201
11202         leaf = path.nodes[0];
11203         slot = path.slots[0];
11204         btrfs_item_key_to_cpu(leaf, &key, slot);
11205
11206         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11207
11208         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11209                 skinny_level = (int)key.offset;
11210                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11211         } else {
11212                 struct btrfs_tree_block_info *info;
11213
11214                 info = (struct btrfs_tree_block_info *)(ei + 1);
11215                 skinny_level = btrfs_tree_block_level(leaf, info);
11216                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11217         }
11218
11219         if (eb) {
11220                 u64 header_gen;
11221                 u64 extent_gen;
11222
11223                 if (!(btrfs_extent_flags(leaf, ei) &
11224                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11225                         error(
11226                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11227                                 key.objectid, nodesize,
11228                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11229                         err = BACKREF_MISMATCH;
11230                 }
11231                 header_gen = btrfs_header_generation(eb);
11232                 extent_gen = btrfs_extent_generation(leaf, ei);
11233                 if (header_gen != extent_gen) {
11234                         error(
11235         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11236                                 key.objectid, nodesize, header_gen,
11237                                 extent_gen);
11238                         err = BACKREF_MISMATCH;
11239                 }
11240                 if (level != skinny_level) {
11241                         error(
11242                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11243                                 key.objectid, nodesize, level, skinny_level);
11244                         err = BACKREF_MISMATCH;
11245                 }
11246                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11247                         error(
11248                         "extent[%llu %u] is referred by other roots than %llu",
11249                                 key.objectid, nodesize, root->objectid);
11250                         err = BACKREF_MISMATCH;
11251                 }
11252         }
11253
11254         /*
11255          * Iterate the extent/metadata item to find the exact backref
11256          */
11257         item_size = btrfs_item_size_nr(leaf, slot);
11258         ptr = (unsigned long)iref;
11259         end = (unsigned long)ei + item_size;
11260         while (ptr < end) {
11261                 iref = (struct btrfs_extent_inline_ref *)ptr;
11262                 type = btrfs_extent_inline_ref_type(leaf, iref);
11263                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11264
11265                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11266                         (offset == root->objectid || offset == owner)) {
11267                         found_ref = 1;
11268                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11269                         /*
11270                          * Backref of tree reloc root points to itself, no need
11271                          * to check backref any more.
11272                          */
11273                         if (tree_reloc_root)
11274                                 found_ref = 1;
11275                         else
11276                         /* Check if the backref points to valid referencer */
11277                                 found_ref = !check_tree_block_ref(root, NULL,
11278                                                 offset, level + 1, owner);
11279                 }
11280
11281                 if (found_ref)
11282                         break;
11283                 ptr += btrfs_extent_inline_ref_size(type);
11284         }
11285
11286         /*
11287          * Inlined extent item doesn't have what we need, check
11288          * TREE_BLOCK_REF_KEY
11289          */
11290         if (!found_ref) {
11291                 btrfs_release_path(&path);
11292                 key.objectid = bytenr;
11293                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11294                 key.offset = root->objectid;
11295
11296                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11297                 if (!ret)
11298                         found_ref = 1;
11299         }
11300         if (!found_ref)
11301                 err |= BACKREF_MISSING;
11302 out:
11303         btrfs_release_path(&path);
11304         if (eb && (err & BACKREF_MISSING))
11305                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
11306                         bytenr, nodesize, owner, level);
11307         return err;
11308 }
11309
11310 /*
11311  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11312  *
11313  * Return >0 for any error found and output error message
11314  * Return 0 for no error found
11315  */
11316 static int check_extent_data_item(struct btrfs_root *root,
11317                                   struct extent_buffer *eb, int slot)
11318 {
11319         struct btrfs_file_extent_item *fi;
11320         struct btrfs_path path;
11321         struct btrfs_root *extent_root = root->fs_info->extent_root;
11322         struct btrfs_key fi_key;
11323         struct btrfs_key dbref_key;
11324         struct extent_buffer *leaf;
11325         struct btrfs_extent_item *ei;
11326         struct btrfs_extent_inline_ref *iref;
11327         struct btrfs_extent_data_ref *dref;
11328         u64 owner;
11329         u64 disk_bytenr;
11330         u64 disk_num_bytes;
11331         u64 extent_num_bytes;
11332         u64 extent_flags;
11333         u32 item_size;
11334         unsigned long end;
11335         unsigned long ptr;
11336         int type;
11337         u64 ref_root;
11338         int found_dbackref = 0;
11339         int err = 0;
11340         int ret;
11341
11342         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11343         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11344
11345         /* Nothing to check for hole and inline data extents */
11346         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11347             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11348                 return 0;
11349
11350         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11351         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11352         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11353
11354         /* Check unaligned disk_num_bytes and num_bytes */
11355         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11356                 error(
11357 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11358                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11359                         root->fs_info->sectorsize);
11360                 err |= BYTES_UNALIGNED;
11361         } else {
11362                 data_bytes_allocated += disk_num_bytes;
11363         }
11364         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11365                 error(
11366 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11367                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11368                         root->fs_info->sectorsize);
11369                 err |= BYTES_UNALIGNED;
11370         } else {
11371                 data_bytes_referenced += extent_num_bytes;
11372         }
11373         owner = btrfs_header_owner(eb);
11374
11375         /* Check the extent item of the file extent in extent tree */
11376         btrfs_init_path(&path);
11377         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11378         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11379         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11380
11381         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11382         if (ret)
11383                 goto out;
11384
11385         leaf = path.nodes[0];
11386         slot = path.slots[0];
11387         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11388
11389         extent_flags = btrfs_extent_flags(leaf, ei);
11390
11391         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11392                 error(
11393                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11394                     disk_bytenr, disk_num_bytes,
11395                     BTRFS_EXTENT_FLAG_DATA);
11396                 err |= BACKREF_MISMATCH;
11397         }
11398
11399         /* Check data backref inside that extent item */
11400         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11401         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11402         ptr = (unsigned long)iref;
11403         end = (unsigned long)ei + item_size;
11404         while (ptr < end) {
11405                 iref = (struct btrfs_extent_inline_ref *)ptr;
11406                 type = btrfs_extent_inline_ref_type(leaf, iref);
11407                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11408
11409                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11410                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11411                         if (ref_root == owner || ref_root == root->objectid)
11412                                 found_dbackref = 1;
11413                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11414                         found_dbackref = !check_tree_block_ref(root, NULL,
11415                                 btrfs_extent_inline_ref_offset(leaf, iref),
11416                                 0, owner);
11417                 }
11418
11419                 if (found_dbackref)
11420                         break;
11421                 ptr += btrfs_extent_inline_ref_size(type);
11422         }
11423
11424         if (!found_dbackref) {
11425                 btrfs_release_path(&path);
11426
11427                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11428                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11429                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11430                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11431                                 fi_key.objectid, fi_key.offset);
11432
11433                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11434                                         &dbref_key, &path, 0, 0);
11435                 if (!ret) {
11436                         found_dbackref = 1;
11437                         goto out;
11438                 }
11439
11440                 btrfs_release_path(&path);
11441
11442                 /*
11443                  * Neither inlined nor EXTENT_DATA_REF found, try
11444                  * SHARED_DATA_REF as last chance.
11445                  */
11446                 dbref_key.objectid = disk_bytenr;
11447                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11448                 dbref_key.offset = eb->start;
11449
11450                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11451                                         &dbref_key, &path, 0, 0);
11452                 if (!ret) {
11453                         found_dbackref = 1;
11454                         goto out;
11455                 }
11456         }
11457
11458 out:
11459         if (!found_dbackref)
11460                 err |= BACKREF_MISSING;
11461         btrfs_release_path(&path);
11462         if (err & BACKREF_MISSING) {
11463                 error("data extent[%llu %llu] backref lost",
11464                       disk_bytenr, disk_num_bytes);
11465         }
11466         return err;
11467 }
11468
11469 /*
11470  * Get real tree block level for the case like shared block
11471  * Return >= 0 as tree level
11472  * Return <0 for error
11473  */
11474 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11475 {
11476         struct extent_buffer *eb;
11477         struct btrfs_path path;
11478         struct btrfs_key key;
11479         struct btrfs_extent_item *ei;
11480         u64 flags;
11481         u64 transid;
11482         u8 backref_level;
11483         u8 header_level;
11484         int ret;
11485
11486         /* Search extent tree for extent generation and level */
11487         key.objectid = bytenr;
11488         key.type = BTRFS_METADATA_ITEM_KEY;
11489         key.offset = (u64)-1;
11490
11491         btrfs_init_path(&path);
11492         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11493         if (ret < 0)
11494                 goto release_out;
11495         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11496         if (ret < 0)
11497                 goto release_out;
11498         if (ret > 0) {
11499                 ret = -ENOENT;
11500                 goto release_out;
11501         }
11502
11503         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11504         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11505                             struct btrfs_extent_item);
11506         flags = btrfs_extent_flags(path.nodes[0], ei);
11507         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11508                 ret = -ENOENT;
11509                 goto release_out;
11510         }
11511
11512         /* Get transid for later read_tree_block() check */
11513         transid = btrfs_extent_generation(path.nodes[0], ei);
11514
11515         /* Get backref level as one source */
11516         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11517                 backref_level = key.offset;
11518         } else {
11519                 struct btrfs_tree_block_info *info;
11520
11521                 info = (struct btrfs_tree_block_info *)(ei + 1);
11522                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11523         }
11524         btrfs_release_path(&path);
11525
11526         /* Get level from tree block as an alternative source */
11527         eb = read_tree_block(fs_info, bytenr, transid);
11528         if (!extent_buffer_uptodate(eb)) {
11529                 free_extent_buffer(eb);
11530                 return -EIO;
11531         }
11532         header_level = btrfs_header_level(eb);
11533         free_extent_buffer(eb);
11534
11535         if (header_level != backref_level)
11536                 return -EIO;
11537         return header_level;
11538
11539 release_out:
11540         btrfs_release_path(&path);
11541         return ret;
11542 }
11543
11544 /*
11545  * Check if a tree block backref is valid (points to a valid tree block)
11546  * if level == -1, level will be resolved
11547  * Return >0 for any error found and print error message
11548  */
11549 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11550                                     u64 bytenr, int level)
11551 {
11552         struct btrfs_root *root;
11553         struct btrfs_key key;
11554         struct btrfs_path path;
11555         struct extent_buffer *eb;
11556         struct extent_buffer *node;
11557         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11558         int err = 0;
11559         int ret;
11560
11561         /* Query level for level == -1 special case */
11562         if (level == -1)
11563                 level = query_tree_block_level(fs_info, bytenr);
11564         if (level < 0) {
11565                 err |= REFERENCER_MISSING;
11566                 goto out;
11567         }
11568
11569         key.objectid = root_id;
11570         key.type = BTRFS_ROOT_ITEM_KEY;
11571         key.offset = (u64)-1;
11572
11573         root = btrfs_read_fs_root(fs_info, &key);
11574         if (IS_ERR(root)) {
11575                 err |= REFERENCER_MISSING;
11576                 goto out;
11577         }
11578
11579         /* Read out the tree block to get item/node key */
11580         eb = read_tree_block(fs_info, bytenr, 0);
11581         if (!extent_buffer_uptodate(eb)) {
11582                 err |= REFERENCER_MISSING;
11583                 free_extent_buffer(eb);
11584                 goto out;
11585         }
11586
11587         /* Empty tree, no need to check key */
11588         if (!btrfs_header_nritems(eb) && !level) {
11589                 free_extent_buffer(eb);
11590                 goto out;
11591         }
11592
11593         if (level)
11594                 btrfs_node_key_to_cpu(eb, &key, 0);
11595         else
11596                 btrfs_item_key_to_cpu(eb, &key, 0);
11597
11598         free_extent_buffer(eb);
11599
11600         btrfs_init_path(&path);
11601         path.lowest_level = level;
11602         /* Search with the first key, to ensure we can reach it */
11603         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11604         if (ret < 0) {
11605                 err |= REFERENCER_MISSING;
11606                 goto release_out;
11607         }
11608
11609         node = path.nodes[level];
11610         if (btrfs_header_bytenr(node) != bytenr) {
11611                 error(
11612         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11613                         bytenr, nodesize, bytenr,
11614                         btrfs_header_bytenr(node));
11615                 err |= REFERENCER_MISMATCH;
11616         }
11617         if (btrfs_header_level(node) != level) {
11618                 error(
11619         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11620                         bytenr, nodesize, level,
11621                         btrfs_header_level(node));
11622                 err |= REFERENCER_MISMATCH;
11623         }
11624
11625 release_out:
11626         btrfs_release_path(&path);
11627 out:
11628         if (err & REFERENCER_MISSING) {
11629                 if (level < 0)
11630                         error("extent [%llu %d] lost referencer (owner: %llu)",
11631                                 bytenr, nodesize, root_id);
11632                 else
11633                         error(
11634                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11635                                 bytenr, nodesize, root_id, level);
11636         }
11637
11638         return err;
11639 }
11640
11641 /*
11642  * Check if tree block @eb is tree reloc root.
11643  * Return 0 if it's not or any problem happens
11644  * Return 1 if it's a tree reloc root
11645  */
11646 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11647                                  struct extent_buffer *eb)
11648 {
11649         struct btrfs_root *tree_reloc_root;
11650         struct btrfs_key key;
11651         u64 bytenr = btrfs_header_bytenr(eb);
11652         u64 owner = btrfs_header_owner(eb);
11653         int ret = 0;
11654
11655         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11656         key.offset = owner;
11657         key.type = BTRFS_ROOT_ITEM_KEY;
11658
11659         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11660         if (IS_ERR(tree_reloc_root))
11661                 return 0;
11662
11663         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11664                 ret = 1;
11665         btrfs_free_fs_root(tree_reloc_root);
11666         return ret;
11667 }
11668
11669 /*
11670  * Check referencer for shared block backref
11671  * If level == -1, this function will resolve the level.
11672  */
11673 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11674                                      u64 parent, u64 bytenr, int level)
11675 {
11676         struct extent_buffer *eb;
11677         u32 nr;
11678         int found_parent = 0;
11679         int i;
11680
11681         eb = read_tree_block(fs_info, parent, 0);
11682         if (!extent_buffer_uptodate(eb))
11683                 goto out;
11684
11685         if (level == -1)
11686                 level = query_tree_block_level(fs_info, bytenr);
11687         if (level < 0)
11688                 goto out;
11689
11690         /* It's possible it's a tree reloc root */
11691         if (parent == bytenr) {
11692                 if (is_tree_reloc_root(fs_info, eb))
11693                         found_parent = 1;
11694                 goto out;
11695         }
11696
11697         if (level + 1 != btrfs_header_level(eb))
11698                 goto out;
11699
11700         nr = btrfs_header_nritems(eb);
11701         for (i = 0; i < nr; i++) {
11702                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11703                         found_parent = 1;
11704                         break;
11705                 }
11706         }
11707 out:
11708         free_extent_buffer(eb);
11709         if (!found_parent) {
11710                 error(
11711         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11712                         bytenr, fs_info->nodesize, parent, level);
11713                 return REFERENCER_MISSING;
11714         }
11715         return 0;
11716 }
11717
11718 /*
11719  * Check referencer for normal (inlined) data ref
11720  * If len == 0, it will be resolved by searching in extent tree
11721  */
11722 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
11723                                      u64 root_id, u64 objectid, u64 offset,
11724                                      u64 bytenr, u64 len, u32 count)
11725 {
11726         struct btrfs_root *root;
11727         struct btrfs_root *extent_root = fs_info->extent_root;
11728         struct btrfs_key key;
11729         struct btrfs_path path;
11730         struct extent_buffer *leaf;
11731         struct btrfs_file_extent_item *fi;
11732         u32 found_count = 0;
11733         int slot;
11734         int ret = 0;
11735
11736         if (!len) {
11737                 key.objectid = bytenr;
11738                 key.type = BTRFS_EXTENT_ITEM_KEY;
11739                 key.offset = (u64)-1;
11740
11741                 btrfs_init_path(&path);
11742                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11743                 if (ret < 0)
11744                         goto out;
11745                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11746                 if (ret)
11747                         goto out;
11748                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11749                 if (key.objectid != bytenr ||
11750                     key.type != BTRFS_EXTENT_ITEM_KEY)
11751                         goto out;
11752                 len = key.offset;
11753                 btrfs_release_path(&path);
11754         }
11755         key.objectid = root_id;
11756         key.type = BTRFS_ROOT_ITEM_KEY;
11757         key.offset = (u64)-1;
11758         btrfs_init_path(&path);
11759
11760         root = btrfs_read_fs_root(fs_info, &key);
11761         if (IS_ERR(root))
11762                 goto out;
11763
11764         key.objectid = objectid;
11765         key.type = BTRFS_EXTENT_DATA_KEY;
11766         /*
11767          * It can be nasty as data backref offset is
11768          * file offset - file extent offset, which is smaller or
11769          * equal to original backref offset.  The only special case is
11770          * overflow.  So we need to special check and do further search.
11771          */
11772         key.offset = offset & (1ULL << 63) ? 0 : offset;
11773
11774         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11775         if (ret < 0)
11776                 goto out;
11777
11778         /*
11779          * Search afterwards to get correct one
11780          * NOTE: As we must do a comprehensive check on the data backref to
11781          * make sure the dref count also matches, we must iterate all file
11782          * extents for that inode.
11783          */
11784         while (1) {
11785                 leaf = path.nodes[0];
11786                 slot = path.slots[0];
11787
11788                 if (slot >= btrfs_header_nritems(leaf))
11789                         goto next;
11790                 btrfs_item_key_to_cpu(leaf, &key, slot);
11791                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
11792                         break;
11793                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
11794                 /*
11795                  * Except normal disk bytenr and disk num bytes, we still
11796                  * need to do extra check on dbackref offset as
11797                  * dbackref offset = file_offset - file_extent_offset
11798                  */
11799                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
11800                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
11801                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
11802                     offset)
11803                         found_count++;
11804
11805 next:
11806                 ret = btrfs_next_item(root, &path);
11807                 if (ret)
11808                         break;
11809         }
11810 out:
11811         btrfs_release_path(&path);
11812         if (found_count != count) {
11813                 error(
11814 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
11815                         bytenr, len, root_id, objectid, offset, count, found_count);
11816                 return REFERENCER_MISSING;
11817         }
11818         return 0;
11819 }
11820
11821 /*
11822  * Check if the referencer of a shared data backref exists
11823  */
11824 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
11825                                      u64 parent, u64 bytenr)
11826 {
11827         struct extent_buffer *eb;
11828         struct btrfs_key key;
11829         struct btrfs_file_extent_item *fi;
11830         u32 nr;
11831         int found_parent = 0;
11832         int i;
11833
11834         eb = read_tree_block(fs_info, parent, 0);
11835         if (!extent_buffer_uptodate(eb))
11836                 goto out;
11837
11838         nr = btrfs_header_nritems(eb);
11839         for (i = 0; i < nr; i++) {
11840                 btrfs_item_key_to_cpu(eb, &key, i);
11841                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11842                         continue;
11843
11844                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
11845                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
11846                         continue;
11847
11848                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
11849                         found_parent = 1;
11850                         break;
11851                 }
11852         }
11853
11854 out:
11855         free_extent_buffer(eb);
11856         if (!found_parent) {
11857                 error("shared extent %llu referencer lost (parent: %llu)",
11858                         bytenr, parent);
11859                 return REFERENCER_MISSING;
11860         }
11861         return 0;
11862 }
11863
11864 /*
11865  * This function will check a given extent item, including its backref and
11866  * itself (like crossing stripe boundary and type)
11867  *
11868  * Since we don't use extent_record anymore, introduce new error bit
11869  */
11870 static int check_extent_item(struct btrfs_fs_info *fs_info,
11871                              struct extent_buffer *eb, int slot)
11872 {
11873         struct btrfs_extent_item *ei;
11874         struct btrfs_extent_inline_ref *iref;
11875         struct btrfs_extent_data_ref *dref;
11876         unsigned long end;
11877         unsigned long ptr;
11878         int type;
11879         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11880         u32 item_size = btrfs_item_size_nr(eb, slot);
11881         u64 flags;
11882         u64 offset;
11883         int metadata = 0;
11884         int level;
11885         struct btrfs_key key;
11886         int ret;
11887         int err = 0;
11888
11889         btrfs_item_key_to_cpu(eb, &key, slot);
11890         if (key.type == BTRFS_EXTENT_ITEM_KEY)
11891                 bytes_used += key.offset;
11892         else
11893                 bytes_used += nodesize;
11894
11895         if (item_size < sizeof(*ei)) {
11896                 /*
11897                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
11898                  * old thing when on disk format is still un-determined.
11899                  * No need to care about it anymore
11900                  */
11901                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
11902                 return -ENOTTY;
11903         }
11904
11905         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
11906         flags = btrfs_extent_flags(eb, ei);
11907
11908         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11909                 metadata = 1;
11910         if (metadata && check_crossing_stripes(global_info, key.objectid,
11911                                                eb->len)) {
11912                 error("bad metadata [%llu, %llu) crossing stripe boundary",
11913                       key.objectid, key.objectid + nodesize);
11914                 err |= CROSSING_STRIPE_BOUNDARY;
11915         }
11916
11917         ptr = (unsigned long)(ei + 1);
11918
11919         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11920                 /* Old EXTENT_ITEM metadata */
11921                 struct btrfs_tree_block_info *info;
11922
11923                 info = (struct btrfs_tree_block_info *)ptr;
11924                 level = btrfs_tree_block_level(eb, info);
11925                 ptr += sizeof(struct btrfs_tree_block_info);
11926         } else {
11927                 /* New METADATA_ITEM */
11928                 level = key.offset;
11929         }
11930         end = (unsigned long)ei + item_size;
11931
11932 next:
11933         /* Reached extent item end normally */
11934         if (ptr == end)
11935                 goto out;
11936
11937         /* Beyond extent item end, wrong item size */
11938         if (ptr > end) {
11939                 err |= ITEM_SIZE_MISMATCH;
11940                 error("extent item at bytenr %llu slot %d has wrong size",
11941                         eb->start, slot);
11942                 goto out;
11943         }
11944
11945         /* Now check every backref in this extent item */
11946         iref = (struct btrfs_extent_inline_ref *)ptr;
11947         type = btrfs_extent_inline_ref_type(eb, iref);
11948         offset = btrfs_extent_inline_ref_offset(eb, iref);
11949         switch (type) {
11950         case BTRFS_TREE_BLOCK_REF_KEY:
11951                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11952                                                level);
11953                 err |= ret;
11954                 break;
11955         case BTRFS_SHARED_BLOCK_REF_KEY:
11956                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11957                                                  level);
11958                 err |= ret;
11959                 break;
11960         case BTRFS_EXTENT_DATA_REF_KEY:
11961                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11962                 ret = check_extent_data_backref(fs_info,
11963                                 btrfs_extent_data_ref_root(eb, dref),
11964                                 btrfs_extent_data_ref_objectid(eb, dref),
11965                                 btrfs_extent_data_ref_offset(eb, dref),
11966                                 key.objectid, key.offset,
11967                                 btrfs_extent_data_ref_count(eb, dref));
11968                 err |= ret;
11969                 break;
11970         case BTRFS_SHARED_DATA_REF_KEY:
11971                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11972                 err |= ret;
11973                 break;
11974         default:
11975                 error("extent[%llu %d %llu] has unknown ref type: %d",
11976                         key.objectid, key.type, key.offset, type);
11977                 err |= UNKNOWN_TYPE;
11978                 goto out;
11979         }
11980
11981         ptr += btrfs_extent_inline_ref_size(type);
11982         goto next;
11983
11984 out:
11985         return err;
11986 }
11987
11988 /*
11989  * Check if a dev extent item is referred correctly by its chunk
11990  */
11991 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11992                                  struct extent_buffer *eb, int slot)
11993 {
11994         struct btrfs_root *chunk_root = fs_info->chunk_root;
11995         struct btrfs_dev_extent *ptr;
11996         struct btrfs_path path;
11997         struct btrfs_key chunk_key;
11998         struct btrfs_key devext_key;
11999         struct btrfs_chunk *chunk;
12000         struct extent_buffer *l;
12001         int num_stripes;
12002         u64 length;
12003         int i;
12004         int found_chunk = 0;
12005         int ret;
12006
12007         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12008         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12009         length = btrfs_dev_extent_length(eb, ptr);
12010
12011         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12012         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12013         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12014
12015         btrfs_init_path(&path);
12016         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12017         if (ret)
12018                 goto out;
12019
12020         l = path.nodes[0];
12021         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12022         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12023                                       chunk_key.offset);
12024         if (ret < 0)
12025                 goto out;
12026
12027         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12028                 goto out;
12029
12030         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12031         for (i = 0; i < num_stripes; i++) {
12032                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12033                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12034
12035                 if (devid == devext_key.objectid &&
12036                     offset == devext_key.offset) {
12037                         found_chunk = 1;
12038                         break;
12039                 }
12040         }
12041 out:
12042         btrfs_release_path(&path);
12043         if (!found_chunk) {
12044                 error(
12045                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12046                         devext_key.objectid, devext_key.offset, length);
12047                 return REFERENCER_MISSING;
12048         }
12049         return 0;
12050 }
12051
12052 /*
12053  * Check if the used space is correct with the dev item
12054  */
12055 static int check_dev_item(struct btrfs_fs_info *fs_info,
12056                           struct extent_buffer *eb, int slot)
12057 {
12058         struct btrfs_root *dev_root = fs_info->dev_root;
12059         struct btrfs_dev_item *dev_item;
12060         struct btrfs_path path;
12061         struct btrfs_key key;
12062         struct btrfs_dev_extent *ptr;
12063         u64 dev_id;
12064         u64 used;
12065         u64 total = 0;
12066         int ret;
12067
12068         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12069         dev_id = btrfs_device_id(eb, dev_item);
12070         used = btrfs_device_bytes_used(eb, dev_item);
12071
12072         key.objectid = dev_id;
12073         key.type = BTRFS_DEV_EXTENT_KEY;
12074         key.offset = 0;
12075
12076         btrfs_init_path(&path);
12077         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12078         if (ret < 0) {
12079                 btrfs_item_key_to_cpu(eb, &key, slot);
12080                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12081                         key.objectid, key.type, key.offset);
12082                 btrfs_release_path(&path);
12083                 return REFERENCER_MISSING;
12084         }
12085
12086         /* Iterate dev_extents to calculate the used space of a device */
12087         while (1) {
12088                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12089                         goto next;
12090
12091                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12092                 if (key.objectid > dev_id)
12093                         break;
12094                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12095                         goto next;
12096
12097                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12098                                      struct btrfs_dev_extent);
12099                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12100 next:
12101                 ret = btrfs_next_item(dev_root, &path);
12102                 if (ret)
12103                         break;
12104         }
12105         btrfs_release_path(&path);
12106
12107         if (used != total) {
12108                 btrfs_item_key_to_cpu(eb, &key, slot);
12109                 error(
12110 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12111                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12112                         BTRFS_DEV_EXTENT_KEY, dev_id);
12113                 return ACCOUNTING_MISMATCH;
12114         }
12115         return 0;
12116 }
12117
12118 /*
12119  * Check a block group item with its referener (chunk) and its used space
12120  * with extent/metadata item
12121  */
12122 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12123                                   struct extent_buffer *eb, int slot)
12124 {
12125         struct btrfs_root *extent_root = fs_info->extent_root;
12126         struct btrfs_root *chunk_root = fs_info->chunk_root;
12127         struct btrfs_block_group_item *bi;
12128         struct btrfs_block_group_item bg_item;
12129         struct btrfs_path path;
12130         struct btrfs_key bg_key;
12131         struct btrfs_key chunk_key;
12132         struct btrfs_key extent_key;
12133         struct btrfs_chunk *chunk;
12134         struct extent_buffer *leaf;
12135         struct btrfs_extent_item *ei;
12136         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12137         u64 flags;
12138         u64 bg_flags;
12139         u64 used;
12140         u64 total = 0;
12141         int ret;
12142         int err = 0;
12143
12144         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12145         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12146         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12147         used = btrfs_block_group_used(&bg_item);
12148         bg_flags = btrfs_block_group_flags(&bg_item);
12149
12150         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12151         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12152         chunk_key.offset = bg_key.objectid;
12153
12154         btrfs_init_path(&path);
12155         /* Search for the referencer chunk */
12156         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12157         if (ret) {
12158                 error(
12159                 "block group[%llu %llu] did not find the related chunk item",
12160                         bg_key.objectid, bg_key.offset);
12161                 err |= REFERENCER_MISSING;
12162         } else {
12163                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12164                                         struct btrfs_chunk);
12165                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12166                                                 bg_key.offset) {
12167                         error(
12168         "block group[%llu %llu] related chunk item length does not match",
12169                                 bg_key.objectid, bg_key.offset);
12170                         err |= REFERENCER_MISMATCH;
12171                 }
12172         }
12173         btrfs_release_path(&path);
12174
12175         /* Search from the block group bytenr */
12176         extent_key.objectid = bg_key.objectid;
12177         extent_key.type = 0;
12178         extent_key.offset = 0;
12179
12180         btrfs_init_path(&path);
12181         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12182         if (ret < 0)
12183                 goto out;
12184
12185         /* Iterate extent tree to account used space */
12186         while (1) {
12187                 leaf = path.nodes[0];
12188
12189                 /* Search slot can point to the last item beyond leaf nritems */
12190                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12191                         goto next;
12192
12193                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12194                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12195                         break;
12196
12197                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12198                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12199                         goto next;
12200                 if (extent_key.objectid < bg_key.objectid)
12201                         goto next;
12202
12203                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12204                         total += nodesize;
12205                 else
12206                         total += extent_key.offset;
12207
12208                 ei = btrfs_item_ptr(leaf, path.slots[0],
12209                                     struct btrfs_extent_item);
12210                 flags = btrfs_extent_flags(leaf, ei);
12211                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12212                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12213                                 error(
12214                         "bad extent[%llu, %llu) type mismatch with chunk",
12215                                         extent_key.objectid,
12216                                         extent_key.objectid + extent_key.offset);
12217                                 err |= CHUNK_TYPE_MISMATCH;
12218                         }
12219                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12220                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12221                                     BTRFS_BLOCK_GROUP_METADATA))) {
12222                                 error(
12223                         "bad extent[%llu, %llu) type mismatch with chunk",
12224                                         extent_key.objectid,
12225                                         extent_key.objectid + nodesize);
12226                                 err |= CHUNK_TYPE_MISMATCH;
12227                         }
12228                 }
12229 next:
12230                 ret = btrfs_next_item(extent_root, &path);
12231                 if (ret)
12232                         break;
12233         }
12234
12235 out:
12236         btrfs_release_path(&path);
12237
12238         if (total != used) {
12239                 error(
12240                 "block group[%llu %llu] used %llu but extent items used %llu",
12241                         bg_key.objectid, bg_key.offset, used, total);
12242                 err |= ACCOUNTING_MISMATCH;
12243         }
12244         return err;
12245 }
12246
12247 /*
12248  * Check a chunk item.
12249  * Including checking all referred dev_extents and block group
12250  */
12251 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12252                             struct extent_buffer *eb, int slot)
12253 {
12254         struct btrfs_root *extent_root = fs_info->extent_root;
12255         struct btrfs_root *dev_root = fs_info->dev_root;
12256         struct btrfs_path path;
12257         struct btrfs_key chunk_key;
12258         struct btrfs_key bg_key;
12259         struct btrfs_key devext_key;
12260         struct btrfs_chunk *chunk;
12261         struct extent_buffer *leaf;
12262         struct btrfs_block_group_item *bi;
12263         struct btrfs_block_group_item bg_item;
12264         struct btrfs_dev_extent *ptr;
12265         u64 length;
12266         u64 chunk_end;
12267         u64 stripe_len;
12268         u64 type;
12269         int num_stripes;
12270         u64 offset;
12271         u64 objectid;
12272         int i;
12273         int ret;
12274         int err = 0;
12275
12276         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12277         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12278         length = btrfs_chunk_length(eb, chunk);
12279         chunk_end = chunk_key.offset + length;
12280         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12281                                       chunk_key.offset);
12282         if (ret < 0) {
12283                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12284                         chunk_end);
12285                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12286                 goto out;
12287         }
12288         type = btrfs_chunk_type(eb, chunk);
12289
12290         bg_key.objectid = chunk_key.offset;
12291         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12292         bg_key.offset = length;
12293
12294         btrfs_init_path(&path);
12295         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12296         if (ret) {
12297                 error(
12298                 "chunk[%llu %llu) did not find the related block group item",
12299                         chunk_key.offset, chunk_end);
12300                 err |= REFERENCER_MISSING;
12301         } else{
12302                 leaf = path.nodes[0];
12303                 bi = btrfs_item_ptr(leaf, path.slots[0],
12304                                     struct btrfs_block_group_item);
12305                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12306                                    sizeof(bg_item));
12307                 if (btrfs_block_group_flags(&bg_item) != type) {
12308                         error(
12309 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12310                                 chunk_key.offset, chunk_end, type,
12311                                 btrfs_block_group_flags(&bg_item));
12312                         err |= REFERENCER_MISSING;
12313                 }
12314         }
12315
12316         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12317         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12318         for (i = 0; i < num_stripes; i++) {
12319                 btrfs_release_path(&path);
12320                 btrfs_init_path(&path);
12321                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12322                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12323                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12324
12325                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12326                                         0, 0);
12327                 if (ret)
12328                         goto not_match_dev;
12329
12330                 leaf = path.nodes[0];
12331                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12332                                      struct btrfs_dev_extent);
12333                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12334                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12335                 if (objectid != chunk_key.objectid ||
12336                     offset != chunk_key.offset ||
12337                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12338                         goto not_match_dev;
12339                 continue;
12340 not_match_dev:
12341                 err |= BACKREF_MISSING;
12342                 error(
12343                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12344                         chunk_key.objectid, chunk_end, i);
12345                 continue;
12346         }
12347         btrfs_release_path(&path);
12348 out:
12349         return err;
12350 }
12351
12352 /*
12353  * Main entry function to check known items and update related accounting info
12354  */
12355 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
12356 {
12357         struct btrfs_fs_info *fs_info = root->fs_info;
12358         struct btrfs_key key;
12359         int slot = 0;
12360         int type;
12361         struct btrfs_extent_data_ref *dref;
12362         int ret;
12363         int err = 0;
12364
12365 next:
12366         btrfs_item_key_to_cpu(eb, &key, slot);
12367         type = key.type;
12368
12369         switch (type) {
12370         case BTRFS_EXTENT_DATA_KEY:
12371                 ret = check_extent_data_item(root, eb, slot);
12372                 err |= ret;
12373                 break;
12374         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12375                 ret = check_block_group_item(fs_info, eb, slot);
12376                 err |= ret;
12377                 break;
12378         case BTRFS_DEV_ITEM_KEY:
12379                 ret = check_dev_item(fs_info, eb, slot);
12380                 err |= ret;
12381                 break;
12382         case BTRFS_CHUNK_ITEM_KEY:
12383                 ret = check_chunk_item(fs_info, eb, slot);
12384                 err |= ret;
12385                 break;
12386         case BTRFS_DEV_EXTENT_KEY:
12387                 ret = check_dev_extent_item(fs_info, eb, slot);
12388                 err |= ret;
12389                 break;
12390         case BTRFS_EXTENT_ITEM_KEY:
12391         case BTRFS_METADATA_ITEM_KEY:
12392                 ret = check_extent_item(fs_info, eb, slot);
12393                 err |= ret;
12394                 break;
12395         case BTRFS_EXTENT_CSUM_KEY:
12396                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12397                 break;
12398         case BTRFS_TREE_BLOCK_REF_KEY:
12399                 ret = check_tree_block_backref(fs_info, key.offset,
12400                                                key.objectid, -1);
12401                 err |= ret;
12402                 break;
12403         case BTRFS_EXTENT_DATA_REF_KEY:
12404                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12405                 ret = check_extent_data_backref(fs_info,
12406                                 btrfs_extent_data_ref_root(eb, dref),
12407                                 btrfs_extent_data_ref_objectid(eb, dref),
12408                                 btrfs_extent_data_ref_offset(eb, dref),
12409                                 key.objectid, 0,
12410                                 btrfs_extent_data_ref_count(eb, dref));
12411                 err |= ret;
12412                 break;
12413         case BTRFS_SHARED_BLOCK_REF_KEY:
12414                 ret = check_shared_block_backref(fs_info, key.offset,
12415                                                  key.objectid, -1);
12416                 err |= ret;
12417                 break;
12418         case BTRFS_SHARED_DATA_REF_KEY:
12419                 ret = check_shared_data_backref(fs_info, key.offset,
12420                                                 key.objectid);
12421                 err |= ret;
12422                 break;
12423         default:
12424                 break;
12425         }
12426
12427         if (++slot < btrfs_header_nritems(eb))
12428                 goto next;
12429
12430         return err;
12431 }
12432
12433 /*
12434  * Helper function for later fs/subvol tree check.  To determine if a tree
12435  * block should be checked.
12436  * This function will ensure only the direct referencer with lowest rootid to
12437  * check a fs/subvolume tree block.
12438  *
12439  * Backref check at extent tree would detect errors like missing subvolume
12440  * tree, so we can do aggressive check to reduce duplicated checks.
12441  */
12442 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
12443 {
12444         struct btrfs_root *extent_root = root->fs_info->extent_root;
12445         struct btrfs_key key;
12446         struct btrfs_path path;
12447         struct extent_buffer *leaf;
12448         int slot;
12449         struct btrfs_extent_item *ei;
12450         unsigned long ptr;
12451         unsigned long end;
12452         int type;
12453         u32 item_size;
12454         u64 offset;
12455         struct btrfs_extent_inline_ref *iref;
12456         int ret;
12457
12458         btrfs_init_path(&path);
12459         key.objectid = btrfs_header_bytenr(eb);
12460         key.type = BTRFS_METADATA_ITEM_KEY;
12461         key.offset = (u64)-1;
12462
12463         /*
12464          * Any failure in backref resolving means we can't determine
12465          * whom the tree block belongs to.
12466          * So in that case, we need to check that tree block
12467          */
12468         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12469         if (ret < 0)
12470                 goto need_check;
12471
12472         ret = btrfs_previous_extent_item(extent_root, &path,
12473                                          btrfs_header_bytenr(eb));
12474         if (ret)
12475                 goto need_check;
12476
12477         leaf = path.nodes[0];
12478         slot = path.slots[0];
12479         btrfs_item_key_to_cpu(leaf, &key, slot);
12480         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12481
12482         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12483                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12484         } else {
12485                 struct btrfs_tree_block_info *info;
12486
12487                 info = (struct btrfs_tree_block_info *)(ei + 1);
12488                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
12489         }
12490
12491         item_size = btrfs_item_size_nr(leaf, slot);
12492         ptr = (unsigned long)iref;
12493         end = (unsigned long)ei + item_size;
12494         while (ptr < end) {
12495                 iref = (struct btrfs_extent_inline_ref *)ptr;
12496                 type = btrfs_extent_inline_ref_type(leaf, iref);
12497                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
12498
12499                 /*
12500                  * We only check the tree block if current root is
12501                  * the lowest referencer of it.
12502                  */
12503                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
12504                     offset < root->objectid) {
12505                         btrfs_release_path(&path);
12506                         return 0;
12507                 }
12508
12509                 ptr += btrfs_extent_inline_ref_size(type);
12510         }
12511         /*
12512          * Normally we should also check keyed tree block ref, but that may be
12513          * very time consuming.  Inlined ref should already make us skip a lot
12514          * of refs now.  So skip search keyed tree block ref.
12515          */
12516
12517 need_check:
12518         btrfs_release_path(&path);
12519         return 1;
12520 }
12521
12522 /*
12523  * Traversal function for tree block. We will do:
12524  * 1) Skip shared fs/subvolume tree blocks
12525  * 2) Update related bytes accounting
12526  * 3) Pre-order traversal
12527  */
12528 static int traverse_tree_block(struct btrfs_root *root,
12529                                 struct extent_buffer *node)
12530 {
12531         struct extent_buffer *eb;
12532         struct btrfs_key key;
12533         struct btrfs_key drop_key;
12534         int level;
12535         u64 nr;
12536         int i;
12537         int err = 0;
12538         int ret;
12539
12540         /*
12541          * Skip shared fs/subvolume tree block, in that case they will
12542          * be checked by referencer with lowest rootid
12543          */
12544         if (is_fstree(root->objectid) && !should_check(root, node))
12545                 return 0;
12546
12547         /* Update bytes accounting */
12548         total_btree_bytes += node->len;
12549         if (fs_root_objectid(btrfs_header_owner(node)))
12550                 total_fs_tree_bytes += node->len;
12551         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
12552                 total_extent_tree_bytes += node->len;
12553
12554         /* pre-order tranversal, check itself first */
12555         level = btrfs_header_level(node);
12556         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
12557                                    btrfs_header_level(node),
12558                                    btrfs_header_owner(node));
12559         err |= ret;
12560         if (err)
12561                 error(
12562         "check %s failed root %llu bytenr %llu level %d, force continue check",
12563                         level ? "node":"leaf", root->objectid,
12564                         btrfs_header_bytenr(node), btrfs_header_level(node));
12565
12566         if (!level) {
12567                 btree_space_waste += btrfs_leaf_free_space(root, node);
12568                 ret = check_leaf_items(root, node);
12569                 err |= ret;
12570                 return err;
12571         }
12572
12573         nr = btrfs_header_nritems(node);
12574         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
12575         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
12576                 sizeof(struct btrfs_key_ptr);
12577
12578         /* Then check all its children */
12579         for (i = 0; i < nr; i++) {
12580                 u64 blocknr = btrfs_node_blockptr(node, i);
12581
12582                 btrfs_node_key_to_cpu(node, &key, i);
12583                 if (level == root->root_item.drop_level &&
12584                     is_dropped_key(&key, &drop_key))
12585                         continue;
12586
12587                 /*
12588                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
12589                  * to call the function itself.
12590                  */
12591                 eb = read_tree_block(root->fs_info, blocknr, 0);
12592                 if (extent_buffer_uptodate(eb)) {
12593                         ret = traverse_tree_block(root, eb);
12594                         err |= ret;
12595                 }
12596                 free_extent_buffer(eb);
12597         }
12598
12599         return err;
12600 }
12601
12602 /*
12603  * Low memory usage version check_chunks_and_extents.
12604  */
12605 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12606 {
12607         struct btrfs_path path;
12608         struct btrfs_key key;
12609         struct btrfs_root *root1;
12610         struct btrfs_root *root;
12611         struct btrfs_root *cur_root;
12612         int err = 0;
12613         int ret;
12614
12615         root = fs_info->fs_root;
12616
12617         root1 = root->fs_info->chunk_root;
12618         ret = traverse_tree_block(root1, root1->node);
12619         err |= ret;
12620
12621         root1 = root->fs_info->tree_root;
12622         ret = traverse_tree_block(root1, root1->node);
12623         err |= ret;
12624
12625         btrfs_init_path(&path);
12626         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12627         key.offset = 0;
12628         key.type = BTRFS_ROOT_ITEM_KEY;
12629
12630         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12631         if (ret) {
12632                 error("cannot find extent treet in tree_root");
12633                 goto out;
12634         }
12635
12636         while (1) {
12637                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12638                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12639                         goto next;
12640                 key.offset = (u64)-1;
12641
12642                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12643                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12644                                         &key);
12645                 else
12646                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12647                 if (IS_ERR(cur_root) || !cur_root) {
12648                         error("failed to read tree: %lld", key.objectid);
12649                         goto next;
12650                 }
12651
12652                 ret = traverse_tree_block(cur_root, cur_root->node);
12653                 err |= ret;
12654
12655                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12656                         btrfs_free_fs_root(cur_root);
12657 next:
12658                 ret = btrfs_next_item(root1, &path);
12659                 if (ret)
12660                         goto out;
12661         }
12662
12663 out:
12664         btrfs_release_path(&path);
12665         return err;
12666 }
12667
12668 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
12669 {
12670         int ret;
12671
12672         if (!ctx.progress_enabled)
12673                 fprintf(stderr, "checking extents\n");
12674         if (check_mode == CHECK_MODE_LOWMEM)
12675                 ret = check_chunks_and_extents_v2(fs_info);
12676         else
12677                 ret = check_chunks_and_extents(fs_info);
12678
12679         return ret;
12680 }
12681
12682 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
12683                            struct btrfs_root *root, int overwrite)
12684 {
12685         struct extent_buffer *c;
12686         struct extent_buffer *old = root->node;
12687         int level;
12688         int ret;
12689         struct btrfs_disk_key disk_key = {0,0,0};
12690
12691         level = 0;
12692
12693         if (overwrite) {
12694                 c = old;
12695                 extent_buffer_get(c);
12696                 goto init;
12697         }
12698         c = btrfs_alloc_free_block(trans, root,
12699                                    root->fs_info->nodesize,
12700                                    root->root_key.objectid,
12701                                    &disk_key, level, 0, 0);
12702         if (IS_ERR(c)) {
12703                 c = old;
12704                 extent_buffer_get(c);
12705                 overwrite = 1;
12706         }
12707 init:
12708         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
12709         btrfs_set_header_level(c, level);
12710         btrfs_set_header_bytenr(c, c->start);
12711         btrfs_set_header_generation(c, trans->transid);
12712         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
12713         btrfs_set_header_owner(c, root->root_key.objectid);
12714
12715         write_extent_buffer(c, root->fs_info->fsid,
12716                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
12717
12718         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
12719                             btrfs_header_chunk_tree_uuid(c),
12720                             BTRFS_UUID_SIZE);
12721
12722         btrfs_mark_buffer_dirty(c);
12723         /*
12724          * this case can happen in the following case:
12725          *
12726          * 1.overwrite previous root.
12727          *
12728          * 2.reinit reloc data root, this is because we skip pin
12729          * down reloc data tree before which means we can allocate
12730          * same block bytenr here.
12731          */
12732         if (old->start == c->start) {
12733                 btrfs_set_root_generation(&root->root_item,
12734                                           trans->transid);
12735                 root->root_item.level = btrfs_header_level(root->node);
12736                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
12737                                         &root->root_key, &root->root_item);
12738                 if (ret) {
12739                         free_extent_buffer(c);
12740                         return ret;
12741                 }
12742         }
12743         free_extent_buffer(old);
12744         root->node = c;
12745         add_root_to_dirty_list(root);
12746         return 0;
12747 }
12748
12749 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
12750                                 struct extent_buffer *eb, int tree_root)
12751 {
12752         struct extent_buffer *tmp;
12753         struct btrfs_root_item *ri;
12754         struct btrfs_key key;
12755         u64 bytenr;
12756         int level = btrfs_header_level(eb);
12757         int nritems;
12758         int ret;
12759         int i;
12760
12761         /*
12762          * If we have pinned this block before, don't pin it again.
12763          * This can not only avoid forever loop with broken filesystem
12764          * but also give us some speedups.
12765          */
12766         if (test_range_bit(&fs_info->pinned_extents, eb->start,
12767                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
12768                 return 0;
12769
12770         btrfs_pin_extent(fs_info, eb->start, eb->len);
12771
12772         nritems = btrfs_header_nritems(eb);
12773         for (i = 0; i < nritems; i++) {
12774                 if (level == 0) {
12775                         btrfs_item_key_to_cpu(eb, &key, i);
12776                         if (key.type != BTRFS_ROOT_ITEM_KEY)
12777                                 continue;
12778                         /* Skip the extent root and reloc roots */
12779                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
12780                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
12781                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
12782                                 continue;
12783                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
12784                         bytenr = btrfs_disk_root_bytenr(eb, ri);
12785
12786                         /*
12787                          * If at any point we start needing the real root we
12788                          * will have to build a stump root for the root we are
12789                          * in, but for now this doesn't actually use the root so
12790                          * just pass in extent_root.
12791                          */
12792                         tmp = read_tree_block(fs_info, bytenr, 0);
12793                         if (!extent_buffer_uptodate(tmp)) {
12794                                 fprintf(stderr, "Error reading root block\n");
12795                                 return -EIO;
12796                         }
12797                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
12798                         free_extent_buffer(tmp);
12799                         if (ret)
12800                                 return ret;
12801                 } else {
12802                         bytenr = btrfs_node_blockptr(eb, i);
12803
12804                         /* If we aren't the tree root don't read the block */
12805                         if (level == 1 && !tree_root) {
12806                                 btrfs_pin_extent(fs_info, bytenr,
12807                                                 fs_info->nodesize);
12808                                 continue;
12809                         }
12810
12811                         tmp = read_tree_block(fs_info, bytenr, 0);
12812                         if (!extent_buffer_uptodate(tmp)) {
12813                                 fprintf(stderr, "Error reading tree block\n");
12814                                 return -EIO;
12815                         }
12816                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
12817                         free_extent_buffer(tmp);
12818                         if (ret)
12819                                 return ret;
12820                 }
12821         }
12822
12823         return 0;
12824 }
12825
12826 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
12827 {
12828         int ret;
12829
12830         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
12831         if (ret)
12832                 return ret;
12833
12834         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
12835 }
12836
12837 static int reset_block_groups(struct btrfs_fs_info *fs_info)
12838 {
12839         struct btrfs_block_group_cache *cache;
12840         struct btrfs_path path;
12841         struct extent_buffer *leaf;
12842         struct btrfs_chunk *chunk;
12843         struct btrfs_key key;
12844         int ret;
12845         u64 start;
12846
12847         btrfs_init_path(&path);
12848         key.objectid = 0;
12849         key.type = BTRFS_CHUNK_ITEM_KEY;
12850         key.offset = 0;
12851         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
12852         if (ret < 0) {
12853                 btrfs_release_path(&path);
12854                 return ret;
12855         }
12856
12857         /*
12858          * We do this in case the block groups were screwed up and had alloc
12859          * bits that aren't actually set on the chunks.  This happens with
12860          * restored images every time and could happen in real life I guess.
12861          */
12862         fs_info->avail_data_alloc_bits = 0;
12863         fs_info->avail_metadata_alloc_bits = 0;
12864         fs_info->avail_system_alloc_bits = 0;
12865
12866         /* First we need to create the in-memory block groups */
12867         while (1) {
12868                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12869                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
12870                         if (ret < 0) {
12871                                 btrfs_release_path(&path);
12872                                 return ret;
12873                         }
12874                         if (ret) {
12875                                 ret = 0;
12876                                 break;
12877                         }
12878                 }
12879                 leaf = path.nodes[0];
12880                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12881                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
12882                         path.slots[0]++;
12883                         continue;
12884                 }
12885
12886                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
12887                 btrfs_add_block_group(fs_info, 0,
12888                                       btrfs_chunk_type(leaf, chunk),
12889                                       key.objectid, key.offset,
12890                                       btrfs_chunk_length(leaf, chunk));
12891                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
12892                                  key.offset + btrfs_chunk_length(leaf, chunk));
12893                 path.slots[0]++;
12894         }
12895         start = 0;
12896         while (1) {
12897                 cache = btrfs_lookup_first_block_group(fs_info, start);
12898                 if (!cache)
12899                         break;
12900                 cache->cached = 1;
12901                 start = cache->key.objectid + cache->key.offset;
12902         }
12903
12904         btrfs_release_path(&path);
12905         return 0;
12906 }
12907
12908 static int reset_balance(struct btrfs_trans_handle *trans,
12909                          struct btrfs_fs_info *fs_info)
12910 {
12911         struct btrfs_root *root = fs_info->tree_root;
12912         struct btrfs_path path;
12913         struct extent_buffer *leaf;
12914         struct btrfs_key key;
12915         int del_slot, del_nr = 0;
12916         int ret;
12917         int found = 0;
12918
12919         btrfs_init_path(&path);
12920         key.objectid = BTRFS_BALANCE_OBJECTID;
12921         key.type = BTRFS_BALANCE_ITEM_KEY;
12922         key.offset = 0;
12923         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12924         if (ret) {
12925                 if (ret > 0)
12926                         ret = 0;
12927                 if (!ret)
12928                         goto reinit_data_reloc;
12929                 else
12930                         goto out;
12931         }
12932
12933         ret = btrfs_del_item(trans, root, &path);
12934         if (ret)
12935                 goto out;
12936         btrfs_release_path(&path);
12937
12938         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12939         key.type = BTRFS_ROOT_ITEM_KEY;
12940         key.offset = 0;
12941         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12942         if (ret < 0)
12943                 goto out;
12944         while (1) {
12945                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12946                         if (!found)
12947                                 break;
12948
12949                         if (del_nr) {
12950                                 ret = btrfs_del_items(trans, root, &path,
12951                                                       del_slot, del_nr);
12952                                 del_nr = 0;
12953                                 if (ret)
12954                                         goto out;
12955                         }
12956                         key.offset++;
12957                         btrfs_release_path(&path);
12958
12959                         found = 0;
12960                         ret = btrfs_search_slot(trans, root, &key, &path,
12961                                                 -1, 1);
12962                         if (ret < 0)
12963                                 goto out;
12964                         continue;
12965                 }
12966                 found = 1;
12967                 leaf = path.nodes[0];
12968                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12969                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12970                         break;
12971                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12972                         path.slots[0]++;
12973                         continue;
12974                 }
12975                 if (!del_nr) {
12976                         del_slot = path.slots[0];
12977                         del_nr = 1;
12978                 } else {
12979                         del_nr++;
12980                 }
12981                 path.slots[0]++;
12982         }
12983
12984         if (del_nr) {
12985                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12986                 if (ret)
12987                         goto out;
12988         }
12989         btrfs_release_path(&path);
12990
12991 reinit_data_reloc:
12992         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12993         key.type = BTRFS_ROOT_ITEM_KEY;
12994         key.offset = (u64)-1;
12995         root = btrfs_read_fs_root(fs_info, &key);
12996         if (IS_ERR(root)) {
12997                 fprintf(stderr, "Error reading data reloc tree\n");
12998                 ret = PTR_ERR(root);
12999                 goto out;
13000         }
13001         record_root_in_trans(trans, root);
13002         ret = btrfs_fsck_reinit_root(trans, root, 0);
13003         if (ret)
13004                 goto out;
13005         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13006 out:
13007         btrfs_release_path(&path);
13008         return ret;
13009 }
13010
13011 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13012                               struct btrfs_fs_info *fs_info)
13013 {
13014         u64 start = 0;
13015         int ret;
13016
13017         /*
13018          * The only reason we don't do this is because right now we're just
13019          * walking the trees we find and pinning down their bytes, we don't look
13020          * at any of the leaves.  In order to do mixed groups we'd have to check
13021          * the leaves of any fs roots and pin down the bytes for any file
13022          * extents we find.  Not hard but why do it if we don't have to?
13023          */
13024         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13025                 fprintf(stderr, "We don't support re-initing the extent tree "
13026                         "for mixed block groups yet, please notify a btrfs "
13027                         "developer you want to do this so they can add this "
13028                         "functionality.\n");
13029                 return -EINVAL;
13030         }
13031
13032         /*
13033          * first we need to walk all of the trees except the extent tree and pin
13034          * down the bytes that are in use so we don't overwrite any existing
13035          * metadata.
13036          */
13037         ret = pin_metadata_blocks(fs_info);
13038         if (ret) {
13039                 fprintf(stderr, "error pinning down used bytes\n");
13040                 return ret;
13041         }
13042
13043         /*
13044          * Need to drop all the block groups since we're going to recreate all
13045          * of them again.
13046          */
13047         btrfs_free_block_groups(fs_info);
13048         ret = reset_block_groups(fs_info);
13049         if (ret) {
13050                 fprintf(stderr, "error resetting the block groups\n");
13051                 return ret;
13052         }
13053
13054         /* Ok we can allocate now, reinit the extent root */
13055         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13056         if (ret) {
13057                 fprintf(stderr, "extent root initialization failed\n");
13058                 /*
13059                  * When the transaction code is updated we should end the
13060                  * transaction, but for now progs only knows about commit so
13061                  * just return an error.
13062                  */
13063                 return ret;
13064         }
13065
13066         /*
13067          * Now we have all the in-memory block groups setup so we can make
13068          * allocations properly, and the metadata we care about is safe since we
13069          * pinned all of it above.
13070          */
13071         while (1) {
13072                 struct btrfs_block_group_cache *cache;
13073
13074                 cache = btrfs_lookup_first_block_group(fs_info, start);
13075                 if (!cache)
13076                         break;
13077                 start = cache->key.objectid + cache->key.offset;
13078                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13079                                         &cache->key, &cache->item,
13080                                         sizeof(cache->item));
13081                 if (ret) {
13082                         fprintf(stderr, "Error adding block group\n");
13083                         return ret;
13084                 }
13085                 btrfs_extent_post_op(trans, fs_info->extent_root);
13086         }
13087
13088         ret = reset_balance(trans, fs_info);
13089         if (ret)
13090                 fprintf(stderr, "error resetting the pending balance\n");
13091
13092         return ret;
13093 }
13094
13095 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13096 {
13097         struct btrfs_path path;
13098         struct btrfs_trans_handle *trans;
13099         struct btrfs_key key;
13100         int ret;
13101
13102         printf("Recowing metadata block %llu\n", eb->start);
13103         key.objectid = btrfs_header_owner(eb);
13104         key.type = BTRFS_ROOT_ITEM_KEY;
13105         key.offset = (u64)-1;
13106
13107         root = btrfs_read_fs_root(root->fs_info, &key);
13108         if (IS_ERR(root)) {
13109                 fprintf(stderr, "Couldn't find owner root %llu\n",
13110                         key.objectid);
13111                 return PTR_ERR(root);
13112         }
13113
13114         trans = btrfs_start_transaction(root, 1);
13115         if (IS_ERR(trans))
13116                 return PTR_ERR(trans);
13117
13118         btrfs_init_path(&path);
13119         path.lowest_level = btrfs_header_level(eb);
13120         if (path.lowest_level)
13121                 btrfs_node_key_to_cpu(eb, &key, 0);
13122         else
13123                 btrfs_item_key_to_cpu(eb, &key, 0);
13124
13125         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13126         btrfs_commit_transaction(trans, root);
13127         btrfs_release_path(&path);
13128         return ret;
13129 }
13130
13131 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13132 {
13133         struct btrfs_path path;
13134         struct btrfs_trans_handle *trans;
13135         struct btrfs_key key;
13136         int ret;
13137
13138         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13139                bad->key.type, bad->key.offset);
13140         key.objectid = bad->root_id;
13141         key.type = BTRFS_ROOT_ITEM_KEY;
13142         key.offset = (u64)-1;
13143
13144         root = btrfs_read_fs_root(root->fs_info, &key);
13145         if (IS_ERR(root)) {
13146                 fprintf(stderr, "Couldn't find owner root %llu\n",
13147                         key.objectid);
13148                 return PTR_ERR(root);
13149         }
13150
13151         trans = btrfs_start_transaction(root, 1);
13152         if (IS_ERR(trans))
13153                 return PTR_ERR(trans);
13154
13155         btrfs_init_path(&path);
13156         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13157         if (ret) {
13158                 if (ret > 0)
13159                         ret = 0;
13160                 goto out;
13161         }
13162         ret = btrfs_del_item(trans, root, &path);
13163 out:
13164         btrfs_commit_transaction(trans, root);
13165         btrfs_release_path(&path);
13166         return ret;
13167 }
13168
13169 static int zero_log_tree(struct btrfs_root *root)
13170 {
13171         struct btrfs_trans_handle *trans;
13172         int ret;
13173
13174         trans = btrfs_start_transaction(root, 1);
13175         if (IS_ERR(trans)) {
13176                 ret = PTR_ERR(trans);
13177                 return ret;
13178         }
13179         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13180         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13181         ret = btrfs_commit_transaction(trans, root);
13182         return ret;
13183 }
13184
13185 static int populate_csum(struct btrfs_trans_handle *trans,
13186                          struct btrfs_root *csum_root, char *buf, u64 start,
13187                          u64 len)
13188 {
13189         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13190         u64 offset = 0;
13191         u64 sectorsize;
13192         int ret = 0;
13193
13194         while (offset < len) {
13195                 sectorsize = fs_info->sectorsize;
13196                 ret = read_extent_data(fs_info, buf, start + offset,
13197                                        &sectorsize, 0);
13198                 if (ret)
13199                         break;
13200                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13201                                             start + offset, buf, sectorsize);
13202                 if (ret)
13203                         break;
13204                 offset += sectorsize;
13205         }
13206         return ret;
13207 }
13208
13209 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13210                                       struct btrfs_root *csum_root,
13211                                       struct btrfs_root *cur_root)
13212 {
13213         struct btrfs_path path;
13214         struct btrfs_key key;
13215         struct extent_buffer *node;
13216         struct btrfs_file_extent_item *fi;
13217         char *buf = NULL;
13218         u64 start = 0;
13219         u64 len = 0;
13220         int slot = 0;
13221         int ret = 0;
13222
13223         buf = malloc(cur_root->fs_info->sectorsize);
13224         if (!buf)
13225                 return -ENOMEM;
13226
13227         btrfs_init_path(&path);
13228         key.objectid = 0;
13229         key.offset = 0;
13230         key.type = 0;
13231         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13232         if (ret < 0)
13233                 goto out;
13234         /* Iterate all regular file extents and fill its csum */
13235         while (1) {
13236                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13237
13238                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13239                         goto next;
13240                 node = path.nodes[0];
13241                 slot = path.slots[0];
13242                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13243                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13244                         goto next;
13245                 start = btrfs_file_extent_disk_bytenr(node, fi);
13246                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13247
13248                 ret = populate_csum(trans, csum_root, buf, start, len);
13249                 if (ret == -EEXIST)
13250                         ret = 0;
13251                 if (ret < 0)
13252                         goto out;
13253 next:
13254                 /*
13255                  * TODO: if next leaf is corrupted, jump to nearest next valid
13256                  * leaf.
13257                  */
13258                 ret = btrfs_next_item(cur_root, &path);
13259                 if (ret < 0)
13260                         goto out;
13261                 if (ret > 0) {
13262                         ret = 0;
13263                         goto out;
13264                 }
13265         }
13266
13267 out:
13268         btrfs_release_path(&path);
13269         free(buf);
13270         return ret;
13271 }
13272
13273 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13274                                   struct btrfs_root *csum_root)
13275 {
13276         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13277         struct btrfs_path path;
13278         struct btrfs_root *tree_root = fs_info->tree_root;
13279         struct btrfs_root *cur_root;
13280         struct extent_buffer *node;
13281         struct btrfs_key key;
13282         int slot = 0;
13283         int ret = 0;
13284
13285         btrfs_init_path(&path);
13286         key.objectid = BTRFS_FS_TREE_OBJECTID;
13287         key.offset = 0;
13288         key.type = BTRFS_ROOT_ITEM_KEY;
13289         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13290         if (ret < 0)
13291                 goto out;
13292         if (ret > 0) {
13293                 ret = -ENOENT;
13294                 goto out;
13295         }
13296
13297         while (1) {
13298                 node = path.nodes[0];
13299                 slot = path.slots[0];
13300                 btrfs_item_key_to_cpu(node, &key, slot);
13301                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13302                         goto out;
13303                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13304                         goto next;
13305                 if (!is_fstree(key.objectid))
13306                         goto next;
13307                 key.offset = (u64)-1;
13308
13309                 cur_root = btrfs_read_fs_root(fs_info, &key);
13310                 if (IS_ERR(cur_root) || !cur_root) {
13311                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13312                                 key.objectid);
13313                         goto out;
13314                 }
13315                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13316                                 cur_root);
13317                 if (ret < 0)
13318                         goto out;
13319 next:
13320                 ret = btrfs_next_item(tree_root, &path);
13321                 if (ret > 0) {
13322                         ret = 0;
13323                         goto out;
13324                 }
13325                 if (ret < 0)
13326                         goto out;
13327         }
13328
13329 out:
13330         btrfs_release_path(&path);
13331         return ret;
13332 }
13333
13334 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13335                                       struct btrfs_root *csum_root)
13336 {
13337         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13338         struct btrfs_path path;
13339         struct btrfs_extent_item *ei;
13340         struct extent_buffer *leaf;
13341         char *buf;
13342         struct btrfs_key key;
13343         int ret;
13344
13345         btrfs_init_path(&path);
13346         key.objectid = 0;
13347         key.type = BTRFS_EXTENT_ITEM_KEY;
13348         key.offset = 0;
13349         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13350         if (ret < 0) {
13351                 btrfs_release_path(&path);
13352                 return ret;
13353         }
13354
13355         buf = malloc(csum_root->fs_info->sectorsize);
13356         if (!buf) {
13357                 btrfs_release_path(&path);
13358                 return -ENOMEM;
13359         }
13360
13361         while (1) {
13362                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13363                         ret = btrfs_next_leaf(extent_root, &path);
13364                         if (ret < 0)
13365                                 break;
13366                         if (ret) {
13367                                 ret = 0;
13368                                 break;
13369                         }
13370                 }
13371                 leaf = path.nodes[0];
13372
13373                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13374                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13375                         path.slots[0]++;
13376                         continue;
13377                 }
13378
13379                 ei = btrfs_item_ptr(leaf, path.slots[0],
13380                                     struct btrfs_extent_item);
13381                 if (!(btrfs_extent_flags(leaf, ei) &
13382                       BTRFS_EXTENT_FLAG_DATA)) {
13383                         path.slots[0]++;
13384                         continue;
13385                 }
13386
13387                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13388                                     key.offset);
13389                 if (ret)
13390                         break;
13391                 path.slots[0]++;
13392         }
13393
13394         btrfs_release_path(&path);
13395         free(buf);
13396         return ret;
13397 }
13398
/*
 * Recompute checksums and store them in the csum tree.
 *
 * Reinitializing the extent tree wipes all extent information, so in that
 * situation the extent tree cannot be used as the source.  A non-zero
 * @search_fs_tree selects the fs/subvolume trees as the source instead;
 * zero selects the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
13415
13416 static void free_roots_info_cache(void)
13417 {
13418         if (!roots_info_cache)
13419                 return;
13420
13421         while (!cache_tree_empty(roots_info_cache)) {
13422                 struct cache_extent *entry;
13423                 struct root_item_info *rii;
13424
13425                 entry = first_cache_extent(roots_info_cache);
13426                 if (!entry)
13427                         break;
13428                 remove_cache_extent(roots_info_cache, entry);
13429                 rii = container_of(entry, struct root_item_info, cache_extent);
13430                 free(rii);
13431         }
13432
13433         free(roots_info_cache);
13434         roots_info_cache = NULL;
13435 }
13436
13437 static int build_roots_info_cache(struct btrfs_fs_info *info)
13438 {
13439         int ret = 0;
13440         struct btrfs_key key;
13441         struct extent_buffer *leaf;
13442         struct btrfs_path path;
13443
13444         if (!roots_info_cache) {
13445                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13446                 if (!roots_info_cache)
13447                         return -ENOMEM;
13448                 cache_tree_init(roots_info_cache);
13449         }
13450
13451         btrfs_init_path(&path);
13452         key.objectid = 0;
13453         key.type = BTRFS_EXTENT_ITEM_KEY;
13454         key.offset = 0;
13455         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13456         if (ret < 0)
13457                 goto out;
13458         leaf = path.nodes[0];
13459
13460         while (1) {
13461                 struct btrfs_key found_key;
13462                 struct btrfs_extent_item *ei;
13463                 struct btrfs_extent_inline_ref *iref;
13464                 int slot = path.slots[0];
13465                 int type;
13466                 u64 flags;
13467                 u64 root_id;
13468                 u8 level;
13469                 struct cache_extent *entry;
13470                 struct root_item_info *rii;
13471
13472                 if (slot >= btrfs_header_nritems(leaf)) {
13473                         ret = btrfs_next_leaf(info->extent_root, &path);
13474                         if (ret < 0) {
13475                                 break;
13476                         } else if (ret) {
13477                                 ret = 0;
13478                                 break;
13479                         }
13480                         leaf = path.nodes[0];
13481                         slot = path.slots[0];
13482                 }
13483
13484                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13485
13486                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13487                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13488                         goto next;
13489
13490                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13491                 flags = btrfs_extent_flags(leaf, ei);
13492
13493                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13494                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13495                         goto next;
13496
13497                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13498                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13499                         level = found_key.offset;
13500                 } else {
13501                         struct btrfs_tree_block_info *binfo;
13502
13503                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13504                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13505                         level = btrfs_tree_block_level(leaf, binfo);
13506                 }
13507
13508                 /*
13509                  * For a root extent, it must be of the following type and the
13510                  * first (and only one) iref in the item.
13511                  */
13512                 type = btrfs_extent_inline_ref_type(leaf, iref);
13513                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13514                         goto next;
13515
13516                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13517                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13518                 if (!entry) {
13519                         rii = malloc(sizeof(struct root_item_info));
13520                         if (!rii) {
13521                                 ret = -ENOMEM;
13522                                 goto out;
13523                         }
13524                         rii->cache_extent.start = root_id;
13525                         rii->cache_extent.size = 1;
13526                         rii->level = (u8)-1;
13527                         entry = &rii->cache_extent;
13528                         ret = insert_cache_extent(roots_info_cache, entry);
13529                         ASSERT(ret == 0);
13530                 } else {
13531                         rii = container_of(entry, struct root_item_info,
13532                                            cache_extent);
13533                 }
13534
13535                 ASSERT(rii->cache_extent.start == root_id);
13536                 ASSERT(rii->cache_extent.size == 1);
13537
13538                 if (level > rii->level || rii->level == (u8)-1) {
13539                         rii->level = level;
13540                         rii->bytenr = found_key.objectid;
13541                         rii->gen = btrfs_extent_generation(leaf, ei);
13542                         rii->node_count = 1;
13543                 } else if (level == rii->level) {
13544                         rii->node_count++;
13545                 }
13546 next:
13547                 path.slots[0]++;
13548         }
13549
13550 out:
13551         btrfs_release_path(&path);
13552
13553         return ret;
13554 }
13555
13556 static int maybe_repair_root_item(struct btrfs_path *path,
13557                                   const struct btrfs_key *root_key,
13558                                   const int read_only_mode)
13559 {
13560         const u64 root_id = root_key->objectid;
13561         struct cache_extent *entry;
13562         struct root_item_info *rii;
13563         struct btrfs_root_item ri;
13564         unsigned long offset;
13565
13566         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13567         if (!entry) {
13568                 fprintf(stderr,
13569                         "Error: could not find extent items for root %llu\n",
13570                         root_key->objectid);
13571                 return -ENOENT;
13572         }
13573
13574         rii = container_of(entry, struct root_item_info, cache_extent);
13575         ASSERT(rii->cache_extent.start == root_id);
13576         ASSERT(rii->cache_extent.size == 1);
13577
13578         if (rii->node_count != 1) {
13579                 fprintf(stderr,
13580                         "Error: could not find btree root extent for root %llu\n",
13581                         root_id);
13582                 return -ENOENT;
13583         }
13584
13585         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13586         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13587
13588         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13589             btrfs_root_level(&ri) != rii->level ||
13590             btrfs_root_generation(&ri) != rii->gen) {
13591
13592                 /*
13593                  * If we're in repair mode but our caller told us to not update
13594                  * the root item, i.e. just check if it needs to be updated, don't
13595                  * print this message, since the caller will call us again shortly
13596                  * for the same root item without read only mode (the caller will
13597                  * open a transaction first).
13598                  */
13599                 if (!(read_only_mode && repair))
13600                         fprintf(stderr,
13601                                 "%sroot item for root %llu,"
13602                                 " current bytenr %llu, current gen %llu, current level %u,"
13603                                 " new bytenr %llu, new gen %llu, new level %u\n",
13604                                 (read_only_mode ? "" : "fixing "),
13605                                 root_id,
13606                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13607                                 btrfs_root_level(&ri),
13608                                 rii->bytenr, rii->gen, rii->level);
13609
13610                 if (btrfs_root_generation(&ri) > rii->gen) {
13611                         fprintf(stderr,
13612                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13613                                 root_id, btrfs_root_generation(&ri), rii->gen);
13614                         return -EINVAL;
13615                 }
13616
13617                 if (!read_only_mode) {
13618                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13619                         btrfs_set_root_level(&ri, rii->level);
13620                         btrfs_set_root_generation(&ri, rii->gen);
13621                         write_extent_buffer(path->nodes[0], &ri,
13622                                             offset, sizeof(ri));
13623                 }
13624
13625                 return 1;
13626         }
13627
13628         return 0;
13629 }
13630
13631 /*
13632  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13633  * caused read-only snapshots to be corrupted if they were created at a moment
13634  * when the source subvolume/snapshot had orphan items. The issue was that the
13635  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13636  * node instead of the post orphan cleanup root node.
13637  * So this function, and its callees, just detects and fixes those cases. Even
13638  * though the regression was for read-only snapshots, this function applies to
13639  * any snapshot/subvolume root.
13640  * This must be run before any other repair code - not doing it so, makes other
13641  * repair code delete or modify backrefs in the extent tree for example, which
13642  * will result in an inconsistent fs after repairing the root items.
13643  */
13644 static int repair_root_items(struct btrfs_fs_info *info)
13645 {
13646         struct btrfs_path path;
13647         struct btrfs_key key;
13648         struct extent_buffer *leaf;
13649         struct btrfs_trans_handle *trans = NULL;
13650         int ret = 0;
13651         int bad_roots = 0;
13652         int need_trans = 0;
13653
13654         btrfs_init_path(&path);
13655
13656         ret = build_roots_info_cache(info);
13657         if (ret)
13658                 goto out;
13659
13660         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
13661         key.type = BTRFS_ROOT_ITEM_KEY;
13662         key.offset = 0;
13663
13664 again:
13665         /*
13666          * Avoid opening and committing transactions if a leaf doesn't have
13667          * any root items that need to be fixed, so that we avoid rotating
13668          * backup roots unnecessarily.
13669          */
13670         if (need_trans) {
13671                 trans = btrfs_start_transaction(info->tree_root, 1);
13672                 if (IS_ERR(trans)) {
13673                         ret = PTR_ERR(trans);
13674                         goto out;
13675                 }
13676         }
13677
13678         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
13679                                 0, trans ? 1 : 0);
13680         if (ret < 0)
13681                 goto out;
13682         leaf = path.nodes[0];
13683
13684         while (1) {
13685                 struct btrfs_key found_key;
13686
13687                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
13688                         int no_more_keys = find_next_key(&path, &key);
13689
13690                         btrfs_release_path(&path);
13691                         if (trans) {
13692                                 ret = btrfs_commit_transaction(trans,
13693                                                                info->tree_root);
13694                                 trans = NULL;
13695                                 if (ret < 0)
13696                                         goto out;
13697                         }
13698                         need_trans = 0;
13699                         if (no_more_keys)
13700                                 break;
13701                         goto again;
13702                 }
13703
13704                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13705
13706                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
13707                         goto next;
13708                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13709                         goto next;
13710
13711                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
13712                 if (ret < 0)
13713                         goto out;
13714                 if (ret) {
13715                         if (!trans && repair) {
13716                                 need_trans = 1;
13717                                 key = found_key;
13718                                 btrfs_release_path(&path);
13719                                 goto again;
13720                         }
13721                         bad_roots++;
13722                 }
13723 next:
13724                 path.slots[0]++;
13725         }
13726         ret = 0;
13727 out:
13728         free_roots_info_cache();
13729         btrfs_release_path(&path);
13730         if (trans)
13731                 btrfs_commit_transaction(trans, info->tree_root);
13732         if (ret < 0)
13733                 return ret;
13734
13735         return bad_roots;
13736 }
13737
13738 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
13739 {
13740         struct btrfs_trans_handle *trans;
13741         struct btrfs_block_group_cache *bg_cache;
13742         u64 current = 0;
13743         int ret = 0;
13744
13745         /* Clear all free space cache inodes and its extent data */
13746         while (1) {
13747                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
13748                 if (!bg_cache)
13749                         break;
13750                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
13751                 if (ret < 0)
13752                         return ret;
13753                 current = bg_cache->key.objectid + bg_cache->key.offset;
13754         }
13755
13756         /* Don't forget to set cache_generation to -1 */
13757         trans = btrfs_start_transaction(fs_info->tree_root, 0);
13758         if (IS_ERR(trans)) {
13759                 error("failed to update super block cache generation");
13760                 return PTR_ERR(trans);
13761         }
13762         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
13763         btrfs_commit_transaction(trans, fs_info->tree_root);
13764
13765         return ret;
13766 }
13767
13768 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
13769                 int clear_version)
13770 {
13771         int ret = 0;
13772
13773         if (clear_version == 1) {
13774                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13775                         error(
13776                 "free space cache v2 detected, use --clear-space-cache v2");
13777                         ret = 1;
13778                         goto close_out;
13779                 }
13780                 printf("Clearing free space cache\n");
13781                 ret = clear_free_space_cache(fs_info);
13782                 if (ret) {
13783                         error("failed to clear free space cache");
13784                         ret = 1;
13785                 } else {
13786                         printf("Free space cache cleared\n");
13787                 }
13788         } else if (clear_version == 2) {
13789                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
13790                         printf("no free space cache v2 to clear\n");
13791                         ret = 0;
13792                         goto close_out;
13793                 }
13794                 printf("Clear free space cache v2\n");
13795                 ret = btrfs_clear_free_space_tree(fs_info);
13796                 if (ret) {
13797                         error("failed to clear free space cache v2: %d", ret);
13798                         ret = 1;
13799                 } else {
13800                         printf("free space cache v2 cleared\n");
13801                 }
13802         }
13803 close_out:
13804         return ret;
13805 }
13806
/*
 * Help text of "btrfs check", passed to usage() by cmd_check().
 *
 * NULL-terminated list of lines: the synopsis, a one-line summary, the long
 * description, an empty separator line and then one or more lines per option.
 */
const char *const cmd_check_usage[] = {
	/* Synopsis and summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* Long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* Which superblock / root copies to use */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",

	/* Operation mode */
	"--force                     skip mount checks, repair is not possible",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",

	/* Reports */
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",

	/* Explicit tree locations, progress and space cache handling */
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
13838
13839 int cmd_check(int argc, char **argv)
13840 {
13841         struct cache_tree root_cache;
13842         struct btrfs_root *root;
13843         struct btrfs_fs_info *info;
13844         u64 bytenr = 0;
13845         u64 subvolid = 0;
13846         u64 tree_root_bytenr = 0;
13847         u64 chunk_root_bytenr = 0;
13848         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
13849         int ret = 0;
13850         int err = 0;
13851         u64 num;
13852         int init_csum_tree = 0;
13853         int readonly = 0;
13854         int clear_space_cache = 0;
13855         int qgroup_report = 0;
13856         int qgroups_repaired = 0;
13857         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
13858         int force = 0;
13859
13860         while(1) {
13861                 int c;
13862                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
13863                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
13864                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
13865                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
13866                         GETOPT_VAL_FORCE };
13867                 static const struct option long_options[] = {
13868                         { "super", required_argument, NULL, 's' },
13869                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
13870                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
13871                         { "init-csum-tree", no_argument, NULL,
13872                                 GETOPT_VAL_INIT_CSUM },
13873                         { "init-extent-tree", no_argument, NULL,
13874                                 GETOPT_VAL_INIT_EXTENT },
13875                         { "check-data-csum", no_argument, NULL,
13876                                 GETOPT_VAL_CHECK_CSUM },
13877                         { "backup", no_argument, NULL, 'b' },
13878                         { "subvol-extents", required_argument, NULL, 'E' },
13879                         { "qgroup-report", no_argument, NULL, 'Q' },
13880                         { "tree-root", required_argument, NULL, 'r' },
13881                         { "chunk-root", required_argument, NULL,
13882                                 GETOPT_VAL_CHUNK_TREE },
13883                         { "progress", no_argument, NULL, 'p' },
13884                         { "mode", required_argument, NULL,
13885                                 GETOPT_VAL_MODE },
13886                         { "clear-space-cache", required_argument, NULL,
13887                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
13888                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
13889                         { NULL, 0, NULL, 0}
13890                 };
13891
13892                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
13893                 if (c < 0)
13894                         break;
13895                 switch(c) {
13896                         case 'a': /* ignored */ break;
13897                         case 'b':
13898                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
13899                                 break;
13900                         case 's':
13901                                 num = arg_strtou64(optarg);
13902                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
13903                                         error(
13904                                         "super mirror should be less than %d",
13905                                                 BTRFS_SUPER_MIRROR_MAX);
13906                                         exit(1);
13907                                 }
13908                                 bytenr = btrfs_sb_offset(((int)num));
13909                                 printf("using SB copy %llu, bytenr %llu\n", num,
13910                                        (unsigned long long)bytenr);
13911                                 break;
13912                         case 'Q':
13913                                 qgroup_report = 1;
13914                                 break;
13915                         case 'E':
13916                                 subvolid = arg_strtou64(optarg);
13917                                 break;
13918                         case 'r':
13919                                 tree_root_bytenr = arg_strtou64(optarg);
13920                                 break;
13921                         case GETOPT_VAL_CHUNK_TREE:
13922                                 chunk_root_bytenr = arg_strtou64(optarg);
13923                                 break;
13924                         case 'p':
13925                                 ctx.progress_enabled = true;
13926                                 break;
13927                         case '?':
13928                         case 'h':
13929                                 usage(cmd_check_usage);
13930                         case GETOPT_VAL_REPAIR:
13931                                 printf("enabling repair mode\n");
13932                                 repair = 1;
13933                                 ctree_flags |= OPEN_CTREE_WRITES;
13934                                 break;
13935                         case GETOPT_VAL_READONLY:
13936                                 readonly = 1;
13937                                 break;
13938                         case GETOPT_VAL_INIT_CSUM:
13939                                 printf("Creating a new CRC tree\n");
13940                                 init_csum_tree = 1;
13941                                 repair = 1;
13942                                 ctree_flags |= OPEN_CTREE_WRITES;
13943                                 break;
13944                         case GETOPT_VAL_INIT_EXTENT:
13945                                 init_extent_tree = 1;
13946                                 ctree_flags |= (OPEN_CTREE_WRITES |
13947                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
13948                                 repair = 1;
13949                                 break;
13950                         case GETOPT_VAL_CHECK_CSUM:
13951                                 check_data_csum = 1;
13952                                 break;
13953                         case GETOPT_VAL_MODE:
13954                                 check_mode = parse_check_mode(optarg);
13955                                 if (check_mode == CHECK_MODE_UNKNOWN) {
13956                                         error("unknown mode: %s", optarg);
13957                                         exit(1);
13958                                 }
13959                                 break;
13960                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
13961                                 if (strcmp(optarg, "v1") == 0) {
13962                                         clear_space_cache = 1;
13963                                 } else if (strcmp(optarg, "v2") == 0) {
13964                                         clear_space_cache = 2;
13965                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13966                                 } else {
13967                                         error(
13968                 "invalid argument to --clear-space-cache, must be v1 or v2");
13969                                         exit(1);
13970                                 }
13971                                 ctree_flags |= OPEN_CTREE_WRITES;
13972                                 break;
13973                         case GETOPT_VAL_FORCE:
13974                                 force = 1;
13975                                 break;
13976                 }
13977         }
13978
13979         if (check_argc_exact(argc - optind, 1))
13980                 usage(cmd_check_usage);
13981
13982         if (ctx.progress_enabled) {
13983                 ctx.tp = TASK_NOTHING;
13984                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13985         }
13986
13987         /* This check is the only reason for --readonly to exist */
13988         if (readonly && repair) {
13989                 error("repair options are not compatible with --readonly");
13990                 exit(1);
13991         }
13992
13993         /*
13994          * experimental and dangerous
13995          */
13996         if (repair && check_mode == CHECK_MODE_LOWMEM)
13997                 warning("low-memory mode repair support is only partial");
13998
13999         radix_tree_init();
14000         cache_tree_init(&root_cache);
14001
14002         ret = check_mounted(argv[optind]);
14003         if (!force) {
14004                 if (ret < 0) {
14005                         error("could not check mount status: %s",
14006                                         strerror(-ret));
14007                         err |= !!ret;
14008                         goto err_out;
14009                 } else if (ret) {
14010                         error(
14011 "%s is currently mounted, use --force if you really intend to check the filesystem",
14012                                 argv[optind]);
14013                         ret = -EBUSY;
14014                         err |= !!ret;
14015                         goto err_out;
14016                 }
14017         } else {
14018                 if (repair) {
14019                         error("repair and --force is not yet supported");
14020                         ret = 1;
14021                         err |= !!ret;
14022                         goto err_out;
14023                 }
14024                 if (ret < 0) {
14025                         warning(
14026 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14027                                 argv[optind]);
14028                 } else if (ret) {
14029                         warning(
14030                         "filesystem mounted, continuing because of --force");
14031                 }
14032                 /* A block device is mounted in exclusive mode by kernel */
14033                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14034         }
14035
14036         /* only allow partial opening under repair mode */
14037         if (repair)
14038                 ctree_flags |= OPEN_CTREE_PARTIAL;
14039
14040         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14041                                   chunk_root_bytenr, ctree_flags);
14042         if (!info) {
14043                 error("cannot open file system");
14044                 ret = -EIO;
14045                 err |= !!ret;
14046                 goto err_out;
14047         }
14048
14049         global_info = info;
14050         root = info->fs_root;
14051         uuid_unparse(info->super_copy->fsid, uuidbuf);
14052
14053         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14054
14055         /*
14056          * Check the bare minimum before starting anything else that could rely
14057          * on it, namely the tree roots, any local consistency checks
14058          */
14059         if (!extent_buffer_uptodate(info->tree_root->node) ||
14060             !extent_buffer_uptodate(info->dev_root->node) ||
14061             !extent_buffer_uptodate(info->chunk_root->node)) {
14062                 error("critical roots corrupted, unable to check the filesystem");
14063                 err |= !!ret;
14064                 ret = -EIO;
14065                 goto close_out;
14066         }
14067
14068         if (clear_space_cache) {
14069                 ret = do_clear_free_space_cache(info, clear_space_cache);
14070                 err |= !!ret;
14071                 goto close_out;
14072         }
14073
14074         /*
14075          * repair mode will force us to commit transaction which
14076          * will make us fail to load log tree when mounting.
14077          */
14078         if (repair && btrfs_super_log_root(info->super_copy)) {
14079                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14080                 if (!ret) {
14081                         ret = 1;
14082                         err |= !!ret;
14083                         goto close_out;
14084                 }
14085                 ret = zero_log_tree(root);
14086                 err |= !!ret;
14087                 if (ret) {
14088                         error("failed to zero log tree: %d", ret);
14089                         goto close_out;
14090                 }
14091         }
14092
14093         if (qgroup_report) {
14094                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14095                        uuidbuf);
14096                 ret = qgroup_verify_all(info);
14097                 err |= !!ret;
14098                 if (ret == 0)
14099                         report_qgroups(1);
14100                 goto close_out;
14101         }
14102         if (subvolid) {
14103                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14104                        subvolid, argv[optind], uuidbuf);
14105                 ret = print_extent_state(info, subvolid);
14106                 err |= !!ret;
14107                 goto close_out;
14108         }
14109
14110         if (init_extent_tree || init_csum_tree) {
14111                 struct btrfs_trans_handle *trans;
14112
14113                 trans = btrfs_start_transaction(info->extent_root, 0);
14114                 if (IS_ERR(trans)) {
14115                         error("error starting transaction");
14116                         ret = PTR_ERR(trans);
14117                         err |= !!ret;
14118                         goto close_out;
14119                 }
14120
14121                 if (init_extent_tree) {
14122                         printf("Creating a new extent tree\n");
14123                         ret = reinit_extent_tree(trans, info);
14124                         err |= !!ret;
14125                         if (ret)
14126                                 goto close_out;
14127                 }
14128
14129                 if (init_csum_tree) {
14130                         printf("Reinitialize checksum tree\n");
14131                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14132                         if (ret) {
14133                                 error("checksum tree initialization failed: %d",
14134                                                 ret);
14135                                 ret = -EIO;
14136                                 err |= !!ret;
14137                                 goto close_out;
14138                         }
14139
14140                         ret = fill_csum_tree(trans, info->csum_root,
14141                                              init_extent_tree);
14142                         err |= !!ret;
14143                         if (ret) {
14144                                 error("checksum tree refilling failed: %d", ret);
14145                                 return -EIO;
14146                         }
14147                 }
14148                 /*
14149                  * Ok now we commit and run the normal fsck, which will add
14150                  * extent entries for all of the items it finds.
14151                  */
14152                 ret = btrfs_commit_transaction(trans, info->extent_root);
14153                 err |= !!ret;
14154                 if (ret)
14155                         goto close_out;
14156         }
14157         if (!extent_buffer_uptodate(info->extent_root->node)) {
14158                 error("critical: extent_root, unable to check the filesystem");
14159                 ret = -EIO;
14160                 err |= !!ret;
14161                 goto close_out;
14162         }
14163         if (!extent_buffer_uptodate(info->csum_root->node)) {
14164                 error("critical: csum_root, unable to check the filesystem");
14165                 ret = -EIO;
14166                 err |= !!ret;
14167                 goto close_out;
14168         }
14169
14170         ret = do_check_chunks_and_extents(info);
14171         err |= !!ret;
14172         if (ret)
14173                 error(
14174                 "errors found in extent allocation tree or chunk allocation");
14175
14176         ret = repair_root_items(info);
14177         err |= !!ret;
14178         if (ret < 0) {
14179                 error("failed to repair root items: %s", strerror(-ret));
14180                 goto close_out;
14181         }
14182         if (repair) {
14183                 fprintf(stderr, "Fixed %d roots.\n", ret);
14184                 ret = 0;
14185         } else if (ret > 0) {
14186                 fprintf(stderr,
14187                        "Found %d roots with an outdated root item.\n",
14188                        ret);
14189                 fprintf(stderr,
14190                         "Please run a filesystem check with the option --repair to fix them.\n");
14191                 ret = 1;
14192                 err |= !!ret;
14193                 goto close_out;
14194         }
14195
14196         if (!ctx.progress_enabled) {
14197                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14198                         fprintf(stderr, "checking free space tree\n");
14199                 else
14200                         fprintf(stderr, "checking free space cache\n");
14201         }
14202         ret = check_space_cache(root);
14203         err |= !!ret;
14204         if (ret) {
14205                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14206                         error("errors found in free space tree");
14207                 else
14208                         error("errors found in free space cache");
14209                 goto out;
14210         }
14211
14212         /*
14213          * We used to have to have these hole extents in between our real
14214          * extents so if we don't have this flag set we need to make sure there
14215          * are no gaps in the file extents for inodes, otherwise we can just
14216          * ignore it when this happens.
14217          */
14218         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14219         ret = do_check_fs_roots(info, &root_cache);
14220         err |= !!ret;
14221         if (ret) {
14222                 error("errors found in fs roots");
14223                 goto out;
14224         }
14225
14226         fprintf(stderr, "checking csums\n");
14227         ret = check_csums(root);
14228         err |= !!ret;
14229         if (ret) {
14230                 error("errors found in csum tree");
14231                 goto out;
14232         }
14233
14234         fprintf(stderr, "checking root refs\n");
14235         /* For low memory mode, check_fs_roots_v2 handles root refs */
14236         if (check_mode != CHECK_MODE_LOWMEM) {
14237                 ret = check_root_refs(root, &root_cache);
14238                 err |= !!ret;
14239                 if (ret) {
14240                         error("errors found in root refs");
14241                         goto out;
14242                 }
14243         }
14244
14245         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14246                 struct extent_buffer *eb;
14247
14248                 eb = list_first_entry(&root->fs_info->recow_ebs,
14249                                       struct extent_buffer, recow);
14250                 list_del_init(&eb->recow);
14251                 ret = recow_extent_buffer(root, eb);
14252                 err |= !!ret;
14253                 if (ret) {
14254                         error("fails to fix transid errors");
14255                         break;
14256                 }
14257         }
14258
14259         while (!list_empty(&delete_items)) {
14260                 struct bad_item *bad;
14261
14262                 bad = list_first_entry(&delete_items, struct bad_item, list);
14263                 list_del_init(&bad->list);
14264                 if (repair) {
14265                         ret = delete_bad_item(root, bad);
14266                         err |= !!ret;
14267                 }
14268                 free(bad);
14269         }
14270
14271         if (info->quota_enabled) {
14272                 fprintf(stderr, "checking quota groups\n");
14273                 ret = qgroup_verify_all(info);
14274                 err |= !!ret;
14275                 if (ret) {
14276                         error("failed to check quota groups");
14277                         goto out;
14278                 }
14279                 report_qgroups(0);
14280                 ret = repair_qgroups(info, &qgroups_repaired);
14281                 err |= !!ret;
14282                 if (err) {
14283                         error("failed to repair quota groups");
14284                         goto out;
14285                 }
14286                 ret = 0;
14287         }
14288
14289         if (!list_empty(&root->fs_info->recow_ebs)) {
14290                 error("transid errors in file system");
14291                 ret = 1;
14292                 err |= !!ret;
14293         }
14294 out:
14295         printf("found %llu bytes used, ",
14296                (unsigned long long)bytes_used);
14297         if (err)
14298                 printf("error(s) found\n");
14299         else
14300                 printf("no error found\n");
14301         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14302         printf("total tree bytes: %llu\n",
14303                (unsigned long long)total_btree_bytes);
14304         printf("total fs tree bytes: %llu\n",
14305                (unsigned long long)total_fs_tree_bytes);
14306         printf("total extent tree bytes: %llu\n",
14307                (unsigned long long)total_extent_tree_bytes);
14308         printf("btree space waste bytes: %llu\n",
14309                (unsigned long long)btree_space_waste);
14310         printf("file data blocks allocated: %llu\n referenced %llu\n",
14311                 (unsigned long long)data_bytes_allocated,
14312                 (unsigned long long)data_bytes_referenced);
14313
14314         free_qgroup_counts();
14315         free_root_recs_tree(&root_cache);
14316 close_out:
14317         close_ctree(root);
14318 err_out:
14319         if (ctx.progress_enabled)
14320                 task_deinit(ctx.info);
14321
14322         return err;
14323 }