btrfs-progs: check: fix the return value bug of cmd_check()
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "ulist.h"
44 #include "hash.h"
45
/*
 * Phase of the check that the progress indicator reports on.  The values
 * index task_position_string[] in print_status_check().
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/* Sentinel: must stay the last enumerator. */
	TASK_NOTHING,
};
52
53 struct task_ctx {
54         int progress_enabled;
55         enum task_position tp;
56
57         struct task_info *info;
58 };
59
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementations selectable by name (see parse_check_mode()).
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for an
 * unrecognised name.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	/* Alias, not a distinct mode. */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct list_head list;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 {
98         return list_entry(entry, struct extent_backref, list);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 {
118         return container_of(back, struct data_backref, node);
119 }
120
121 /*
122  * Much like data_backref, just removed the undetermined members
123  * and change it to use list_head.
124  * During extent scan, it is stored in root->orphan_data_extent.
125  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126  */
127 struct orphan_data_extent {
128         struct list_head list;
129         u64 root;
130         u64 objectid;
131         u64 offset;
132         u64 disk_bytenr;
133         u64 disk_len;
134 };
135
136 struct tree_backref {
137         struct extent_backref node;
138         union {
139                 u64 parent;
140                 u64 root;
141         };
142 };
143
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 {
146         return container_of(back, struct tree_backref, node);
147 }
148
/*
 * Value used to explicitly initialise
 * extent_record::flag_block_full_backref (a 2-bit field, so 2 fits).
 */
enum {
	FLAG_UNSET = 2
};
151
152 struct extent_record {
153         struct list_head backrefs;
154         struct list_head dups;
155         struct list_head list;
156         struct cache_extent cache;
157         struct btrfs_disk_key parent_key;
158         u64 start;
159         u64 max_size;
160         u64 nr;
161         u64 refs;
162         u64 extent_item_refs;
163         u64 generation;
164         u64 parent_generation;
165         u64 info_objectid;
166         u32 num_duplicates;
167         u8 info_level;
168         unsigned int flag_block_full_backref:2;
169         unsigned int found_rec:1;
170         unsigned int content_checked:1;
171         unsigned int owner_ref_checked:1;
172         unsigned int is_root:1;
173         unsigned int metadata:1;
174         unsigned int bad_full_backref:1;
175         unsigned int crossing_stripes:1;
176         unsigned int wrong_chunk_type:1;
177 };
178
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 {
181         return container_of(entry, struct extent_record, list);
182 }
183
184 struct inode_backref {
185         struct list_head list;
186         unsigned int found_dir_item:1;
187         unsigned int found_dir_index:1;
188         unsigned int found_inode_ref:1;
189         u8 filetype;
190         u8 ref_type;
191         int errors;
192         u64 dir;
193         u64 index;
194         u16 namelen;
195         char name[0];
196 };
197
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 {
200         return list_entry(entry, struct inode_backref, list);
201 }
202
203 struct root_item_record {
204         struct list_head list;
205         u64 objectid;
206         u64 bytenr;
207         u64 last_snapshot;
208         u8 level;
209         u8 drop_level;
210         int level_size;
211         struct btrfs_key drop_key;
212 };
213
/*
 * Error bits kept in inode_backref::errors and root_backref::errors and
 * decoded by print_ref_error().
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
227
228 struct file_extent_hole {
229         struct rb_node node;
230         u64 start;
231         u64 len;
232 };
233
234 struct inode_record {
235         struct list_head backrefs;
236         unsigned int checked:1;
237         unsigned int merging:1;
238         unsigned int found_inode_item:1;
239         unsigned int found_dir_item:1;
240         unsigned int found_file_extent:1;
241         unsigned int found_csum_item:1;
242         unsigned int some_csum_missing:1;
243         unsigned int nodatasum:1;
244         int errors;
245
246         u64 ino;
247         u32 nlink;
248         u32 imode;
249         u64 isize;
250         u64 nbytes;
251
252         u32 found_link;
253         u64 found_size;
254         u64 extent_start;
255         u64 extent_end;
256         struct rb_root holes;
257         struct list_head orphan_extents;
258
259         u32 refs;
260 };
261
/*
 * Error bits kept in inode_record::errors and decoded by
 * print_inode_error().
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
277
278 struct root_backref {
279         struct list_head list;
280         unsigned int found_dir_item:1;
281         unsigned int found_dir_index:1;
282         unsigned int found_back_ref:1;
283         unsigned int found_forward_ref:1;
284         unsigned int reachable:1;
285         int errors;
286         u64 ref_root;
287         u64 dir;
288         u64 index;
289         u16 namelen;
290         char name[0];
291 };
292
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 {
295         return list_entry(entry, struct root_backref, list);
296 }
297
298 struct root_record {
299         struct list_head backrefs;
300         struct cache_extent cache;
301         unsigned int found_root_item:1;
302         u64 objectid;
303         u32 found_ref;
304 };
305
306 struct ptr_node {
307         struct cache_extent cache;
308         void *data;
309 };
310
311 struct shared_node {
312         struct cache_extent cache;
313         struct cache_tree root_cache;
314         struct cache_tree inode_cache;
315         struct inode_record *current;
316         u32 refs;
317 };
318
319 struct block_info {
320         u64 start;
321         u32 size;
322 };
323
324 struct walk_control {
325         struct cache_tree shared;
326         struct shared_node *nodes[BTRFS_MAX_LEVEL];
327         int active_node;
328         int root_level;
329 };
330
331 struct bad_item {
332         struct btrfs_key key;
333         u64 root_id;
334         struct list_head list;
335 };
336
337 struct extent_entry {
338         u64 bytenr;
339         u64 bytes;
340         int count;
341         int broken;
342         struct list_head list;
343 };
344
345 struct root_item_info {
346         /* level of the root */
347         u8 level;
348         /* number of nodes at this level, must be 1 for a root */
349         int node_count;
350         u64 bytenr;
351         u64 gen;
352         struct cache_extent cache_extent;
353 };
354
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet; they are only used internally
 * for error classification.
 *
 * Every flag must own a distinct bit.  CROSSING_STRIPE_BOUNDARY used to
 * share bit 4 with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable once OR-ed into an error mask; it now uses the next
 * free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
371
372 static void *print_status_check(void *p)
373 {
374         struct task_ctx *priv = p;
375         const char work_indicator[] = { '.', 'o', 'O', 'o' };
376         uint32_t count = 0;
377         static char *task_position_string[] = {
378                 "checking extents",
379                 "checking free space cache",
380                 "checking fs roots",
381         };
382
383         task_period_start(priv->info, 1000 /* 1s */);
384
385         if (priv->tp == TASK_NOTHING)
386                 return NULL;
387
388         while (1) {
389                 printf("%s [%c]\r", task_position_string[priv->tp],
390                                 work_indicator[count % 4]);
391                 count++;
392                 fflush(stdout);
393                 task_period_wait(priv->info);
394         }
395         return NULL;
396 }
397
/*
 * Companion of print_status_check(): ends the progress line with a
 * newline and flushes stdout.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
405
406 static enum btrfs_check_mode parse_check_mode(const char *str)
407 {
408         if (strcmp(str, "lowmem") == 0)
409                 return CHECK_MODE_LOWMEM;
410         if (strcmp(str, "orig") == 0)
411                 return CHECK_MODE_ORIGINAL;
412         if (strcmp(str, "original") == 0)
413                 return CHECK_MODE_ORIGINAL;
414
415         return CHECK_MODE_UNKNOWN;
416 }
417
418 /* Compatible function to allow reuse of old codes */
419 static u64 first_extent_gap(struct rb_root *holes)
420 {
421         struct file_extent_hole *hole;
422
423         if (RB_EMPTY_ROOT(holes))
424                 return (u64)-1;
425
426         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
427         return hole->start;
428 }
429
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 {
432         struct file_extent_hole *hole1;
433         struct file_extent_hole *hole2;
434
435         hole1 = rb_entry(node1, struct file_extent_hole, node);
436         hole2 = rb_entry(node2, struct file_extent_hole, node);
437
438         if (hole1->start > hole2->start)
439                 return -1;
440         if (hole1->start < hole2->start)
441                 return 1;
442         /* Now hole1->start == hole2->start */
443         if (hole1->len >= hole2->len)
444                 /*
445                  * Hole 1 will be merge center
446                  * Same hole will be merged later
447                  */
448                 return -1;
449         /* Hole 2 will be merge center */
450         return 1;
451 }
452
453 /*
454  * Add a hole to the record
455  *
456  * This will do hole merge for copy_file_extent_holes(),
457  * which will ensure there won't be continuous holes.
458  */
459 static int add_file_extent_hole(struct rb_root *holes,
460                                 u64 start, u64 len)
461 {
462         struct file_extent_hole *hole;
463         struct file_extent_hole *prev = NULL;
464         struct file_extent_hole *next = NULL;
465
466         hole = malloc(sizeof(*hole));
467         if (!hole)
468                 return -ENOMEM;
469         hole->start = start;
470         hole->len = len;
471         /* Since compare will not return 0, no -EEXIST will happen */
472         rb_insert(holes, &hole->node, compare_hole);
473
474         /* simple merge with previous hole */
475         if (rb_prev(&hole->node))
476                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477                                 node);
478         if (prev && prev->start + prev->len >= hole->start) {
479                 hole->len = hole->start + hole->len - prev->start;
480                 hole->start = prev->start;
481                 rb_erase(&prev->node, holes);
482                 free(prev);
483                 prev = NULL;
484         }
485
486         /* iterate merge with next holes */
487         while (1) {
488                 if (!rb_next(&hole->node))
489                         break;
490                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491                                         node);
492                 if (hole->start + hole->len >= next->start) {
493                         if (hole->start + hole->len <= next->start + next->len)
494                                 hole->len = next->start + next->len -
495                                             hole->start;
496                         rb_erase(&next->node, holes);
497                         free(next);
498                         next = NULL;
499                 } else
500                         break;
501         }
502         return 0;
503 }
504
505 static int compare_hole_range(struct rb_node *node, void *data)
506 {
507         struct file_extent_hole *hole;
508         u64 start;
509
510         hole = (struct file_extent_hole *)data;
511         start = hole->start;
512
513         hole = rb_entry(node, struct file_extent_hole, node);
514         if (start < hole->start)
515                 return -1;
516         if (start >= hole->start && start < hole->start + hole->len)
517                 return 0;
518         return 1;
519 }
520
521 /*
522  * Delete a hole in the record
523  *
524  * This will do the hole split and is much restrict than add.
525  */
526 static int del_file_extent_hole(struct rb_root *holes,
527                                 u64 start, u64 len)
528 {
529         struct file_extent_hole *hole;
530         struct file_extent_hole tmp;
531         u64 prev_start = 0;
532         u64 prev_len = 0;
533         u64 next_start = 0;
534         u64 next_len = 0;
535         struct rb_node *node;
536         int have_prev = 0;
537         int have_next = 0;
538         int ret = 0;
539
540         tmp.start = start;
541         tmp.len = len;
542         node = rb_search(holes, &tmp, compare_hole_range, NULL);
543         if (!node)
544                 return -EEXIST;
545         hole = rb_entry(node, struct file_extent_hole, node);
546         if (start + len > hole->start + hole->len)
547                 return -EEXIST;
548
549         /*
550          * Now there will be no overlap, delete the hole and re-add the
551          * split(s) if they exists.
552          */
553         if (start > hole->start) {
554                 prev_start = hole->start;
555                 prev_len = start - hole->start;
556                 have_prev = 1;
557         }
558         if (hole->start + hole->len > start + len) {
559                 next_start = start + len;
560                 next_len = hole->start + hole->len - start - len;
561                 have_next = 1;
562         }
563         rb_erase(node, holes);
564         free(hole);
565         if (have_prev) {
566                 ret = add_file_extent_hole(holes, prev_start, prev_len);
567                 if (ret < 0)
568                         return ret;
569         }
570         if (have_next) {
571                 ret = add_file_extent_hole(holes, next_start, next_len);
572                 if (ret < 0)
573                         return ret;
574         }
575         return 0;
576 }
577
578 static int copy_file_extent_holes(struct rb_root *dst,
579                                   struct rb_root *src)
580 {
581         struct file_extent_hole *hole;
582         struct rb_node *node;
583         int ret = 0;
584
585         node = rb_first(src);
586         while (node) {
587                 hole = rb_entry(node, struct file_extent_hole, node);
588                 ret = add_file_extent_hole(dst, hole->start, hole->len);
589                 if (ret)
590                         break;
591                 node = rb_next(node);
592         }
593         return ret;
594 }
595
596 static void free_file_extent_holes(struct rb_root *holes)
597 {
598         struct rb_node *node;
599         struct file_extent_hole *hole;
600
601         node = rb_first(holes);
602         while (node) {
603                 hole = rb_entry(node, struct file_extent_hole, node);
604                 rb_erase(node, holes);
605                 free(hole);
606                 node = rb_first(holes);
607         }
608 }
609
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613                                  struct btrfs_root *root)
614 {
615         if (root->last_trans != trans->transid) {
616                 root->track_dirty = 1;
617                 root->last_trans = trans->transid;
618                 root->commit_root = root->node;
619                 extent_buffer_get(root->node);
620         }
621 }
622
623 static u8 imode_to_type(u32 imode)
624 {
625 #define S_SHIFT 12
626         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
628                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
629                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
630                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
631                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
632                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
633                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
634         };
635
636         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
637 #undef S_SHIFT
638 }
639
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 {
642         struct device_record *rec1;
643         struct device_record *rec2;
644
645         rec1 = rb_entry(node1, struct device_record, node);
646         rec2 = rb_entry(node2, struct device_record, node);
647         if (rec1->devid > rec2->devid)
648                 return -1;
649         else if (rec1->devid < rec2->devid)
650                 return 1;
651         else
652                 return 0;
653 }
654
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 {
657         struct inode_record *rec;
658         struct inode_backref *backref;
659         struct inode_backref *orig;
660         struct inode_backref *tmp;
661         struct orphan_data_extent *src_orphan;
662         struct orphan_data_extent *dst_orphan;
663         struct rb_node *rb;
664         size_t size;
665         int ret;
666
667         rec = malloc(sizeof(*rec));
668         if (!rec)
669                 return ERR_PTR(-ENOMEM);
670         memcpy(rec, orig_rec, sizeof(*rec));
671         rec->refs = 1;
672         INIT_LIST_HEAD(&rec->backrefs);
673         INIT_LIST_HEAD(&rec->orphan_extents);
674         rec->holes = RB_ROOT;
675
676         list_for_each_entry(orig, &orig_rec->backrefs, list) {
677                 size = sizeof(*orig) + orig->namelen + 1;
678                 backref = malloc(size);
679                 if (!backref) {
680                         ret = -ENOMEM;
681                         goto cleanup;
682                 }
683                 memcpy(backref, orig, size);
684                 list_add_tail(&backref->list, &rec->backrefs);
685         }
686         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687                 dst_orphan = malloc(sizeof(*dst_orphan));
688                 if (!dst_orphan) {
689                         ret = -ENOMEM;
690                         goto cleanup;
691                 }
692                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694         }
695         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
696         if (ret < 0)
697                 goto cleanup_rb;
698
699         return rec;
700
701 cleanup_rb:
702         rb = rb_first(&rec->holes);
703         while (rb) {
704                 struct file_extent_hole *hole;
705
706                 hole = rb_entry(rb, struct file_extent_hole, node);
707                 rb = rb_next(rb);
708                 free(hole);
709         }
710
711 cleanup:
712         if (!list_empty(&rec->backrefs))
713                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714                         list_del(&orig->list);
715                         free(orig);
716                 }
717
718         if (!list_empty(&rec->orphan_extents))
719                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720                         list_del(&orig->list);
721                         free(orig);
722                 }
723
724         free(rec);
725
726         return ERR_PTR(ret);
727 }
728
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
730                                       u64 objectid)
731 {
732         struct orphan_data_extent *orphan;
733
734         if (list_empty(orphan_extents))
735                 return;
736         printf("The following data extent is lost in tree %llu:\n",
737                objectid);
738         list_for_each_entry(orphan, orphan_extents, list) {
739                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
741                        orphan->disk_len);
742         }
743 }
744
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 {
747         u64 root_objectid = root->root_key.objectid;
748         int errors = rec->errors;
749
750         if (!errors)
751                 return;
752         /* reloc root errors, we print its corresponding fs root objectid*/
753         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754                 root_objectid = root->root_key.offset;
755                 fprintf(stderr, "reloc");
756         }
757         fprintf(stderr, "root %llu inode %llu errors %x",
758                 (unsigned long long) root_objectid,
759                 (unsigned long long) rec->ino, rec->errors);
760
761         if (errors & I_ERR_NO_INODE_ITEM)
762                 fprintf(stderr, ", no inode item");
763         if (errors & I_ERR_NO_ORPHAN_ITEM)
764                 fprintf(stderr, ", no orphan item");
765         if (errors & I_ERR_DUP_INODE_ITEM)
766                 fprintf(stderr, ", dup inode item");
767         if (errors & I_ERR_DUP_DIR_INDEX)
768                 fprintf(stderr, ", dup dir index");
769         if (errors & I_ERR_ODD_DIR_ITEM)
770                 fprintf(stderr, ", odd dir item");
771         if (errors & I_ERR_ODD_FILE_EXTENT)
772                 fprintf(stderr, ", odd file extent");
773         if (errors & I_ERR_BAD_FILE_EXTENT)
774                 fprintf(stderr, ", bad file extent");
775         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776                 fprintf(stderr, ", file extent overlap");
777         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778                 fprintf(stderr, ", file extent discount");
779         if (errors & I_ERR_DIR_ISIZE_WRONG)
780                 fprintf(stderr, ", dir isize wrong");
781         if (errors & I_ERR_FILE_NBYTES_WRONG)
782                 fprintf(stderr, ", nbytes wrong");
783         if (errors & I_ERR_ODD_CSUM_ITEM)
784                 fprintf(stderr, ", odd csum item");
785         if (errors & I_ERR_SOME_CSUM_MISSING)
786                 fprintf(stderr, ", some csum missing");
787         if (errors & I_ERR_LINK_COUNT_WRONG)
788                 fprintf(stderr, ", link count wrong");
789         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790                 fprintf(stderr, ", orphan file extent");
791         fprintf(stderr, "\n");
792         /* Print the orphan extents if needed */
793         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
794                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795
796         /* Print the holes if needed */
797         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798                 struct file_extent_hole *hole;
799                 struct rb_node *node;
800                 int found = 0;
801
802                 node = rb_first(&rec->holes);
803                 fprintf(stderr, "Found file extent holes:\n");
804                 while (node) {
805                         found = 1;
806                         hole = rb_entry(node, struct file_extent_hole, node);
807                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
808                                 hole->start, hole->len);
809                         node = rb_next(node);
810                 }
811                 if (!found)
812                         fprintf(stderr, "\tstart: 0, len: %llu\n",
813                                 round_up(rec->isize, root->sectorsize));
814         }
815 }
816
817 static void print_ref_error(int errors)
818 {
819         if (errors & REF_ERR_NO_DIR_ITEM)
820                 fprintf(stderr, ", no dir item");
821         if (errors & REF_ERR_NO_DIR_INDEX)
822                 fprintf(stderr, ", no dir index");
823         if (errors & REF_ERR_NO_INODE_REF)
824                 fprintf(stderr, ", no inode ref");
825         if (errors & REF_ERR_DUP_DIR_ITEM)
826                 fprintf(stderr, ", dup dir item");
827         if (errors & REF_ERR_DUP_DIR_INDEX)
828                 fprintf(stderr, ", dup dir index");
829         if (errors & REF_ERR_DUP_INODE_REF)
830                 fprintf(stderr, ", dup inode ref");
831         if (errors & REF_ERR_INDEX_UNMATCH)
832                 fprintf(stderr, ", index mismatch");
833         if (errors & REF_ERR_FILETYPE_UNMATCH)
834                 fprintf(stderr, ", filetype mismatch");
835         if (errors & REF_ERR_NAME_TOO_LONG)
836                 fprintf(stderr, ", name too long");
837         if (errors & REF_ERR_NO_ROOT_REF)
838                 fprintf(stderr, ", no root ref");
839         if (errors & REF_ERR_NO_ROOT_BACKREF)
840                 fprintf(stderr, ", no root backref");
841         if (errors & REF_ERR_DUP_ROOT_REF)
842                 fprintf(stderr, ", dup root ref");
843         if (errors & REF_ERR_DUP_ROOT_BACKREF)
844                 fprintf(stderr, ", dup root backref");
845         fprintf(stderr, "\n");
846 }
847
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
849                                           u64 ino, int mod)
850 {
851         struct ptr_node *node;
852         struct cache_extent *cache;
853         struct inode_record *rec = NULL;
854         int ret;
855
856         cache = lookup_cache_extent(inode_cache, ino, 1);
857         if (cache) {
858                 node = container_of(cache, struct ptr_node, cache);
859                 rec = node->data;
860                 if (mod && rec->refs > 1) {
861                         node->data = clone_inode_rec(rec);
862                         if (IS_ERR(node->data))
863                                 return node->data;
864                         rec->refs--;
865                         rec = node->data;
866                 }
867         } else if (mod) {
868                 rec = calloc(1, sizeof(*rec));
869                 if (!rec)
870                         return ERR_PTR(-ENOMEM);
871                 rec->ino = ino;
872                 rec->extent_start = (u64)-1;
873                 rec->refs = 1;
874                 INIT_LIST_HEAD(&rec->backrefs);
875                 INIT_LIST_HEAD(&rec->orphan_extents);
876                 rec->holes = RB_ROOT;
877
878                 node = malloc(sizeof(*node));
879                 if (!node) {
880                         free(rec);
881                         return ERR_PTR(-ENOMEM);
882                 }
883                 node->cache.start = ino;
884                 node->cache.size = 1;
885                 node->data = rec;
886
887                 if (ino == BTRFS_FREE_INO_OBJECTID)
888                         rec->found_link = 1;
889
890                 ret = insert_cache_extent(inode_cache, &node->cache);
891                 if (ret)
892                         return ERR_PTR(-EEXIST);
893         }
894         return rec;
895 }
896
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 {
899         struct orphan_data_extent *orphan;
900
901         while (!list_empty(orphan_extents)) {
902                 orphan = list_entry(orphan_extents->next,
903                                     struct orphan_data_extent, list);
904                 list_del(&orphan->list);
905                 free(orphan);
906         }
907 }
908
909 static void free_inode_rec(struct inode_record *rec)
910 {
911         struct inode_backref *backref;
912
913         if (--rec->refs > 0)
914                 return;
915
916         while (!list_empty(&rec->backrefs)) {
917                 backref = to_inode_backref(rec->backrefs.next);
918                 list_del(&backref->list);
919                 free(backref);
920         }
921         free_orphan_data_extents(&rec->orphan_extents);
922         free_file_extent_holes(&rec->holes);
923         free(rec);
924 }
925
926 static int can_free_inode_rec(struct inode_record *rec)
927 {
928         if (!rec->errors && rec->checked && rec->found_inode_item &&
929             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
930                 return 1;
931         return 0;
932 }
933
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935                                  struct inode_record *rec)
936 {
937         struct cache_extent *cache;
938         struct inode_backref *tmp, *backref;
939         struct ptr_node *node;
940         u8 filetype;
941
942         if (!rec->found_inode_item)
943                 return;
944
945         filetype = imode_to_type(rec->imode);
946         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947                 if (backref->found_dir_item && backref->found_dir_index) {
948                         if (backref->filetype != filetype)
949                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950                         if (!backref->errors && backref->found_inode_ref &&
951                             rec->nlink == rec->found_link) {
952                                 list_del(&backref->list);
953                                 free(backref);
954                         }
955                 }
956         }
957
958         if (!rec->checked || rec->merging)
959                 return;
960
961         if (S_ISDIR(rec->imode)) {
962                 if (rec->found_size != rec->isize)
963                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964                 if (rec->found_file_extent)
965                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
966         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967                 if (rec->found_dir_item)
968                         rec->errors |= I_ERR_ODD_DIR_ITEM;
969                 if (rec->found_size != rec->nbytes)
970                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
971                 if (rec->nlink > 0 && !no_holes &&
972                     (rec->extent_end < rec->isize ||
973                      first_extent_gap(&rec->holes) < rec->isize))
974                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
975         }
976
977         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978                 if (rec->found_csum_item && rec->nodatasum)
979                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
980                 if (rec->some_csum_missing && !rec->nodatasum)
981                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
982         }
983
984         BUG_ON(rec->refs != 1);
985         if (can_free_inode_rec(rec)) {
986                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987                 node = container_of(cache, struct ptr_node, cache);
988                 BUG_ON(node->data != rec);
989                 remove_cache_extent(inode_cache, &node->cache);
990                 free(node);
991                 free_inode_rec(rec);
992         }
993 }
994
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 {
997         struct btrfs_path path;
998         struct btrfs_key key;
999         int ret;
1000
1001         key.objectid = BTRFS_ORPHAN_OBJECTID;
1002         key.type = BTRFS_ORPHAN_ITEM_KEY;
1003         key.offset = ino;
1004
1005         btrfs_init_path(&path);
1006         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007         btrfs_release_path(&path);
1008         if (ret > 0)
1009                 ret = -ENOENT;
1010         return ret;
1011 }
1012
1013 static int process_inode_item(struct extent_buffer *eb,
1014                               int slot, struct btrfs_key *key,
1015                               struct shared_node *active_node)
1016 {
1017         struct inode_record *rec;
1018         struct btrfs_inode_item *item;
1019
1020         rec = active_node->current;
1021         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022         if (rec->found_inode_item) {
1023                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1024                 return 1;
1025         }
1026         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027         rec->nlink = btrfs_inode_nlink(eb, item);
1028         rec->isize = btrfs_inode_size(eb, item);
1029         rec->nbytes = btrfs_inode_nbytes(eb, item);
1030         rec->imode = btrfs_inode_mode(eb, item);
1031         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032                 rec->nodatasum = 1;
1033         rec->found_inode_item = 1;
1034         if (rec->nlink == 0)
1035                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036         maybe_free_inode_rec(&active_node->inode_cache, rec);
1037         return 0;
1038 }
1039
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041                                                 const char *name,
1042                                                 int namelen, u64 dir)
1043 {
1044         struct inode_backref *backref;
1045
1046         list_for_each_entry(backref, &rec->backrefs, list) {
1047                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048                         break;
1049                 if (backref->dir != dir || backref->namelen != namelen)
1050                         continue;
1051                 if (memcmp(name, backref->name, namelen))
1052                         continue;
1053                 return backref;
1054         }
1055
1056         backref = malloc(sizeof(*backref) + namelen + 1);
1057         if (!backref)
1058                 return NULL;
1059         memset(backref, 0, sizeof(*backref));
1060         backref->dir = dir;
1061         backref->namelen = namelen;
1062         memcpy(backref->name, name, namelen);
1063         backref->name[namelen] = '\0';
1064         list_add_tail(&backref->list, &rec->backrefs);
1065         return backref;
1066 }
1067
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069                              u64 ino, u64 dir, u64 index,
1070                              const char *name, int namelen,
1071                              u8 filetype, u8 itemtype, int errors)
1072 {
1073         struct inode_record *rec;
1074         struct inode_backref *backref;
1075
1076         rec = get_inode_rec(inode_cache, ino, 1);
1077         BUG_ON(IS_ERR(rec));
1078         backref = get_inode_backref(rec, name, namelen, dir);
1079         BUG_ON(!backref);
1080         if (errors)
1081                 backref->errors |= errors;
1082         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083                 if (backref->found_dir_index)
1084                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085                 if (backref->found_inode_ref && backref->index != index)
1086                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1087                 if (backref->found_dir_item && backref->filetype != filetype)
1088                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089
1090                 backref->index = index;
1091                 backref->filetype = filetype;
1092                 backref->found_dir_index = 1;
1093         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094                 rec->found_link++;
1095                 if (backref->found_dir_item)
1096                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097                 if (backref->found_dir_index && backref->filetype != filetype)
1098                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099
1100                 backref->filetype = filetype;
1101                 backref->found_dir_item = 1;
1102         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104                 if (backref->found_inode_ref)
1105                         backref->errors |= REF_ERR_DUP_INODE_REF;
1106                 if (backref->found_dir_index && backref->index != index)
1107                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1108                 else
1109                         backref->index = index;
1110
1111                 backref->ref_type = itemtype;
1112                 backref->found_inode_ref = 1;
1113         } else {
1114                 BUG_ON(1);
1115         }
1116
1117         maybe_free_inode_rec(inode_cache, rec);
1118         return 0;
1119 }
1120
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122                             struct cache_tree *dst_cache)
1123 {
1124         struct inode_backref *backref;
1125         u32 dir_count = 0;
1126         int ret = 0;
1127
1128         dst->merging = 1;
1129         list_for_each_entry(backref, &src->backrefs, list) {
1130                 if (backref->found_dir_index) {
1131                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1132                                         backref->index, backref->name,
1133                                         backref->namelen, backref->filetype,
1134                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1135                 }
1136                 if (backref->found_dir_item) {
1137                         dir_count++;
1138                         add_inode_backref(dst_cache, dst->ino,
1139                                         backref->dir, 0, backref->name,
1140                                         backref->namelen, backref->filetype,
1141                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1142                 }
1143                 if (backref->found_inode_ref) {
1144                         add_inode_backref(dst_cache, dst->ino,
1145                                         backref->dir, backref->index,
1146                                         backref->name, backref->namelen, 0,
1147                                         backref->ref_type, backref->errors);
1148                 }
1149         }
1150
1151         if (src->found_dir_item)
1152                 dst->found_dir_item = 1;
1153         if (src->found_file_extent)
1154                 dst->found_file_extent = 1;
1155         if (src->found_csum_item)
1156                 dst->found_csum_item = 1;
1157         if (src->some_csum_missing)
1158                 dst->some_csum_missing = 1;
1159         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1161                 if (ret < 0)
1162                         return ret;
1163         }
1164
1165         BUG_ON(src->found_link < dir_count);
1166         dst->found_link += src->found_link - dir_count;
1167         dst->found_size += src->found_size;
1168         if (src->extent_start != (u64)-1) {
1169                 if (dst->extent_start == (u64)-1) {
1170                         dst->extent_start = src->extent_start;
1171                         dst->extent_end = src->extent_end;
1172                 } else {
1173                         if (dst->extent_end > src->extent_start)
1174                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175                         else if (dst->extent_end < src->extent_start) {
1176                                 ret = add_file_extent_hole(&dst->holes,
1177                                         dst->extent_end,
1178                                         src->extent_start - dst->extent_end);
1179                         }
1180                         if (dst->extent_end < src->extent_end)
1181                                 dst->extent_end = src->extent_end;
1182                 }
1183         }
1184
1185         dst->errors |= src->errors;
1186         if (src->found_inode_item) {
1187                 if (!dst->found_inode_item) {
1188                         dst->nlink = src->nlink;
1189                         dst->isize = src->isize;
1190                         dst->nbytes = src->nbytes;
1191                         dst->imode = src->imode;
1192                         dst->nodatasum = src->nodatasum;
1193                         dst->found_inode_item = 1;
1194                 } else {
1195                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1196                 }
1197         }
1198         dst->merging = 0;
1199
1200         return 0;
1201 }
1202
1203 static int splice_shared_node(struct shared_node *src_node,
1204                               struct shared_node *dst_node)
1205 {
1206         struct cache_extent *cache;
1207         struct ptr_node *node, *ins;
1208         struct cache_tree *src, *dst;
1209         struct inode_record *rec, *conflict;
1210         u64 current_ino = 0;
1211         int splice = 0;
1212         int ret;
1213
1214         if (--src_node->refs == 0)
1215                 splice = 1;
1216         if (src_node->current)
1217                 current_ino = src_node->current->ino;
1218
1219         src = &src_node->root_cache;
1220         dst = &dst_node->root_cache;
1221 again:
1222         cache = search_cache_extent(src, 0);
1223         while (cache) {
1224                 node = container_of(cache, struct ptr_node, cache);
1225                 rec = node->data;
1226                 cache = next_cache_extent(cache);
1227
1228                 if (splice) {
1229                         remove_cache_extent(src, &node->cache);
1230                         ins = node;
1231                 } else {
1232                         ins = malloc(sizeof(*ins));
1233                         BUG_ON(!ins);
1234                         ins->cache.start = node->cache.start;
1235                         ins->cache.size = node->cache.size;
1236                         ins->data = rec;
1237                         rec->refs++;
1238                 }
1239                 ret = insert_cache_extent(dst, &ins->cache);
1240                 if (ret == -EEXIST) {
1241                         conflict = get_inode_rec(dst, rec->ino, 1);
1242                         BUG_ON(IS_ERR(conflict));
1243                         merge_inode_recs(rec, conflict, dst);
1244                         if (rec->checked) {
1245                                 conflict->checked = 1;
1246                                 if (dst_node->current == conflict)
1247                                         dst_node->current = NULL;
1248                         }
1249                         maybe_free_inode_rec(dst, conflict);
1250                         free_inode_rec(rec);
1251                         free(ins);
1252                 } else {
1253                         BUG_ON(ret);
1254                 }
1255         }
1256
1257         if (src == &src_node->root_cache) {
1258                 src = &src_node->inode_cache;
1259                 dst = &dst_node->inode_cache;
1260                 goto again;
1261         }
1262
1263         if (current_ino > 0 && (!dst_node->current ||
1264             current_ino > dst_node->current->ino)) {
1265                 if (dst_node->current) {
1266                         dst_node->current->checked = 1;
1267                         maybe_free_inode_rec(dst, dst_node->current);
1268                 }
1269                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270                 BUG_ON(IS_ERR(dst_node->current));
1271         }
1272         return 0;
1273 }
1274
1275 static void free_inode_ptr(struct cache_extent *cache)
1276 {
1277         struct ptr_node *node;
1278         struct inode_record *rec;
1279
1280         node = container_of(cache, struct ptr_node, cache);
1281         rec = node->data;
1282         free_inode_rec(rec);
1283         free(node);
1284 }
1285
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1289                                             u64 bytenr)
1290 {
1291         struct cache_extent *cache;
1292         struct shared_node *node;
1293
1294         cache = lookup_cache_extent(shared, bytenr, 1);
1295         if (cache) {
1296                 node = container_of(cache, struct shared_node, cache);
1297                 return node;
1298         }
1299         return NULL;
1300 }
1301
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1303 {
1304         int ret;
1305         struct shared_node *node;
1306
1307         node = calloc(1, sizeof(*node));
1308         if (!node)
1309                 return -ENOMEM;
1310         node->cache.start = bytenr;
1311         node->cache.size = 1;
1312         cache_tree_init(&node->root_cache);
1313         cache_tree_init(&node->inode_cache);
1314         node->refs = refs;
1315
1316         ret = insert_cache_extent(shared, &node->cache);
1317
1318         return ret;
1319 }
1320
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322                              struct walk_control *wc, int level)
1323 {
1324         struct shared_node *node;
1325         struct shared_node *dest;
1326         int ret;
1327
1328         if (level == wc->active_node)
1329                 return 0;
1330
1331         BUG_ON(wc->active_node <= level);
1332         node = find_shared_node(&wc->shared, bytenr);
1333         if (!node) {
1334                 ret = add_shared_node(&wc->shared, bytenr, refs);
1335                 BUG_ON(ret);
1336                 node = find_shared_node(&wc->shared, bytenr);
1337                 wc->nodes[level] = node;
1338                 wc->active_node = level;
1339                 return 0;
1340         }
1341
1342         if (wc->root_level == wc->active_node &&
1343             btrfs_root_refs(&root->root_item) == 0) {
1344                 if (--node->refs == 0) {
1345                         free_inode_recs_tree(&node->root_cache);
1346                         free_inode_recs_tree(&node->inode_cache);
1347                         remove_cache_extent(&wc->shared, &node->cache);
1348                         free(node);
1349                 }
1350                 return 1;
1351         }
1352
1353         dest = wc->nodes[wc->active_node];
1354         splice_shared_node(node, dest);
1355         if (node->refs == 0) {
1356                 remove_cache_extent(&wc->shared, &node->cache);
1357                 free(node);
1358         }
1359         return 1;
1360 }
1361
1362 static int leave_shared_node(struct btrfs_root *root,
1363                              struct walk_control *wc, int level)
1364 {
1365         struct shared_node *node;
1366         struct shared_node *dest;
1367         int i;
1368
1369         if (level == wc->root_level)
1370                 return 0;
1371
1372         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1373                 if (wc->nodes[i])
1374                         break;
1375         }
1376         BUG_ON(i >= BTRFS_MAX_LEVEL);
1377
1378         node = wc->nodes[wc->active_node];
1379         wc->nodes[wc->active_node] = NULL;
1380         wc->active_node = i;
1381
1382         dest = wc->nodes[wc->active_node];
1383         if (wc->active_node < wc->root_level ||
1384             btrfs_root_refs(&root->root_item) > 0) {
1385                 BUG_ON(node->refs <= 1);
1386                 splice_shared_node(node, dest);
1387         } else {
1388                 BUG_ON(node->refs < 2);
1389                 node->refs--;
1390         }
1391         return 0;
1392 }
1393
1394 /*
1395  * Returns:
1396  * < 0 - on error
1397  * 1   - if the root with id child_root_id is a child of root parent_root_id
1398  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1399  *       has other root(s) as parent(s)
1400  * 2   - if the root child_root_id doesn't have any parent roots
1401  */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1403                          u64 child_root_id)
1404 {
1405         struct btrfs_path path;
1406         struct btrfs_key key;
1407         struct extent_buffer *leaf;
1408         int has_parent = 0;
1409         int ret;
1410
1411         btrfs_init_path(&path);
1412
1413         key.objectid = parent_root_id;
1414         key.type = BTRFS_ROOT_REF_KEY;
1415         key.offset = child_root_id;
1416         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1417                                 0, 0);
1418         if (ret < 0)
1419                 return ret;
1420         btrfs_release_path(&path);
1421         if (!ret)
1422                 return 1;
1423
1424         key.objectid = child_root_id;
1425         key.type = BTRFS_ROOT_BACKREF_KEY;
1426         key.offset = 0;
1427         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1428                                 0, 0);
1429         if (ret < 0)
1430                 goto out;
1431
1432         while (1) {
1433                 leaf = path.nodes[0];
1434                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1436                         if (ret)
1437                                 break;
1438                         leaf = path.nodes[0];
1439                 }
1440
1441                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1442                 if (key.objectid != child_root_id ||
1443                     key.type != BTRFS_ROOT_BACKREF_KEY)
1444                         break;
1445
1446                 has_parent = 1;
1447
1448                 if (key.offset == parent_root_id) {
1449                         btrfs_release_path(&path);
1450                         return 1;
1451                 }
1452
1453                 path.slots[0]++;
1454         }
1455 out:
1456         btrfs_release_path(&path);
1457         if (ret < 0)
1458                 return ret;
1459         return has_parent ? 0 : 2;
1460 }
1461
1462 static int process_dir_item(struct btrfs_root *root,
1463                             struct extent_buffer *eb,
1464                             int slot, struct btrfs_key *key,
1465                             struct shared_node *active_node)
1466 {
1467         u32 total;
1468         u32 cur = 0;
1469         u32 len;
1470         u32 name_len;
1471         u32 data_len;
1472         int error;
1473         int nritems = 0;
1474         u8 filetype;
1475         struct btrfs_dir_item *di;
1476         struct inode_record *rec;
1477         struct cache_tree *root_cache;
1478         struct cache_tree *inode_cache;
1479         struct btrfs_key location;
1480         char namebuf[BTRFS_NAME_LEN];
1481
1482         root_cache = &active_node->root_cache;
1483         inode_cache = &active_node->inode_cache;
1484         rec = active_node->current;
1485         rec->found_dir_item = 1;
1486
1487         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488         total = btrfs_item_size_nr(eb, slot);
1489         while (cur < total) {
1490                 nritems++;
1491                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492                 name_len = btrfs_dir_name_len(eb, di);
1493                 data_len = btrfs_dir_data_len(eb, di);
1494                 filetype = btrfs_dir_type(eb, di);
1495
1496                 rec->found_size += name_len;
1497                 if (name_len <= BTRFS_NAME_LEN) {
1498                         len = name_len;
1499                         error = 0;
1500                 } else {
1501                         len = BTRFS_NAME_LEN;
1502                         error = REF_ERR_NAME_TOO_LONG;
1503                 }
1504                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505
1506                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507                         add_inode_backref(inode_cache, location.objectid,
1508                                           key->objectid, key->offset, namebuf,
1509                                           len, filetype, key->type, error);
1510                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511                         add_inode_backref(root_cache, location.objectid,
1512                                           key->objectid, key->offset,
1513                                           namebuf, len, filetype,
1514                                           key->type, error);
1515                 } else {
1516                         fprintf(stderr, "invalid location in dir item %u\n",
1517                                 location.type);
1518                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519                                           key->objectid, key->offset, namebuf,
1520                                           len, filetype, key->type, error);
1521                 }
1522
1523                 len = sizeof(*di) + name_len + data_len;
1524                 di = (struct btrfs_dir_item *)((char *)di + len);
1525                 cur += len;
1526         }
1527         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1529
1530         return 0;
1531 }
1532
1533 static int process_inode_ref(struct extent_buffer *eb,
1534                              int slot, struct btrfs_key *key,
1535                              struct shared_node *active_node)
1536 {
1537         u32 total;
1538         u32 cur = 0;
1539         u32 len;
1540         u32 name_len;
1541         u64 index;
1542         int error;
1543         struct cache_tree *inode_cache;
1544         struct btrfs_inode_ref *ref;
1545         char namebuf[BTRFS_NAME_LEN];
1546
1547         inode_cache = &active_node->inode_cache;
1548
1549         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550         total = btrfs_item_size_nr(eb, slot);
1551         while (cur < total) {
1552                 name_len = btrfs_inode_ref_name_len(eb, ref);
1553                 index = btrfs_inode_ref_index(eb, ref);
1554                 if (name_len <= BTRFS_NAME_LEN) {
1555                         len = name_len;
1556                         error = 0;
1557                 } else {
1558                         len = BTRFS_NAME_LEN;
1559                         error = REF_ERR_NAME_TOO_LONG;
1560                 }
1561                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562                 add_inode_backref(inode_cache, key->objectid, key->offset,
1563                                   index, namebuf, len, 0, key->type, error);
1564
1565                 len = sizeof(*ref) + name_len;
1566                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1567                 cur += len;
1568         }
1569         return 0;
1570 }
1571
1572 static int process_inode_extref(struct extent_buffer *eb,
1573                                 int slot, struct btrfs_key *key,
1574                                 struct shared_node *active_node)
1575 {
1576         u32 total;
1577         u32 cur = 0;
1578         u32 len;
1579         u32 name_len;
1580         u64 index;
1581         u64 parent;
1582         int error;
1583         struct cache_tree *inode_cache;
1584         struct btrfs_inode_extref *extref;
1585         char namebuf[BTRFS_NAME_LEN];
1586
1587         inode_cache = &active_node->inode_cache;
1588
1589         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590         total = btrfs_item_size_nr(eb, slot);
1591         while (cur < total) {
1592                 name_len = btrfs_inode_extref_name_len(eb, extref);
1593                 index = btrfs_inode_extref_index(eb, extref);
1594                 parent = btrfs_inode_extref_parent(eb, extref);
1595                 if (name_len <= BTRFS_NAME_LEN) {
1596                         len = name_len;
1597                         error = 0;
1598                 } else {
1599                         len = BTRFS_NAME_LEN;
1600                         error = REF_ERR_NAME_TOO_LONG;
1601                 }
1602                 read_extent_buffer(eb, namebuf,
1603                                    (unsigned long)(extref + 1), len);
1604                 add_inode_backref(inode_cache, key->objectid, parent,
1605                                   index, namebuf, len, 0, key->type, error);
1606
1607                 len = sizeof(*extref) + name_len;
1608                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1609                 cur += len;
1610         }
1611         return 0;
1612
1613 }
1614
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616                             u64 len, u64 *found)
1617 {
1618         struct btrfs_key key;
1619         struct btrfs_path path;
1620         struct extent_buffer *leaf;
1621         int ret;
1622         size_t size;
1623         *found = 0;
1624         u64 csum_end;
1625         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626
1627         btrfs_init_path(&path);
1628
1629         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630         key.offset = start;
1631         key.type = BTRFS_EXTENT_CSUM_KEY;
1632
1633         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1634                                 &key, &path, 0, 0);
1635         if (ret < 0)
1636                 goto out;
1637         if (ret > 0 && path.slots[0] > 0) {
1638                 leaf = path.nodes[0];
1639                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641                     key.type == BTRFS_EXTENT_CSUM_KEY)
1642                         path.slots[0]--;
1643         }
1644
1645         while (len > 0) {
1646                 leaf = path.nodes[0];
1647                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1649                         if (ret > 0)
1650                                 break;
1651                         else if (ret < 0)
1652                                 goto out;
1653                         leaf = path.nodes[0];
1654                 }
1655
1656                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1657                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658                     key.type != BTRFS_EXTENT_CSUM_KEY)
1659                         break;
1660
1661                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1662                 if (key.offset >= start + len)
1663                         break;
1664
1665                 if (key.offset > start)
1666                         start = key.offset;
1667
1668                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669                 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670                 if (csum_end > start) {
1671                         size = min(csum_end - start, len);
1672                         len -= size;
1673                         start += size;
1674                         *found += size;
1675                 }
1676
1677                 path.slots[0]++;
1678         }
1679 out:
1680         btrfs_release_path(&path);
1681         if (ret < 0)
1682                 return ret;
1683         return 0;
1684 }
1685
1686 static int process_file_extent(struct btrfs_root *root,
1687                                 struct extent_buffer *eb,
1688                                 int slot, struct btrfs_key *key,
1689                                 struct shared_node *active_node)
1690 {
1691         struct inode_record *rec;
1692         struct btrfs_file_extent_item *fi;
1693         u64 num_bytes = 0;
1694         u64 disk_bytenr = 0;
1695         u64 extent_offset = 0;
1696         u64 mask = root->sectorsize - 1;
1697         int extent_type;
1698         int ret;
1699
1700         rec = active_node->current;
1701         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702         rec->found_file_extent = 1;
1703
1704         if (rec->extent_start == (u64)-1) {
1705                 rec->extent_start = key->offset;
1706                 rec->extent_end = key->offset;
1707         }
1708
1709         if (rec->extent_end > key->offset)
1710                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711         else if (rec->extent_end < key->offset) {
1712                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713                                            key->offset - rec->extent_end);
1714                 if (ret < 0)
1715                         return ret;
1716         }
1717
1718         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719         extent_type = btrfs_file_extent_type(eb, fi);
1720
1721         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723                 if (num_bytes == 0)
1724                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725                 rec->found_size += num_bytes;
1726                 num_bytes = (num_bytes + mask) & ~mask;
1727         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731                 extent_offset = btrfs_file_extent_offset(eb, fi);
1732                 if (num_bytes == 0 || (num_bytes & mask))
1733                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734                 if (num_bytes + extent_offset >
1735                     btrfs_file_extent_ram_bytes(eb, fi))
1736                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1737                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738                     (btrfs_file_extent_compression(eb, fi) ||
1739                      btrfs_file_extent_encryption(eb, fi) ||
1740                      btrfs_file_extent_other_encoding(eb, fi)))
1741                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1742                 if (disk_bytenr > 0)
1743                         rec->found_size += num_bytes;
1744         } else {
1745                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746         }
1747         rec->extent_end = key->offset + num_bytes;
1748
1749         /*
1750          * The data reloc tree will copy full extents into its inode and then
1751          * copy the corresponding csums.  Because the extent it copied could be
1752          * a preallocated extent that hasn't been written to yet there may be no
1753          * csums to copy, ergo we won't have csums for our file extent.  This is
1754          * ok so just don't bother checking csums if the inode belongs to the
1755          * data reloc tree.
1756          */
1757         if (disk_bytenr > 0 &&
1758             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1759                 u64 found;
1760                 if (btrfs_file_extent_compression(eb, fi))
1761                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762                 else
1763                         disk_bytenr += extent_offset;
1764
1765                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1766                 if (ret < 0)
1767                         return ret;
1768                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769                         if (found > 0)
1770                                 rec->found_csum_item = 1;
1771                         if (found < num_bytes)
1772                                 rec->some_csum_missing = 1;
1773                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774                         if (found > 0)
1775                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1776                 }
1777         }
1778         return 0;
1779 }
1780
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782                             struct walk_control *wc)
1783 {
1784         struct btrfs_key key;
1785         u32 nritems;
1786         int i;
1787         int ret = 0;
1788         struct cache_tree *inode_cache;
1789         struct shared_node *active_node;
1790
1791         if (wc->root_level == wc->active_node &&
1792             btrfs_root_refs(&root->root_item) == 0)
1793                 return 0;
1794
1795         active_node = wc->nodes[wc->active_node];
1796         inode_cache = &active_node->inode_cache;
1797         nritems = btrfs_header_nritems(eb);
1798         for (i = 0; i < nritems; i++) {
1799                 btrfs_item_key_to_cpu(eb, &key, i);
1800
1801                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802                         continue;
1803                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1804                         continue;
1805
1806                 if (active_node->current == NULL ||
1807                     active_node->current->ino < key.objectid) {
1808                         if (active_node->current) {
1809                                 active_node->current->checked = 1;
1810                                 maybe_free_inode_rec(inode_cache,
1811                                                      active_node->current);
1812                         }
1813                         active_node->current = get_inode_rec(inode_cache,
1814                                                              key.objectid, 1);
1815                         BUG_ON(IS_ERR(active_node->current));
1816                 }
1817                 switch (key.type) {
1818                 case BTRFS_DIR_ITEM_KEY:
1819                 case BTRFS_DIR_INDEX_KEY:
1820                         ret = process_dir_item(root, eb, i, &key, active_node);
1821                         break;
1822                 case BTRFS_INODE_REF_KEY:
1823                         ret = process_inode_ref(eb, i, &key, active_node);
1824                         break;
1825                 case BTRFS_INODE_EXTREF_KEY:
1826                         ret = process_inode_extref(eb, i, &key, active_node);
1827                         break;
1828                 case BTRFS_INODE_ITEM_KEY:
1829                         ret = process_inode_item(eb, i, &key, active_node);
1830                         break;
1831                 case BTRFS_EXTENT_DATA_KEY:
1832                         ret = process_file_extent(root, eb, i, &key,
1833                                                   active_node);
1834                         break;
1835                 default:
1836                         break;
1837                 };
1838         }
1839         return ret;
1840 }
1841
1842 static void reada_walk_down(struct btrfs_root *root,
1843                             struct extent_buffer *node, int slot)
1844 {
1845         u64 bytenr;
1846         u64 ptr_gen;
1847         u32 nritems;
1848         u32 blocksize;
1849         int i;
1850         int level;
1851
1852         level = btrfs_header_level(node);
1853         if (level != 1)
1854                 return;
1855
1856         nritems = btrfs_header_nritems(node);
1857         blocksize = root->nodesize;
1858         for (i = slot; i < nritems; i++) {
1859                 bytenr = btrfs_node_blockptr(node, i);
1860                 ptr_gen = btrfs_node_ptr_generation(node, i);
1861                 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1862         }
1863 }
1864
1865 /*
1866  * Check the child node/leaf by the following condition:
1867  * 1. the first item key of the node/leaf should be the same with the one
1868  *    in parent.
1869  * 2. block in parent node should match the child node/leaf.
1870  * 3. generation of parent node and child's header should be consistent.
1871  *
1872  * Or the child node/leaf pointed by the key in parent is not valid.
1873  *
1874  * We hope to check leaf owner too, but since subvol may share leaves,
1875  * which makes leaf owner check not so strong, key check should be
1876  * sufficient enough for that case.
1877  */
1878 static int check_child_node(struct btrfs_root *root,
1879                             struct extent_buffer *parent, int slot,
1880                             struct extent_buffer *child)
1881 {
1882         struct btrfs_key parent_key;
1883         struct btrfs_key child_key;
1884         int ret = 0;
1885
1886         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1887         if (btrfs_header_level(child) == 0)
1888                 btrfs_item_key_to_cpu(child, &child_key, 0);
1889         else
1890                 btrfs_node_key_to_cpu(child, &child_key, 0);
1891
1892         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1893                 ret = -EINVAL;
1894                 fprintf(stderr,
1895                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896                         parent_key.objectid, parent_key.type, parent_key.offset,
1897                         child_key.objectid, child_key.type, child_key.offset);
1898         }
1899         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900                 ret = -EINVAL;
1901                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902                         btrfs_node_blockptr(parent, slot),
1903                         btrfs_header_bytenr(child));
1904         }
1905         if (btrfs_node_ptr_generation(parent, slot) !=
1906             btrfs_header_generation(child)) {
1907                 ret = -EINVAL;
1908                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909                         btrfs_header_generation(child),
1910                         btrfs_node_ptr_generation(parent, slot));
1911         }
1912         return ret;
1913 }
1914
/*
 * Per-level cache of the most recent extent reference lookup, used by
 * walk_down_tree() to avoid repeating btrfs_lookup_extent_info() for the
 * same block.  Both arrays are indexed by tree level.
 */
struct node_refs {
        /* start bytenr of the block last looked up at each level */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count found for that block */
        u64 refs[BTRFS_MAX_LEVEL];
};
1919
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921                           struct walk_control *wc, int *level,
1922                           struct node_refs *nrefs)
1923 {
1924         enum btrfs_tree_block_status status;
1925         u64 bytenr;
1926         u64 ptr_gen;
1927         struct extent_buffer *next;
1928         struct extent_buffer *cur;
1929         u32 blocksize;
1930         int ret, err = 0;
1931         u64 refs;
1932
1933         WARN_ON(*level < 0);
1934         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1935
1936         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937                 refs = nrefs->refs[*level];
1938                 ret = 0;
1939         } else {
1940                 ret = btrfs_lookup_extent_info(NULL, root,
1941                                        path->nodes[*level]->start,
1942                                        *level, 1, &refs, NULL);
1943                 if (ret < 0) {
1944                         err = ret;
1945                         goto out;
1946                 }
1947                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948                 nrefs->refs[*level] = refs;
1949         }
1950
1951         if (refs > 1) {
1952                 ret = enter_shared_node(root, path->nodes[*level]->start,
1953                                         refs, wc, *level);
1954                 if (ret > 0) {
1955                         err = ret;
1956                         goto out;
1957                 }
1958         }
1959
1960         while (*level >= 0) {
1961                 WARN_ON(*level < 0);
1962                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963                 cur = path->nodes[*level];
1964
1965                 if (btrfs_header_level(cur) != *level)
1966                         WARN_ON(1);
1967
1968                 if (path->slots[*level] >= btrfs_header_nritems(cur))
1969                         break;
1970                 if (*level == 0) {
1971                         ret = process_one_leaf(root, cur, wc);
1972                         if (ret < 0)
1973                                 err = ret;
1974                         break;
1975                 }
1976                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978                 blocksize = root->nodesize;
1979
1980                 if (bytenr == nrefs->bytenr[*level - 1]) {
1981                         refs = nrefs->refs[*level - 1];
1982                 } else {
1983                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984                                         *level - 1, 1, &refs, NULL);
1985                         if (ret < 0) {
1986                                 refs = 0;
1987                         } else {
1988                                 nrefs->bytenr[*level - 1] = bytenr;
1989                                 nrefs->refs[*level - 1] = refs;
1990                         }
1991                 }
1992
1993                 if (refs > 1) {
1994                         ret = enter_shared_node(root, bytenr, refs,
1995                                                 wc, *level - 1);
1996                         if (ret > 0) {
1997                                 path->slots[*level]++;
1998                                 continue;
1999                         }
2000                 }
2001
2002                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004                         free_extent_buffer(next);
2005                         reada_walk_down(root, cur, path->slots[*level]);
2006                         next = read_tree_block(root, bytenr, blocksize,
2007                                                ptr_gen);
2008                         if (!extent_buffer_uptodate(next)) {
2009                                 struct btrfs_key node_key;
2010
2011                                 btrfs_node_key_to_cpu(path->nodes[*level],
2012                                                       &node_key,
2013                                                       path->slots[*level]);
2014                                 btrfs_add_corrupt_extent_record(root->fs_info,
2015                                                 &node_key,
2016                                                 path->nodes[*level]->start,
2017                                                 root->nodesize, *level);
2018                                 err = -EIO;
2019                                 goto out;
2020                         }
2021                 }
2022
2023                 ret = check_child_node(root, cur, path->slots[*level], next);
2024                 if (ret) {
2025                         err = ret;
2026                         goto out;
2027                 }
2028
2029                 if (btrfs_is_leaf(next))
2030                         status = btrfs_check_leaf(root, NULL, next);
2031                 else
2032                         status = btrfs_check_node(root, NULL, next);
2033                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034                         free_extent_buffer(next);
2035                         err = -EIO;
2036                         goto out;
2037                 }
2038
2039                 *level = *level - 1;
2040                 free_extent_buffer(path->nodes[*level]);
2041                 path->nodes[*level] = next;
2042                 path->slots[*level] = 0;
2043         }
2044 out:
2045         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2046         return err;
2047 }
2048
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050                         struct walk_control *wc, int *level)
2051 {
2052         int i;
2053         struct extent_buffer *leaf;
2054
2055         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2056                 leaf = path->nodes[i];
2057                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2058                         path->slots[i]++;
2059                         *level = i;
2060                         return 0;
2061                 } else {
2062                         free_extent_buffer(path->nodes[*level]);
2063                         path->nodes[*level] = NULL;
2064                         BUG_ON(*level > wc->active_node);
2065                         if (*level == wc->active_node)
2066                                 leave_shared_node(root, wc, *level);
2067                         *level = i + 1;
2068                 }
2069         }
2070         return 1;
2071 }
2072
2073 static int check_root_dir(struct inode_record *rec)
2074 {
2075         struct inode_backref *backref;
2076         int ret = -1;
2077
2078         if (!rec->found_inode_item || rec->errors)
2079                 goto out;
2080         if (rec->nlink != 1 || rec->found_link != 0)
2081                 goto out;
2082         if (list_empty(&rec->backrefs))
2083                 goto out;
2084         backref = to_inode_backref(rec->backrefs.next);
2085         if (!backref->found_inode_ref)
2086                 goto out;
2087         if (backref->index != 0 || backref->namelen != 2 ||
2088             memcmp(backref->name, "..", 2))
2089                 goto out;
2090         if (backref->found_dir_index || backref->found_dir_item)
2091                 goto out;
2092         ret = 0;
2093 out:
2094         return ret;
2095 }
2096
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098                               struct btrfs_root *root, struct btrfs_path *path,
2099                               struct inode_record *rec)
2100 {
2101         struct btrfs_inode_item *ei;
2102         struct btrfs_key key;
2103         int ret;
2104
2105         key.objectid = rec->ino;
2106         key.type = BTRFS_INODE_ITEM_KEY;
2107         key.offset = (u64)-1;
2108
2109         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2110         if (ret < 0)
2111                 goto out;
2112         if (ret) {
2113                 if (!path->slots[0]) {
2114                         ret = -ENOENT;
2115                         goto out;
2116                 }
2117                 path->slots[0]--;
2118                 ret = 0;
2119         }
2120         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121         if (key.objectid != rec->ino) {
2122                 ret = -ENOENT;
2123                 goto out;
2124         }
2125
2126         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127                             struct btrfs_inode_item);
2128         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129         btrfs_mark_buffer_dirty(path->nodes[0]);
2130         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132                root->root_key.objectid);
2133 out:
2134         btrfs_release_path(path);
2135         return ret;
2136 }
2137
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139                                     struct btrfs_root *root,
2140                                     struct btrfs_path *path,
2141                                     struct inode_record *rec)
2142 {
2143         int ret;
2144
2145         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146         btrfs_release_path(path);
2147         if (!ret)
2148                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2149         return ret;
2150 }
2151
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153                                struct btrfs_root *root,
2154                                struct btrfs_path *path,
2155                                struct inode_record *rec)
2156 {
2157         struct btrfs_inode_item *ei;
2158         struct btrfs_key key;
2159         int ret = 0;
2160
2161         key.objectid = rec->ino;
2162         key.type = BTRFS_INODE_ITEM_KEY;
2163         key.offset = 0;
2164
2165         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2166         if (ret) {
2167                 if (ret > 0)
2168                         ret = -ENOENT;
2169                 goto out;
2170         }
2171
2172         /* Since ret == 0, no need to check anything */
2173         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174                             struct btrfs_inode_item);
2175         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176         btrfs_mark_buffer_dirty(path->nodes[0]);
2177         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178         printf("reset nbytes for ino %llu root %llu\n",
2179                rec->ino, root->root_key.objectid);
2180 out:
2181         btrfs_release_path(path);
2182         return ret;
2183 }
2184
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186                                  struct cache_tree *inode_cache,
2187                                  struct inode_record *rec,
2188                                  struct inode_backref *backref)
2189 {
2190         struct btrfs_path path;
2191         struct btrfs_trans_handle *trans;
2192         struct btrfs_dir_item *dir_item;
2193         struct extent_buffer *leaf;
2194         struct btrfs_key key;
2195         struct btrfs_disk_key disk_key;
2196         struct inode_record *dir_rec;
2197         unsigned long name_ptr;
2198         u32 data_size = sizeof(*dir_item) + backref->namelen;
2199         int ret;
2200
2201         trans = btrfs_start_transaction(root, 1);
2202         if (IS_ERR(trans))
2203                 return PTR_ERR(trans);
2204
2205         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206                 (unsigned long long)rec->ino);
2207
2208         btrfs_init_path(&path);
2209         key.objectid = backref->dir;
2210         key.type = BTRFS_DIR_INDEX_KEY;
2211         key.offset = backref->index;
2212         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2213         BUG_ON(ret);
2214
2215         leaf = path.nodes[0];
2216         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2217
2218         disk_key.objectid = cpu_to_le64(rec->ino);
2219         disk_key.type = BTRFS_INODE_ITEM_KEY;
2220         disk_key.offset = 0;
2221
2222         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224         btrfs_set_dir_data_len(leaf, dir_item, 0);
2225         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2226         name_ptr = (unsigned long)(dir_item + 1);
2227         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228         btrfs_mark_buffer_dirty(leaf);
2229         btrfs_release_path(&path);
2230         btrfs_commit_transaction(trans, root);
2231
2232         backref->found_dir_index = 1;
2233         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234         BUG_ON(IS_ERR(dir_rec));
2235         if (!dir_rec)
2236                 return 0;
2237         dir_rec->found_size += backref->namelen;
2238         if (dir_rec->found_size == dir_rec->isize &&
2239             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241         if (dir_rec->found_size != dir_rec->isize)
2242                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2243
2244         return 0;
2245 }
2246
2247 static int delete_dir_index(struct btrfs_root *root,
2248                             struct cache_tree *inode_cache,
2249                             struct inode_record *rec,
2250                             struct inode_backref *backref)
2251 {
2252         struct btrfs_trans_handle *trans;
2253         struct btrfs_dir_item *di;
2254         struct btrfs_path path;
2255         int ret = 0;
2256
2257         trans = btrfs_start_transaction(root, 1);
2258         if (IS_ERR(trans))
2259                 return PTR_ERR(trans);
2260
2261         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262                 (unsigned long long)backref->dir,
2263                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264                 (unsigned long long)root->objectid);
2265
2266         btrfs_init_path(&path);
2267         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268                                     backref->name, backref->namelen,
2269                                     backref->index, -1);
2270         if (IS_ERR(di)) {
2271                 ret = PTR_ERR(di);
2272                 btrfs_release_path(&path);
2273                 btrfs_commit_transaction(trans, root);
2274                 if (ret == -ENOENT)
2275                         return 0;
2276                 return ret;
2277         }
2278
2279         if (!di)
2280                 ret = btrfs_del_item(trans, root, &path);
2281         else
2282                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283         BUG_ON(ret);
2284         btrfs_release_path(&path);
2285         btrfs_commit_transaction(trans, root);
2286         return ret;
2287 }
2288
2289 static int create_inode_item(struct btrfs_root *root,
2290                              struct inode_record *rec,
2291                              struct inode_backref *backref, int root_dir)
2292 {
2293         struct btrfs_trans_handle *trans;
2294         struct btrfs_inode_item inode_item;
2295         time_t now = time(NULL);
2296         int ret;
2297
2298         trans = btrfs_start_transaction(root, 1);
2299         if (IS_ERR(trans)) {
2300                 ret = PTR_ERR(trans);
2301                 return ret;
2302         }
2303
2304         fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305                 "be incomplete, please check permissions and content after "
2306                 "the fsck completes.\n", (unsigned long long)root->objectid,
2307                 (unsigned long long)rec->ino);
2308
2309         memset(&inode_item, 0, sizeof(inode_item));
2310         btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2311         if (root_dir)
2312                 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313         else
2314                 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315         btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316         if (rec->found_dir_item) {
2317                 if (rec->found_file_extent)
2318                         fprintf(stderr, "root %llu inode %llu has both a dir "
2319                                 "item and extents, unsure if it is a dir or a "
2320                                 "regular file so setting it as a directory\n",
2321                                 (unsigned long long)root->objectid,
2322                                 (unsigned long long)rec->ino);
2323                 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324                 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2325         } else if (!rec->found_dir_item) {
2326                 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327                 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328         }
2329         btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330         btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331         btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332         btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333         btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334         btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335         btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336         btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337
2338         ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339         BUG_ON(ret);
2340         btrfs_commit_transaction(trans, root);
2341         return 0;
2342 }
2343
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345                                  struct inode_record *rec,
2346                                  struct cache_tree *inode_cache,
2347                                  int delete)
2348 {
2349         struct inode_backref *tmp, *backref;
2350         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2351         int ret = 0;
2352         int repaired = 0;
2353
2354         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355                 if (!delete && rec->ino == root_dirid) {
2356                         if (!rec->found_inode_item) {
2357                                 ret = create_inode_item(root, rec, backref, 1);
2358                                 if (ret)
2359                                         break;
2360                                 repaired++;
2361                         }
2362                 }
2363
2364                 /* Index 0 for root dir's are special, don't mess with it */
2365                 if (rec->ino == root_dirid && backref->index == 0)
2366                         continue;
2367
2368                 if (delete &&
2369                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2370                      (backref->found_dir_index && backref->found_inode_ref &&
2371                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372                         ret = delete_dir_index(root, inode_cache, rec, backref);
2373                         if (ret)
2374                                 break;
2375                         repaired++;
2376                         list_del(&backref->list);
2377                         free(backref);
2378                 }
2379
2380                 if (!delete && !backref->found_dir_index &&
2381                     backref->found_dir_item && backref->found_inode_ref) {
2382                         ret = add_missing_dir_index(root, inode_cache, rec,
2383                                                     backref);
2384                         if (ret)
2385                                 break;
2386                         repaired++;
2387                         if (backref->found_dir_item &&
2388                             backref->found_dir_index &&
2389                             backref->found_dir_index) {
2390                                 if (!backref->errors &&
2391                                     backref->found_inode_ref) {
2392                                         list_del(&backref->list);
2393                                         free(backref);
2394                                 }
2395                         }
2396                 }
2397
2398                 if (!delete && (!backref->found_dir_index &&
2399                                 !backref->found_dir_item &&
2400                                 backref->found_inode_ref)) {
2401                         struct btrfs_trans_handle *trans;
2402                         struct btrfs_key location;
2403
2404                         ret = check_dir_conflict(root, backref->name,
2405                                                  backref->namelen,
2406                                                  backref->dir,
2407                                                  backref->index);
2408                         if (ret) {
2409                                 /*
2410                                  * let nlink fixing routine to handle it,
2411                                  * which can do it better.
2412                                  */
2413                                 ret = 0;
2414                                 break;
2415                         }
2416                         location.objectid = rec->ino;
2417                         location.type = BTRFS_INODE_ITEM_KEY;
2418                         location.offset = 0;
2419
2420                         trans = btrfs_start_transaction(root, 1);
2421                         if (IS_ERR(trans)) {
2422                                 ret = PTR_ERR(trans);
2423                                 break;
2424                         }
2425                         fprintf(stderr, "adding missing dir index/item pair "
2426                                 "for inode %llu\n",
2427                                 (unsigned long long)rec->ino);
2428                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2429                                                     backref->namelen,
2430                                                     backref->dir, &location,
2431                                                     imode_to_type(rec->imode),
2432                                                     backref->index);
2433                         BUG_ON(ret);
2434                         btrfs_commit_transaction(trans, root);
2435                         repaired++;
2436                 }
2437
2438                 if (!delete && (backref->found_inode_ref &&
2439                                 backref->found_dir_index &&
2440                                 backref->found_dir_item &&
2441                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442                                 !rec->found_inode_item)) {
2443                         ret = create_inode_item(root, rec, backref, 0);
2444                         if (ret)
2445                                 break;
2446                         repaired++;
2447                 }
2448
2449         }
2450         return ret ? ret : repaired;
2451 }
2452
2453 /*
2454  * To determine the file type for nlink/inode_item repair
2455  *
2456  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457  * Return -ENOENT if file type is not found.
2458  */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2460 {
2461         struct inode_backref *backref;
2462
2463         /* For inode item recovered case */
2464         if (rec->found_inode_item) {
2465                 *type = imode_to_type(rec->imode);
2466                 return 0;
2467         }
2468
2469         list_for_each_entry(backref, &rec->backrefs, list) {
2470                 if (backref->found_dir_index || backref->found_dir_item) {
2471                         *type = backref->filetype;
2472                         return 0;
2473                 }
2474         }
2475         return -ENOENT;
2476 }
2477
2478 /*
2479  * To determine the file name for nlink repair
2480  *
2481  * Return 0 if file name is found, set name and namelen.
2482  * Return -ENOENT if file name is not found.
2483  */
2484 static int find_file_name(struct inode_record *rec,
2485                           char *name, int *namelen)
2486 {
2487         struct inode_backref *backref;
2488
2489         list_for_each_entry(backref, &rec->backrefs, list) {
2490                 if (backref->found_dir_index || backref->found_dir_item ||
2491                     backref->found_inode_ref) {
2492                         memcpy(name, backref->name, backref->namelen);
2493                         *namelen = backref->namelen;
2494                         return 0;
2495                 }
2496         }
2497         return -ENOENT;
2498 }
2499
2500 /* Reset the nlink of the inode to the correct one */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502                        struct btrfs_root *root,
2503                        struct btrfs_path *path,
2504                        struct inode_record *rec)
2505 {
2506         struct inode_backref *backref;
2507         struct inode_backref *tmp;
2508         struct btrfs_key key;
2509         struct btrfs_inode_item *inode_item;
2510         int ret = 0;
2511
2512         /* We don't believe this either, reset it and iterate backref */
2513         rec->found_link = 0;
2514
2515         /* Remove all backref including the valid ones */
2516         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518                                    backref->index, backref->name,
2519                                    backref->namelen, 0);
2520                 if (ret < 0)
2521                         goto out;
2522
2523                 /* remove invalid backref, so it won't be added back */
2524                 if (!(backref->found_dir_index &&
2525                       backref->found_dir_item &&
2526                       backref->found_inode_ref)) {
2527                         list_del(&backref->list);
2528                         free(backref);
2529                 } else {
2530                         rec->found_link++;
2531                 }
2532         }
2533
2534         /* Set nlink to 0 */
2535         key.objectid = rec->ino;
2536         key.type = BTRFS_INODE_ITEM_KEY;
2537         key.offset = 0;
2538         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2539         if (ret < 0)
2540                 goto out;
2541         if (ret > 0) {
2542                 ret = -ENOENT;
2543                 goto out;
2544         }
2545         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546                                     struct btrfs_inode_item);
2547         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548         btrfs_mark_buffer_dirty(path->nodes[0]);
2549         btrfs_release_path(path);
2550
2551         /*
2552          * Add back valid inode_ref/dir_item/dir_index,
2553          * add_link() will handle the nlink inc, so new nlink must be correct
2554          */
2555         list_for_each_entry(backref, &rec->backrefs, list) {
2556                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557                                      backref->name, backref->namelen,
2558                                      backref->filetype, &backref->index, 1);
2559                 if (ret < 0)
2560                         goto out;
2561         }
2562 out:
2563         btrfs_release_path(path);
2564         return ret;
2565 }
2566
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568                                struct btrfs_root *root,
2569                                struct btrfs_path *path,
2570                                struct inode_record *rec)
2571 {
2572         char *dir_name = "lost+found";
2573         char namebuf[BTRFS_NAME_LEN] = {0};
2574         u64 lost_found_ino;
2575         u32 mode = 0700;
2576         u8 type = 0;
2577         int namelen = 0;
2578         int name_recovered = 0;
2579         int type_recovered = 0;
2580         int ret = 0;
2581
2582         /*
2583          * Get file name and type first before these invalid inode ref
2584          * are deleted by remove_all_invalid_backref()
2585          */
2586         name_recovered = !find_file_name(rec, namebuf, &namelen);
2587         type_recovered = !find_file_type(rec, &type);
2588
2589         if (!name_recovered) {
2590                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591                        rec->ino, rec->ino);
2592                 namelen = count_digits(rec->ino);
2593                 sprintf(namebuf, "%llu", rec->ino);
2594                 name_recovered = 1;
2595         }
2596         if (!type_recovered) {
2597                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598                        rec->ino);
2599                 type = BTRFS_FT_REG_FILE;
2600                 type_recovered = 1;
2601         }
2602
2603         ret = reset_nlink(trans, root, path, rec);
2604         if (ret < 0) {
2605                 fprintf(stderr,
2606                         "Failed to reset nlink for inode %llu: %s\n",
2607                         rec->ino, strerror(-ret));
2608                 goto out;
2609         }
2610
2611         if (rec->found_link == 0) {
2612                 lost_found_ino = root->highest_inode;
2613                 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2614                         ret = -EOVERFLOW;
2615                         goto out;
2616                 }
2617                 lost_found_ino++;
2618                 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619                                   BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2620                                   mode);
2621                 if (ret < 0) {
2622                         fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623                                 dir_name, strerror(-ret));
2624                         goto out;
2625                 }
2626                 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627                                      namebuf, namelen, type, NULL, 1);
2628                 /*
2629                  * Add ".INO" suffix several times to handle case where
2630                  * "FILENAME.INO" is already taken by another file.
2631                  */
2632                 while (ret == -EEXIST) {
2633                         /*
2634                          * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635                          */
2636                         if (namelen + count_digits(rec->ino) + 1 >
2637                             BTRFS_NAME_LEN) {
2638                                 ret = -EFBIG;
2639                                 goto out;
2640                         }
2641                         snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642                                  ".%llu", rec->ino);
2643                         namelen += count_digits(rec->ino) + 1;
2644                         ret = btrfs_add_link(trans, root, rec->ino,
2645                                              lost_found_ino, namebuf,
2646                                              namelen, type, NULL, 1);
2647                 }
2648                 if (ret < 0) {
2649                         fprintf(stderr,
2650                                 "Failed to link the inode %llu to %s dir: %s\n",
2651                                 rec->ino, dir_name, strerror(-ret));
2652                         goto out;
2653                 }
2654                 /*
2655                  * Just increase the found_link, don't actually add the
2656                  * backref. This will make things easier and this inode
2657                  * record will be freed after the repair is done.
2658                  * So fsck will not report problem about this inode.
2659                  */
2660                 rec->found_link++;
2661                 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662                        namelen, namebuf, dir_name);
2663         }
2664         printf("Fixed the nlink of inode %llu\n", rec->ino);
2665 out:
2666         /*
2667          * Clear the flag anyway, or we will loop forever for the same inode
2668          * as it will not be removed from the bad inode list and the dead loop
2669          * happens.
2670          */
2671         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672         btrfs_release_path(path);
2673         return ret;
2674 }
2675
2676 /*
2677  * Check if there is any normal(reg or prealloc) file extent for given
2678  * ino.
2679  * This is used to determine the file type when neither its dir_index/item or
2680  * inode_item exists.
2681  *
2682  * This will *NOT* report error, if any error happens, just consider it does
2683  * not have any normal file extent.
2684  */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 {
2687         struct btrfs_path path;
2688         struct btrfs_key key;
2689         struct btrfs_key found_key;
2690         struct btrfs_file_extent_item *fi;
2691         u8 type;
2692         int ret = 0;
2693
2694         btrfs_init_path(&path);
2695         key.objectid = ino;
2696         key.type = BTRFS_EXTENT_DATA_KEY;
2697         key.offset = 0;
2698
2699         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
2700         if (ret < 0) {
2701                 ret = 0;
2702                 goto out;
2703         }
2704         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705                 ret = btrfs_next_leaf(root, &path);
2706                 if (ret) {
2707                         ret = 0;
2708                         goto out;
2709                 }
2710         }
2711         while (1) {
2712                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
2713                                       path.slots[0]);
2714                 if (found_key.objectid != ino ||
2715                     found_key.type != BTRFS_EXTENT_DATA_KEY)
2716                         break;
2717                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718                                     struct btrfs_file_extent_item);
2719                 type = btrfs_file_extent_type(path.nodes[0], fi);
2720                 if (type != BTRFS_FILE_EXTENT_INLINE) {
2721                         ret = 1;
2722                         goto out;
2723                 }
2724         }
2725 out:
2726         btrfs_release_path(&path);
2727         return ret;
2728 }
2729
2730 static u32 btrfs_type_to_imode(u8 type)
2731 {
2732         static u32 imode_by_btrfs_type[] = {
2733                 [BTRFS_FT_REG_FILE]     = S_IFREG,
2734                 [BTRFS_FT_DIR]          = S_IFDIR,
2735                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
2736                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
2737                 [BTRFS_FT_FIFO]         = S_IFIFO,
2738                 [BTRFS_FT_SOCK]         = S_IFSOCK,
2739                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
2740         };
2741
2742         return imode_by_btrfs_type[(type)];
2743 }
2744
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746                                 struct btrfs_root *root,
2747                                 struct btrfs_path *path,
2748                                 struct inode_record *rec)
2749 {
2750         u8 filetype;
2751         u32 mode = 0700;
2752         int type_recovered = 0;
2753         int ret = 0;
2754
2755         printf("Trying to rebuild inode:%llu\n", rec->ino);
2756
2757         type_recovered = !find_file_type(rec, &filetype);
2758
2759         /*
2760          * Try to determine inode type if type not found.
2761          *
2762          * For found regular file extent, it must be FILE.
2763          * For found dir_item/index, it must be DIR.
2764          *
2765          * For undetermined one, use FILE as fallback.
2766          *
2767          * TODO:
2768          * 1. If found backref(inode_index/item is already handled) to it,
2769          *    it must be DIR.
2770          *    Need new inode-inode ref structure to allow search for that.
2771          */
2772         if (!type_recovered) {
2773                 if (rec->found_file_extent &&
2774                     find_normal_file_extent(root, rec->ino)) {
2775                         type_recovered = 1;
2776                         filetype = BTRFS_FT_REG_FILE;
2777                 } else if (rec->found_dir_item) {
2778                         type_recovered = 1;
2779                         filetype = BTRFS_FT_DIR;
2780                 } else if (!list_empty(&rec->orphan_extents)) {
2781                         type_recovered = 1;
2782                         filetype = BTRFS_FT_REG_FILE;
2783                 } else{
2784                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2785                                rec->ino);
2786                         type_recovered = 1;
2787                         filetype = BTRFS_FT_REG_FILE;
2788                 }
2789         }
2790
2791         ret = btrfs_new_inode(trans, root, rec->ino,
2792                               mode | btrfs_type_to_imode(filetype));
2793         if (ret < 0)
2794                 goto out;
2795
2796         /*
2797          * Here inode rebuild is done, we only rebuild the inode item,
2798          * don't repair the nlink(like move to lost+found).
2799          * That is the job of nlink repair.
2800          *
2801          * We just fill the record and return
2802          */
2803         rec->found_dir_item = 1;
2804         rec->imode = mode | btrfs_type_to_imode(filetype);
2805         rec->nlink = 0;
2806         rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807         /* Ensure the inode_nlinks repair function will be called */
2808         rec->errors |= I_ERR_LINK_COUNT_WRONG;
2809 out:
2810         return ret;
2811 }
2812
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814                                       struct btrfs_root *root,
2815                                       struct btrfs_path *path,
2816                                       struct inode_record *rec)
2817 {
2818         struct orphan_data_extent *orphan;
2819         struct orphan_data_extent *tmp;
2820         int ret = 0;
2821
2822         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823                 /*
2824                  * Check for conflicting file extents
2825                  *
2826                  * Here we don't know whether the extents is compressed or not,
2827                  * so we can only assume it not compressed nor data offset,
2828                  * and use its disk_len as extent length.
2829                  */
2830                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831                                        orphan->offset, orphan->disk_len, 0);
2832                 btrfs_release_path(path);
2833                 if (ret < 0)
2834                         goto out;
2835                 if (!ret) {
2836                         fprintf(stderr,
2837                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838                                 orphan->disk_bytenr, orphan->disk_len);
2839                         ret = btrfs_free_extent(trans,
2840                                         root->fs_info->extent_root,
2841                                         orphan->disk_bytenr, orphan->disk_len,
2842                                         0, root->objectid, orphan->objectid,
2843                                         orphan->offset);
2844                         if (ret < 0)
2845                                 goto out;
2846                 }
2847                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848                                 orphan->offset, orphan->disk_bytenr,
2849                                 orphan->disk_len, orphan->disk_len);
2850                 if (ret < 0)
2851                         goto out;
2852
2853                 /* Update file size info */
2854                 rec->found_size += orphan->disk_len;
2855                 if (rec->found_size == rec->nbytes)
2856                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857
2858                 /* Update the file extent hole info too */
2859                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2860                                            orphan->disk_len);
2861                 if (ret < 0)
2862                         goto out;
2863                 if (RB_EMPTY_ROOT(&rec->holes))
2864                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865
2866                 list_del(&orphan->list);
2867                 free(orphan);
2868         }
2869         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2870 out:
2871         return ret;
2872 }
2873
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875                                         struct btrfs_root *root,
2876                                         struct btrfs_path *path,
2877                                         struct inode_record *rec)
2878 {
2879         struct rb_node *node;
2880         struct file_extent_hole *hole;
2881         int found = 0;
2882         int ret = 0;
2883
2884         node = rb_first(&rec->holes);
2885
2886         while (node) {
2887                 found = 1;
2888                 hole = rb_entry(node, struct file_extent_hole, node);
2889                 ret = btrfs_punch_hole(trans, root, rec->ino,
2890                                        hole->start, hole->len);
2891                 if (ret < 0)
2892                         goto out;
2893                 ret = del_file_extent_hole(&rec->holes, hole->start,
2894                                            hole->len);
2895                 if (ret < 0)
2896                         goto out;
2897                 if (RB_EMPTY_ROOT(&rec->holes))
2898                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2899                 node = rb_first(&rec->holes);
2900         }
2901         /* special case for a file losing all its file extent */
2902         if (!found) {
2903                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904                                        round_up(rec->isize, root->sectorsize));
2905                 if (ret < 0)
2906                         goto out;
2907         }
2908         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909                rec->ino, root->objectid);
2910 out:
2911         return ret;
2912 }
2913
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 {
2916         struct btrfs_trans_handle *trans;
2917         struct btrfs_path path;
2918         int ret = 0;
2919
2920         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921                              I_ERR_NO_ORPHAN_ITEM |
2922                              I_ERR_LINK_COUNT_WRONG |
2923                              I_ERR_NO_INODE_ITEM |
2924                              I_ERR_FILE_EXTENT_ORPHAN |
2925                              I_ERR_FILE_EXTENT_DISCOUNT|
2926                              I_ERR_FILE_NBYTES_WRONG)))
2927                 return rec->errors;
2928
2929         /*
2930          * For nlink repair, it may create a dir and add link, so
2931          * 2 for parent(256)'s dir_index and dir_item
2932          * 2 for lost+found dir's inode_item and inode_ref
2933          * 1 for the new inode_ref of the file
2934          * 2 for lost+found dir's dir_index and dir_item for the file
2935          */
2936         trans = btrfs_start_transaction(root, 7);
2937         if (IS_ERR(trans))
2938                 return PTR_ERR(trans);
2939
2940         btrfs_init_path(&path);
2941         if (rec->errors & I_ERR_NO_INODE_ITEM)
2942                 ret = repair_inode_no_item(trans, root, &path, rec);
2943         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946                 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948                 ret = repair_inode_isize(trans, root, &path, rec);
2949         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950                 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952                 ret = repair_inode_nlinks(trans, root, &path, rec);
2953         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954                 ret = repair_inode_nbytes(trans, root, &path, rec);
2955         btrfs_commit_transaction(trans, root);
2956         btrfs_release_path(&path);
2957         return ret;
2958 }
2959
2960 static int check_inode_recs(struct btrfs_root *root,
2961                             struct cache_tree *inode_cache)
2962 {
2963         struct cache_extent *cache;
2964         struct ptr_node *node;
2965         struct inode_record *rec;
2966         struct inode_backref *backref;
2967         int stage = 0;
2968         int ret = 0;
2969         int err = 0;
2970         u64 error = 0;
2971         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2972
2973         if (btrfs_root_refs(&root->root_item) == 0) {
2974                 if (!cache_tree_empty(inode_cache))
2975                         fprintf(stderr, "warning line %d\n", __LINE__);
2976                 return 0;
2977         }
2978
2979         /*
2980          * We need to record the highest inode number for later 'lost+found'
2981          * dir creation.
2982          * We must select an ino not used/referred by any existing inode, or
2983          * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984          * this may cause 'lost+found' dir has wrong nlinks.
2985          */
2986         cache = last_cache_extent(inode_cache);
2987         if (cache) {
2988                 node = container_of(cache, struct ptr_node, cache);
2989                 rec = node->data;
2990                 if (rec->ino > root->highest_inode)
2991                         root->highest_inode = rec->ino;
2992         }
2993
2994         /*
2995          * We need to repair backrefs first because we could change some of the
2996          * errors in the inode recs.
2997          *
2998          * We also need to go through and delete invalid backrefs first and then
2999          * add the correct ones second.  We do this because we may get EEXIST
3000          * when adding back the correct index because we hadn't yet deleted the
3001          * invalid index.
3002          *
3003          * For example, if we were missing a dir index then the directories
3004          * isize would be wrong, so if we fixed the isize to what we thought it
3005          * would be and then fixed the backref we'd still have a invalid fs, so
3006          * we need to add back the dir index and then check to see if the isize
3007          * is still wrong.
3008          */
3009         while (stage < 3) {
3010                 stage++;
3011                 if (stage == 3 && !err)
3012                         break;
3013
3014                 cache = search_cache_extent(inode_cache, 0);
3015                 while (repair && cache) {
3016                         node = container_of(cache, struct ptr_node, cache);
3017                         rec = node->data;
3018                         cache = next_cache_extent(cache);
3019
3020                         /* Need to free everything up and rescan */
3021                         if (stage == 3) {
3022                                 remove_cache_extent(inode_cache, &node->cache);
3023                                 free(node);
3024                                 free_inode_rec(rec);
3025                                 continue;
3026                         }
3027
3028                         if (list_empty(&rec->backrefs))
3029                                 continue;
3030
3031                         ret = repair_inode_backrefs(root, rec, inode_cache,
3032                                                     stage == 1);
3033                         if (ret < 0) {
3034                                 err = ret;
3035                                 stage = 2;
3036                                 break;
3037                         } if (ret > 0) {
3038                                 err = -EAGAIN;
3039                         }
3040                 }
3041         }
3042         if (err)
3043                 return err;
3044
3045         rec = get_inode_rec(inode_cache, root_dirid, 0);
3046         BUG_ON(IS_ERR(rec));
3047         if (rec) {
3048                 ret = check_root_dir(rec);
3049                 if (ret) {
3050                         fprintf(stderr, "root %llu root dir %llu error\n",
3051                                 (unsigned long long)root->root_key.objectid,
3052                                 (unsigned long long)root_dirid);
3053                         print_inode_error(root, rec);
3054                         error++;
3055                 }
3056         } else {
3057                 if (repair) {
3058                         struct btrfs_trans_handle *trans;
3059
3060                         trans = btrfs_start_transaction(root, 1);
3061                         if (IS_ERR(trans)) {
3062                                 err = PTR_ERR(trans);
3063                                 return err;
3064                         }
3065
3066                         fprintf(stderr,
3067                                 "root %llu missing its root dir, recreating\n",
3068                                 (unsigned long long)root->objectid);
3069
3070                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3071                         BUG_ON(ret);
3072
3073                         btrfs_commit_transaction(trans, root);
3074                         return -EAGAIN;
3075                 }
3076
3077                 fprintf(stderr, "root %llu root dir %llu not found\n",
3078                         (unsigned long long)root->root_key.objectid,
3079                         (unsigned long long)root_dirid);
3080         }
3081
3082         while (1) {
3083                 cache = search_cache_extent(inode_cache, 0);
3084                 if (!cache)
3085                         break;
3086                 node = container_of(cache, struct ptr_node, cache);
3087                 rec = node->data;
3088                 remove_cache_extent(inode_cache, &node->cache);
3089                 free(node);
3090                 if (rec->ino == root_dirid ||
3091                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092                         free_inode_rec(rec);
3093                         continue;
3094                 }
3095
3096                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097                         ret = check_orphan_item(root, rec->ino);
3098                         if (ret == 0)
3099                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100                         if (can_free_inode_rec(rec)) {
3101                                 free_inode_rec(rec);
3102                                 continue;
3103                         }
3104                 }
3105
3106                 if (!rec->found_inode_item)
3107                         rec->errors |= I_ERR_NO_INODE_ITEM;
3108                 if (rec->found_link != rec->nlink)
3109                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3110                 if (repair) {
3111                         ret = try_repair_inode(root, rec);
3112                         if (ret == 0 && can_free_inode_rec(rec)) {
3113                                 free_inode_rec(rec);
3114                                 continue;
3115                         }
3116                         ret = 0;
3117                 }
3118
3119                 if (!(repair && ret == 0))
3120                         error++;
3121                 print_inode_error(root, rec);
3122                 list_for_each_entry(backref, &rec->backrefs, list) {
3123                         if (!backref->found_dir_item)
3124                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125                         if (!backref->found_dir_index)
3126                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127                         if (!backref->found_inode_ref)
3128                                 backref->errors |= REF_ERR_NO_INODE_REF;
3129                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130                                 " namelen %u name %s filetype %d errors %x",
3131                                 (unsigned long long)backref->dir,
3132                                 (unsigned long long)backref->index,
3133                                 backref->namelen, backref->name,
3134                                 backref->filetype, backref->errors);
3135                         print_ref_error(backref->errors);
3136                 }
3137                 free_inode_rec(rec);
3138         }
3139         return (error > 0) ? -1 : 0;
3140 }
3141
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3143                                         u64 objectid)
3144 {
3145         struct cache_extent *cache;
3146         struct root_record *rec = NULL;
3147         int ret;
3148
3149         cache = lookup_cache_extent(root_cache, objectid, 1);
3150         if (cache) {
3151                 rec = container_of(cache, struct root_record, cache);
3152         } else {
3153                 rec = calloc(1, sizeof(*rec));
3154                 if (!rec)
3155                         return ERR_PTR(-ENOMEM);
3156                 rec->objectid = objectid;
3157                 INIT_LIST_HEAD(&rec->backrefs);
3158                 rec->cache.start = objectid;
3159                 rec->cache.size = 1;
3160
3161                 ret = insert_cache_extent(root_cache, &rec->cache);
3162                 if (ret)
3163                         return ERR_PTR(-EEXIST);
3164         }
3165         return rec;
3166 }
3167
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169                                              u64 ref_root, u64 dir, u64 index,
3170                                              const char *name, int namelen)
3171 {
3172         struct root_backref *backref;
3173
3174         list_for_each_entry(backref, &rec->backrefs, list) {
3175                 if (backref->ref_root != ref_root || backref->dir != dir ||
3176                     backref->namelen != namelen)
3177                         continue;
3178                 if (memcmp(name, backref->name, namelen))
3179                         continue;
3180                 return backref;
3181         }
3182
3183         backref = calloc(1, sizeof(*backref) + namelen + 1);
3184         if (!backref)
3185                 return NULL;
3186         backref->ref_root = ref_root;
3187         backref->dir = dir;
3188         backref->index = index;
3189         backref->namelen = namelen;
3190         memcpy(backref->name, name, namelen);
3191         backref->name[namelen] = '\0';
3192         list_add_tail(&backref->list, &rec->backrefs);
3193         return backref;
3194 }
3195
3196 static void free_root_record(struct cache_extent *cache)
3197 {
3198         struct root_record *rec;
3199         struct root_backref *backref;
3200
3201         rec = container_of(cache, struct root_record, cache);
3202         while (!list_empty(&rec->backrefs)) {
3203                 backref = to_root_backref(rec->backrefs.next);
3204                 list_del(&backref->list);
3205                 free(backref);
3206         }
3207
3208         free(rec);
3209 }
3210
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3212
3213 static int add_root_backref(struct cache_tree *root_cache,
3214                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3215                             const char *name, int namelen,
3216                             int item_type, int errors)
3217 {
3218         struct root_record *rec;
3219         struct root_backref *backref;
3220
3221         rec = get_root_rec(root_cache, root_id);
3222         BUG_ON(IS_ERR(rec));
3223         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3224         BUG_ON(!backref);
3225
3226         backref->errors |= errors;
3227
3228         if (item_type != BTRFS_DIR_ITEM_KEY) {
3229                 if (backref->found_dir_index || backref->found_back_ref ||
3230                     backref->found_forward_ref) {
3231                         if (backref->index != index)
3232                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3233                 } else {
3234                         backref->index = index;
3235                 }
3236         }
3237
3238         if (item_type == BTRFS_DIR_ITEM_KEY) {
3239                 if (backref->found_forward_ref)
3240                         rec->found_ref++;
3241                 backref->found_dir_item = 1;
3242         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243                 backref->found_dir_index = 1;
3244         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245                 if (backref->found_forward_ref)
3246                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3247                 else if (backref->found_dir_item)
3248                         rec->found_ref++;
3249                 backref->found_forward_ref = 1;
3250         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251                 if (backref->found_back_ref)
3252                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253                 backref->found_back_ref = 1;
3254         } else {
3255                 BUG_ON(1);
3256         }
3257
3258         if (backref->found_forward_ref && backref->found_dir_item)
3259                 backref->reachable = 1;
3260         return 0;
3261 }
3262
3263 static int merge_root_recs(struct btrfs_root *root,
3264                            struct cache_tree *src_cache,
3265                            struct cache_tree *dst_cache)
3266 {
3267         struct cache_extent *cache;
3268         struct ptr_node *node;
3269         struct inode_record *rec;
3270         struct inode_backref *backref;
3271         int ret = 0;
3272
3273         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274                 free_inode_recs_tree(src_cache);
3275                 return 0;
3276         }
3277
3278         while (1) {
3279                 cache = search_cache_extent(src_cache, 0);
3280                 if (!cache)
3281                         break;
3282                 node = container_of(cache, struct ptr_node, cache);
3283                 rec = node->data;
3284                 remove_cache_extent(src_cache, &node->cache);
3285                 free(node);
3286
3287                 ret = is_child_root(root, root->objectid, rec->ino);
3288                 if (ret < 0)
3289                         break;
3290                 else if (ret == 0)
3291                         goto skip;
3292
3293                 list_for_each_entry(backref, &rec->backrefs, list) {
3294                         BUG_ON(backref->found_inode_ref);
3295                         if (backref->found_dir_item)
3296                                 add_root_backref(dst_cache, rec->ino,
3297                                         root->root_key.objectid, backref->dir,
3298                                         backref->index, backref->name,
3299                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3300                                         backref->errors);
3301                         if (backref->found_dir_index)
3302                                 add_root_backref(dst_cache, rec->ino,
3303                                         root->root_key.objectid, backref->dir,
3304                                         backref->index, backref->name,
3305                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3306                                         backref->errors);
3307                 }
3308 skip:
3309                 free_inode_rec(rec);
3310         }
3311         if (ret < 0)
3312                 return ret;
3313         return 0;
3314 }
3315
3316 static int check_root_refs(struct btrfs_root *root,
3317                            struct cache_tree *root_cache)
3318 {
3319         struct root_record *rec;
3320         struct root_record *ref_root;
3321         struct root_backref *backref;
3322         struct cache_extent *cache;
3323         int loop = 1;
3324         int ret;
3325         int error;
3326         int errors = 0;
3327
3328         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329         BUG_ON(IS_ERR(rec));
3330         rec->found_ref = 1;
3331
3332         /* fixme: this can not detect circular references */
3333         while (loop) {
3334                 loop = 0;
3335                 cache = search_cache_extent(root_cache, 0);
3336                 while (1) {
3337                         if (!cache)
3338                                 break;
3339                         rec = container_of(cache, struct root_record, cache);
3340                         cache = next_cache_extent(cache);
3341
3342                         if (rec->found_ref == 0)
3343                                 continue;
3344
3345                         list_for_each_entry(backref, &rec->backrefs, list) {
3346                                 if (!backref->reachable)
3347                                         continue;
3348
3349                                 ref_root = get_root_rec(root_cache,
3350                                                         backref->ref_root);
3351                                 BUG_ON(IS_ERR(ref_root));
3352                                 if (ref_root->found_ref > 0)
3353                                         continue;
3354
3355                                 backref->reachable = 0;
3356                                 rec->found_ref--;
3357                                 if (rec->found_ref == 0)
3358                                         loop = 1;
3359                         }
3360                 }
3361         }
3362
3363         cache = search_cache_extent(root_cache, 0);
3364         while (1) {
3365                 if (!cache)
3366                         break;
3367                 rec = container_of(cache, struct root_record, cache);
3368                 cache = next_cache_extent(cache);
3369
3370                 if (rec->found_ref == 0 &&
3371                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373                         ret = check_orphan_item(root->fs_info->tree_root,
3374                                                 rec->objectid);
3375                         if (ret == 0)
3376                                 continue;
3377
3378                         /*
3379                          * If we don't have a root item then we likely just have
3380                          * a dir item in a snapshot for this root but no actual
3381                          * ref key or anything so it's meaningless.
3382                          */
3383                         if (!rec->found_root_item)
3384                                 continue;
3385                         errors++;
3386                         fprintf(stderr, "fs tree %llu not referenced\n",
3387                                 (unsigned long long)rec->objectid);
3388                 }
3389
3390                 error = 0;
3391                 if (rec->found_ref > 0 && !rec->found_root_item)
3392                         error = 1;
3393                 list_for_each_entry(backref, &rec->backrefs, list) {
3394                         if (!backref->found_dir_item)
3395                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396                         if (!backref->found_dir_index)
3397                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398                         if (!backref->found_back_ref)
3399                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400                         if (!backref->found_forward_ref)
3401                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3402                         if (backref->reachable && backref->errors)
3403                                 error = 1;
3404                 }
3405                 if (!error)
3406                         continue;
3407
3408                 errors++;
3409                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410                         (unsigned long long)rec->objectid, rec->found_ref,
3411                          rec->found_root_item ? "" : "not found");
3412
3413                 list_for_each_entry(backref, &rec->backrefs, list) {
3414                         if (!backref->reachable)
3415                                 continue;
3416                         if (!backref->errors && rec->found_root_item)
3417                                 continue;
3418                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419                                 " index %llu namelen %u name %s errors %x\n",
3420                                 (unsigned long long)backref->ref_root,
3421                                 (unsigned long long)backref->dir,
3422                                 (unsigned long long)backref->index,
3423                                 backref->namelen, backref->name,
3424                                 backref->errors);
3425                         print_ref_error(backref->errors);
3426                 }
3427         }
3428         return errors > 0 ? 1 : 0;
3429 }
3430
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432                             struct btrfs_key *key,
3433                             struct cache_tree *root_cache)
3434 {
3435         u64 dirid;
3436         u64 index;
3437         u32 len;
3438         u32 name_len;
3439         struct btrfs_root_ref *ref;
3440         char namebuf[BTRFS_NAME_LEN];
3441         int error;
3442
3443         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3444
3445         dirid = btrfs_root_ref_dirid(eb, ref);
3446         index = btrfs_root_ref_sequence(eb, ref);
3447         name_len = btrfs_root_ref_name_len(eb, ref);
3448
3449         if (name_len <= BTRFS_NAME_LEN) {
3450                 len = name_len;
3451                 error = 0;
3452         } else {
3453                 len = BTRFS_NAME_LEN;
3454                 error = REF_ERR_NAME_TOO_LONG;
3455         }
3456         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3457
3458         if (key->type == BTRFS_ROOT_REF_KEY) {
3459                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460                                  index, namebuf, len, key->type, error);
3461         } else {
3462                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463                                  index, namebuf, len, key->type, error);
3464         }
3465         return 0;
3466 }
3467
3468 static void free_corrupt_block(struct cache_extent *cache)
3469 {
3470         struct btrfs_corrupt_block *corrupt;
3471
3472         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3473         free(corrupt);
3474 }
3475
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3477
3478 /*
3479  * Repair the btree of the given root.
3480  *
3481  * The fix is to remove the node key in corrupt_blocks cache_tree.
3482  * and rebalance the tree.
3483  * After the fix, the btree should be writeable.
3484  */
3485 static int repair_btree(struct btrfs_root *root,
3486                         struct cache_tree *corrupt_blocks)
3487 {
3488         struct btrfs_trans_handle *trans;
3489         struct btrfs_path path;
3490         struct btrfs_corrupt_block *corrupt;
3491         struct cache_extent *cache;
3492         struct btrfs_key key;
3493         u64 offset;
3494         int level;
3495         int ret = 0;
3496
3497         if (cache_tree_empty(corrupt_blocks))
3498                 return 0;
3499
3500         trans = btrfs_start_transaction(root, 1);
3501         if (IS_ERR(trans)) {
3502                 ret = PTR_ERR(trans);
3503                 fprintf(stderr, "Error starting transaction: %s\n",
3504                         strerror(-ret));
3505                 return ret;
3506         }
3507         btrfs_init_path(&path);
3508         cache = first_cache_extent(corrupt_blocks);
3509         while (cache) {
3510                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3511                                        cache);
3512                 level = corrupt->level;
3513                 path.lowest_level = level;
3514                 key.objectid = corrupt->key.objectid;
3515                 key.type = corrupt->key.type;
3516                 key.offset = corrupt->key.offset;
3517
3518                 /*
3519                  * Here we don't want to do any tree balance, since it may
3520                  * cause a balance with corrupted brother leaf/node,
3521                  * so ins_len set to 0 here.
3522                  * Balance will be done after all corrupt node/leaf is deleted.
3523                  */
3524                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3525                 if (ret < 0)
3526                         goto out;
3527                 offset = btrfs_node_blockptr(path.nodes[level],
3528                                              path.slots[level]);
3529
3530                 /* Remove the ptr */
3531                 ret = btrfs_del_ptr(trans, root, &path, level,
3532                                     path.slots[level]);
3533                 if (ret < 0)
3534                         goto out;
3535                 /*
3536                  * Remove the corresponding extent
3537                  * return value is not concerned.
3538                  */
3539                 btrfs_release_path(&path);
3540                 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541                                         0, root->root_key.objectid,
3542                                         level - 1, 0);
3543                 cache = next_cache_extent(cache);
3544         }
3545
3546         /* Balance the btree using btrfs_search_slot() */
3547         cache = first_cache_extent(corrupt_blocks);
3548         while (cache) {
3549                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3550                                        cache);
3551                 memcpy(&key, &corrupt->key, sizeof(key));
3552                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3553                 if (ret < 0)
3554                         goto out;
3555                 /* return will always >0 since it won't find the item */
3556                 ret = 0;
3557                 btrfs_release_path(&path);
3558                 cache = next_cache_extent(cache);
3559         }
3560 out:
3561         btrfs_commit_transaction(trans, root);
3562         btrfs_release_path(&path);
3563         return ret;
3564 }
3565
3566 static int check_fs_root(struct btrfs_root *root,
3567                          struct cache_tree *root_cache,
3568                          struct walk_control *wc)
3569 {
3570         int ret = 0;
3571         int err = 0;
3572         int wret;
3573         int level;
3574         struct btrfs_path path;
3575         struct shared_node root_node;
3576         struct root_record *rec;
3577         struct btrfs_root_item *root_item = &root->root_item;
3578         struct cache_tree corrupt_blocks;
3579         struct orphan_data_extent *orphan;
3580         struct orphan_data_extent *tmp;
3581         enum btrfs_tree_block_status status;
3582         struct node_refs nrefs;
3583
3584         /*
3585          * Reuse the corrupt_block cache tree to record corrupted tree block
3586          *
3587          * Unlike the usage in extent tree check, here we do it in a per
3588          * fs/subvol tree base.
3589          */
3590         cache_tree_init(&corrupt_blocks);
3591         root->fs_info->corrupt_blocks = &corrupt_blocks;
3592
3593         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594                 rec = get_root_rec(root_cache, root->root_key.objectid);
3595                 BUG_ON(IS_ERR(rec));
3596                 if (btrfs_root_refs(root_item) > 0)
3597                         rec->found_root_item = 1;
3598         }
3599
3600         btrfs_init_path(&path);
3601         memset(&root_node, 0, sizeof(root_node));
3602         cache_tree_init(&root_node.root_cache);
3603         cache_tree_init(&root_node.inode_cache);
3604         memset(&nrefs, 0, sizeof(nrefs));
3605
3606         /* Move the orphan extent record to corresponding inode_record */
3607         list_for_each_entry_safe(orphan, tmp,
3608                                  &root->orphan_data_extents, list) {
3609                 struct inode_record *inode;
3610
3611                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3612                                       1);
3613                 BUG_ON(IS_ERR(inode));
3614                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615                 list_move(&orphan->list, &inode->orphan_extents);
3616         }
3617
3618         level = btrfs_header_level(root->node);
3619         memset(wc->nodes, 0, sizeof(wc->nodes));
3620         wc->nodes[level] = &root_node;
3621         wc->active_node = level;
3622         wc->root_level = level;
3623
3624         /* We may not have checked the root block, lets do that now */
3625         if (btrfs_is_leaf(root->node))
3626                 status = btrfs_check_leaf(root, NULL, root->node);
3627         else
3628                 status = btrfs_check_node(root, NULL, root->node);
3629         if (status != BTRFS_TREE_BLOCK_CLEAN)
3630                 return -EIO;
3631
3632         if (btrfs_root_refs(root_item) > 0 ||
3633             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634                 path.nodes[level] = root->node;
3635                 extent_buffer_get(root->node);
3636                 path.slots[level] = 0;
3637         } else {
3638                 struct btrfs_key key;
3639                 struct btrfs_disk_key found_key;
3640
3641                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642                 level = root_item->drop_level;
3643                 path.lowest_level = level;
3644                 if (level > btrfs_header_level(root->node) ||
3645                     level >= BTRFS_MAX_LEVEL) {
3646                         error("ignoring invalid drop level: %u", level);
3647                         goto skip_walking;
3648                 }
3649                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3650                 if (wret < 0)
3651                         goto skip_walking;
3652                 btrfs_node_key(path.nodes[level], &found_key,
3653                                 path.slots[level]);
3654                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655                                         sizeof(found_key)));
3656         }
3657
3658         while (1) {
3659                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3660                 if (wret < 0)
3661                         ret = wret;
3662                 if (wret != 0)
3663                         break;
3664
3665                 wret = walk_up_tree(root, &path, wc, &level);
3666                 if (wret < 0)
3667                         ret = wret;
3668                 if (wret != 0)
3669                         break;
3670         }
3671 skip_walking:
3672         btrfs_release_path(&path);
3673
3674         if (!cache_tree_empty(&corrupt_blocks)) {
3675                 struct cache_extent *cache;
3676                 struct btrfs_corrupt_block *corrupt;
3677
3678                 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679                        root->root_key.objectid);
3680                 cache = first_cache_extent(&corrupt_blocks);
3681                 while (cache) {
3682                         corrupt = container_of(cache,
3683                                                struct btrfs_corrupt_block,
3684                                                cache);
3685                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686                                cache->start, corrupt->level,
3687                                corrupt->key.objectid, corrupt->key.type,
3688                                corrupt->key.offset);
3689                         cache = next_cache_extent(cache);
3690                 }
3691                 if (repair) {
3692                         printf("Try to repair the btree for root %llu\n",
3693                                root->root_key.objectid);
3694                         ret = repair_btree(root, &corrupt_blocks);
3695                         if (ret < 0)
3696                                 fprintf(stderr, "Failed to repair btree: %s\n",
3697                                         strerror(-ret));
3698                         if (!ret)
3699                                 printf("Btree for root %llu is fixed\n",
3700                                        root->root_key.objectid);
3701                 }
3702         }
3703
3704         err = merge_root_recs(root, &root_node.root_cache, root_cache);
3705         if (err < 0)
3706                 ret = err;
3707
3708         if (root_node.current) {
3709                 root_node.current->checked = 1;
3710                 maybe_free_inode_rec(&root_node.inode_cache,
3711                                 root_node.current);
3712         }
3713
3714         err = check_inode_recs(root, &root_node.inode_cache);
3715         if (!ret)
3716                 ret = err;
3717
3718         free_corrupt_blocks_tree(&corrupt_blocks);
3719         root->fs_info->corrupt_blocks = NULL;
3720         free_orphan_data_extents(&root->orphan_data_extents);
3721         return ret;
3722 }
3723
3724 static int fs_root_objectid(u64 objectid)
3725 {
3726         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3728                 return 1;
3729         return is_fstree(objectid);
3730 }
3731
3732 static int check_fs_roots(struct btrfs_root *root,
3733                           struct cache_tree *root_cache)
3734 {
3735         struct btrfs_path path;
3736         struct btrfs_key key;
3737         struct walk_control wc;
3738         struct extent_buffer *leaf, *tree_node;
3739         struct btrfs_root *tmp_root;
3740         struct btrfs_root *tree_root = root->fs_info->tree_root;
3741         int ret;
3742         int err = 0;
3743
3744         if (ctx.progress_enabled) {
3745                 ctx.tp = TASK_FS_ROOTS;
3746                 task_start(ctx.info);
3747         }
3748
3749         /*
3750          * Just in case we made any changes to the extent tree that weren't
3751          * reflected into the free space cache yet.
3752          */
3753         if (repair)
3754                 reset_cached_block_groups(root->fs_info);
3755         memset(&wc, 0, sizeof(wc));
3756         cache_tree_init(&wc.shared);
3757         btrfs_init_path(&path);
3758
3759 again:
3760         key.offset = 0;
3761         key.objectid = 0;
3762         key.type = BTRFS_ROOT_ITEM_KEY;
3763         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3764         if (ret < 0) {
3765                 err = 1;
3766                 goto out;
3767         }
3768         tree_node = tree_root->node;
3769         while (1) {
3770                 if (tree_node != tree_root->node) {
3771                         free_root_recs_tree(root_cache);
3772                         btrfs_release_path(&path);
3773                         goto again;
3774                 }
3775                 leaf = path.nodes[0];
3776                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777                         ret = btrfs_next_leaf(tree_root, &path);
3778                         if (ret) {
3779                                 if (ret < 0)
3780                                         err = 1;
3781                                 break;
3782                         }
3783                         leaf = path.nodes[0];
3784                 }
3785                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787                     fs_root_objectid(key.objectid)) {
3788                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789                                 tmp_root = btrfs_read_fs_root_no_cache(
3790                                                 root->fs_info, &key);
3791                         } else {
3792                                 key.offset = (u64)-1;
3793                                 tmp_root = btrfs_read_fs_root(
3794                                                 root->fs_info, &key);
3795                         }
3796                         if (IS_ERR(tmp_root)) {
3797                                 err = 1;
3798                                 goto next;
3799                         }
3800                         ret = check_fs_root(tmp_root, root_cache, &wc);
3801                         if (ret == -EAGAIN) {
3802                                 free_root_recs_tree(root_cache);
3803                                 btrfs_release_path(&path);
3804                                 goto again;
3805                         }
3806                         if (ret)
3807                                 err = 1;
3808                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809                                 btrfs_free_fs_root(tmp_root);
3810                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811                            key.type == BTRFS_ROOT_BACKREF_KEY) {
3812                         process_root_ref(leaf, path.slots[0], &key,
3813                                          root_cache);
3814                 }
3815 next:
3816                 path.slots[0]++;
3817         }
3818 out:
3819         btrfs_release_path(&path);
3820         if (err)
3821                 free_extent_cache_tree(&wc.shared);
3822         if (!cache_tree_empty(&wc.shared))
3823                 fprintf(stderr, "warning line %d\n", __LINE__);
3824
3825         task_stop(ctx.info);
3826
3827         return err;
3828 }
3829
/*
 * Bit flags used as error/status codes by the check helpers below
 * (find_dir_item(), for example, returns ROOT_DIR_ERROR, DIR_ITEM_MISSING
 * or DIR_ITEM_MISMATCH).  Bit 0 is not assigned.
 */
#define ROOT_DIR_ERROR          (1 << 1)        /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1 << 2)        /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1 << 3)        /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1 << 4)        /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1 << 5)        /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1 << 6)        /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1 << 7)        /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1 << 8)        /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1 << 9)        /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1 << 10)       /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1 << 11)       /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1 << 12)       /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1 << 13)       /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1 << 14)       /* no inode_item */
#define LAST_ITEM               (1 << 15)       /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1 << 16)       /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1 << 17)       /* ROOT_REF found but not match */
3847
3848 /*
3849  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3850  * INODE_REF/INODE_EXTREF match.
3851  *
3852  * @root:       the root of the fs/file tree
3853  * @ref_key:    the key of the INODE_REF/INODE_EXTREF
3854  * @key:        the key of the DIR_ITEM/DIR_INDEX
3855  * @index:      the index in the INODE_REF/INODE_EXTREF, be used to
3856  *              distinguish root_dir between normal dir/file
3857  * @name:       the name in the INODE_REF/INODE_EXTREF
3858  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
3859  * @mode:       the st_mode of INODE_ITEM
3860  *
3861  * Return 0 if no error occurred.
3862  * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3863  * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
3864  * dir/file.
3865  * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3866  * not match for normal dir/file.
3867  */
3868 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3869                          struct btrfs_key *key, u64 index, char *name,
3870                          u32 namelen, u32 mode)
3871 {
3872         struct btrfs_path path;
3873         struct extent_buffer *node;
3874         struct btrfs_dir_item *di;
3875         struct btrfs_key location;
3876         char namebuf[BTRFS_NAME_LEN] = {0};
3877         u32 total;
3878         u32 cur = 0;
3879         u32 len;
3880         u32 name_len;
3881         u32 data_len;
3882         u8 filetype;
3883         int slot;
3884         int ret;
3885
3886         btrfs_init_path(&path);
3887         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3888         if (ret < 0) {
3889                 ret = DIR_ITEM_MISSING;
3890                 goto out;
3891         }
3892
3893         /* Process root dir and goto out*/
3894         if (index == 0) {
3895                 if (ret == 0) {
3896                         ret = ROOT_DIR_ERROR;
3897                         error(
3898                         "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3899                                 root->objectid,
3900                                 ref_key->type == BTRFS_INODE_REF_KEY ?
3901                                         "REF" : "EXTREF",
3902                                 ref_key->objectid, ref_key->offset,
3903                                 key->type == BTRFS_DIR_ITEM_KEY ?
3904                                         "DIR_ITEM" : "DIR_INDEX");
3905                 } else {
3906                         ret = 0;
3907                 }
3908
3909                 goto out;
3910         }
3911
3912         /* Process normal file/dir */
3913         if (ret > 0) {
3914                 ret = DIR_ITEM_MISSING;
3915                 error(
3916                 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3917                         root->objectid,
3918                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3919                         ref_key->objectid, ref_key->offset,
3920                         key->type == BTRFS_DIR_ITEM_KEY ?
3921                                 "DIR_ITEM" : "DIR_INDEX",
3922                         key->objectid, key->offset, namelen, name,
3923                         imode_to_type(mode));
3924                 goto out;
3925         }
3926
3927         /* Check whether inode_id/filetype/name match */
3928         node = path.nodes[0];
3929         slot = path.slots[0];
3930         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3931         total = btrfs_item_size_nr(node, slot);
3932         while (cur < total) {
3933                 ret = DIR_ITEM_MISMATCH;
3934                 name_len = btrfs_dir_name_len(node, di);
3935                 data_len = btrfs_dir_data_len(node, di);
3936
3937                 btrfs_dir_item_key_to_cpu(node, di, &location);
3938                 if (location.objectid != ref_key->objectid ||
3939                     location.type !=  BTRFS_INODE_ITEM_KEY ||
3940                     location.offset != 0)
3941                         goto next;
3942
3943                 filetype = btrfs_dir_type(node, di);
3944                 if (imode_to_type(mode) != filetype)
3945                         goto next;
3946
3947                 if (name_len <= BTRFS_NAME_LEN) {
3948                         len = name_len;
3949                 } else {
3950                         len = BTRFS_NAME_LEN;
3951                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3952                         root->objectid,
3953                         key->type == BTRFS_DIR_ITEM_KEY ?
3954                         "DIR_ITEM" : "DIR_INDEX",
3955                         key->objectid, key->offset, name_len);
3956                 }
3957                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3958                 if (len != namelen || strncmp(namebuf, name, len))
3959                         goto next;
3960
3961                 ret = 0;
3962                 goto out;
3963 next:
3964                 len = sizeof(*di) + name_len + data_len;
3965                 di = (struct btrfs_dir_item *)((char *)di + len);
3966                 cur += len;
3967         }
3968         if (ret == DIR_ITEM_MISMATCH)
3969                 error(
3970                 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3971                         root->objectid,
3972                         ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3973                         ref_key->objectid, ref_key->offset,
3974                         key->type == BTRFS_DIR_ITEM_KEY ?
3975                                 "DIR_ITEM" : "DIR_INDEX",
3976                         key->objectid, key->offset, namelen, name,
3977                         imode_to_type(mode));
3978 out:
3979         btrfs_release_path(&path);
3980         return ret;
3981 }
3982
3983 /*
3984  * Traverse the given INODE_REF and call find_dir_item() to find related
3985  * DIR_ITEM/DIR_INDEX.
3986  *
3987  * @root:       the root of the fs/file tree
3988  * @ref_key:    the key of the INODE_REF
3989  * @refs:       the count of INODE_REF
3990  * @mode:       the st_mode of INODE_ITEM
3991  *
3992  * Return 0 if no error occurred.
3993  */
3994 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3995                            struct extent_buffer *node, int slot, u64 *refs,
3996                            int mode)
3997 {
3998         struct btrfs_key key;
3999         struct btrfs_inode_ref *ref;
4000         char namebuf[BTRFS_NAME_LEN] = {0};
4001         u32 total;
4002         u32 cur = 0;
4003         u32 len;
4004         u32 name_len;
4005         u64 index;
4006         int ret, err = 0;
4007
4008         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4009         total = btrfs_item_size_nr(node, slot);
4010
4011 next:
4012         /* Update inode ref count */
4013         (*refs)++;
4014
4015         index = btrfs_inode_ref_index(node, ref);
4016         name_len = btrfs_inode_ref_name_len(node, ref);
4017         if (name_len <= BTRFS_NAME_LEN) {
4018                 len = name_len;
4019         } else {
4020                 len = BTRFS_NAME_LEN;
4021                 warning("root %llu INODE_REF[%llu %llu] name too long",
4022                         root->objectid, ref_key->objectid, ref_key->offset);
4023         }
4024
4025         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4026
4027         /* Check root dir ref name */
4028         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4029                 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4030                       root->objectid, ref_key->objectid, ref_key->offset,
4031                       namebuf);
4032                 err |= ROOT_DIR_ERROR;
4033         }
4034
4035         /* Find related DIR_INDEX */
4036         key.objectid = ref_key->offset;
4037         key.type = BTRFS_DIR_INDEX_KEY;
4038         key.offset = index;
4039         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4040         err |= ret;
4041
4042         /* Find related dir_item */
4043         key.objectid = ref_key->offset;
4044         key.type = BTRFS_DIR_ITEM_KEY;
4045         key.offset = btrfs_name_hash(namebuf, len);
4046         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4047         err |= ret;
4048
4049         len = sizeof(*ref) + name_len;
4050         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4051         cur += len;
4052         if (cur < total)
4053                 goto next;
4054
4055         return err;
4056 }
4057
4058 /*
4059  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4060  * DIR_ITEM/DIR_INDEX.
4061  *
4062  * @root:       the root of the fs/file tree
4063  * @ref_key:    the key of the INODE_EXTREF
4064  * @refs:       the count of INODE_EXTREF
4065  * @mode:       the st_mode of INODE_ITEM
4066  *
4067  * Return 0 if no error occurred.
4068  */
4069 static int check_inode_extref(struct btrfs_root *root,
4070                               struct btrfs_key *ref_key,
4071                               struct extent_buffer *node, int slot, u64 *refs,
4072                               int mode)
4073 {
4074         struct btrfs_key key;
4075         struct btrfs_inode_extref *extref;
4076         char namebuf[BTRFS_NAME_LEN] = {0};
4077         u32 total;
4078         u32 cur = 0;
4079         u32 len;
4080         u32 name_len;
4081         u64 index;
4082         u64 parent;
4083         int ret;
4084         int err = 0;
4085
4086         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4087         total = btrfs_item_size_nr(node, slot);
4088
4089 next:
4090         /* update inode ref count */
4091         (*refs)++;
4092         name_len = btrfs_inode_extref_name_len(node, extref);
4093         index = btrfs_inode_extref_index(node, extref);
4094         parent = btrfs_inode_extref_parent(node, extref);
4095         if (name_len <= BTRFS_NAME_LEN) {
4096                 len = name_len;
4097         } else {
4098                 len = BTRFS_NAME_LEN;
4099                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4100                         root->objectid, ref_key->objectid, ref_key->offset);
4101         }
4102         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4103
4104         /* Check root dir ref name */
4105         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4106                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4107                       root->objectid, ref_key->objectid, ref_key->offset,
4108                       namebuf);
4109                 err |= ROOT_DIR_ERROR;
4110         }
4111
4112         /* find related dir_index */
4113         key.objectid = parent;
4114         key.type = BTRFS_DIR_INDEX_KEY;
4115         key.offset = index;
4116         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4117         err |= ret;
4118
4119         /* find related dir_item */
4120         key.objectid = parent;
4121         key.type = BTRFS_DIR_ITEM_KEY;
4122         key.offset = btrfs_name_hash(namebuf, len);
4123         ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4124         err |= ret;
4125
4126         len = sizeof(*extref) + name_len;
4127         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4128         cur += len;
4129
4130         if (cur < total)
4131                 goto next;
4132
4133         return err;
4134 }
4135
4136 /*
4137  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4138  * DIR_ITEM/DIR_INDEX match.
4139  *
4140  * @root:       the root of the fs/file tree
4141  * @key:        the key of the INODE_REF/INODE_EXTREF
4142  * @name:       the name in the INODE_REF/INODE_EXTREF
4143  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4144  * @index:      the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4145  * to (u64)-1
4146  * @ext_ref:    the EXTENDED_IREF feature
4147  *
4148  * Return 0 if no error occurred.
4149  * Return >0 for error bitmap
4150  */
4151 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4152                           char *name, int namelen, u64 index,
4153                           unsigned int ext_ref)
4154 {
4155         struct btrfs_path path;
4156         struct btrfs_inode_ref *ref;
4157         struct btrfs_inode_extref *extref;
4158         struct extent_buffer *node;
4159         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4160         u32 total;
4161         u32 cur = 0;
4162         u32 len;
4163         u32 ref_namelen;
4164         u64 ref_index;
4165         u64 parent;
4166         u64 dir_id;
4167         int slot;
4168         int ret;
4169
4170         btrfs_init_path(&path);
4171         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4172         if (ret) {
4173                 ret = INODE_REF_MISSING;
4174                 goto extref;
4175         }
4176
4177         node = path.nodes[0];
4178         slot = path.slots[0];
4179
4180         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4181         total = btrfs_item_size_nr(node, slot);
4182
4183         /* Iterate all entry of INODE_REF */
4184         while (cur < total) {
4185                 ret = INODE_REF_MISSING;
4186
4187                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4188                 ref_index = btrfs_inode_ref_index(node, ref);
4189                 if (index != (u64)-1 && index != ref_index)
4190                         goto next_ref;
4191
4192                 if (ref_namelen <= BTRFS_NAME_LEN) {
4193                         len = ref_namelen;
4194                 } else {
4195                         len = BTRFS_NAME_LEN;
4196                         warning("root %llu INODE %s[%llu %llu] name too long",
4197                                 root->objectid,
4198                                 key->type == BTRFS_INODE_REF_KEY ?
4199                                         "REF" : "EXTREF",
4200                                 key->objectid, key->offset);
4201                 }
4202                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4203                                    len);
4204
4205                 if (len != namelen || strncmp(ref_namebuf, name, len))
4206                         goto next_ref;
4207
4208                 ret = 0;
4209                 goto out;
4210 next_ref:
4211                 len = sizeof(*ref) + ref_namelen;
4212                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4213                 cur += len;
4214         }
4215
4216 extref:
4217         /* Skip if not support EXTENDED_IREF feature */
4218         if (!ext_ref)
4219                 goto out;
4220
4221         btrfs_release_path(&path);
4222         btrfs_init_path(&path);
4223
4224         dir_id = key->offset;
4225         key->type = BTRFS_INODE_EXTREF_KEY;
4226         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4227
4228         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4229         if (ret) {
4230                 ret = INODE_REF_MISSING;
4231                 goto out;
4232         }
4233
4234         node = path.nodes[0];
4235         slot = path.slots[0];
4236
4237         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4238         cur = 0;
4239         total = btrfs_item_size_nr(node, slot);
4240
4241         /* Iterate all entry of INODE_EXTREF */
4242         while (cur < total) {
4243                 ret = INODE_REF_MISSING;
4244
4245                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4246                 ref_index = btrfs_inode_extref_index(node, extref);
4247                 parent = btrfs_inode_extref_parent(node, extref);
4248                 if (index != (u64)-1 && index != ref_index)
4249                         goto next_extref;
4250
4251                 if (parent != dir_id)
4252                         goto next_extref;
4253
4254                 if (ref_namelen <= BTRFS_NAME_LEN) {
4255                         len = ref_namelen;
4256                 } else {
4257                         len = BTRFS_NAME_LEN;
4258                         warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4259                                 root->objectid,
4260                                 key->type == BTRFS_INODE_REF_KEY ?
4261                                         "REF" : "EXTREF",
4262                                 key->objectid, key->offset);
4263                 }
4264                 read_extent_buffer(node, ref_namebuf,
4265                                    (unsigned long)(extref + 1), len);
4266
4267                 if (len != namelen || strncmp(ref_namebuf, name, len))
4268                         goto next_extref;
4269
4270                 ret = 0;
4271                 goto out;
4272
4273 next_extref:
4274                 len = sizeof(*extref) + ref_namelen;
4275                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4276                 cur += len;
4277
4278         }
4279 out:
4280         btrfs_release_path(&path);
4281         return ret;
4282 }
4283
4284 /*
4285  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4286  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4287  *
4288  * @root:       the root of the fs/file tree
4289  * @key:        the key of the INODE_REF/INODE_EXTREF
4290  * @size:       the st_size of the INODE_ITEM
4291  * @ext_ref:    the EXTENDED_IREF feature
4292  *
4293  * Return 0 if no error occurred.
4294  */
4295 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4296                           struct extent_buffer *node, int slot, u64 *size,
4297                           unsigned int ext_ref)
4298 {
4299         struct btrfs_dir_item *di;
4300         struct btrfs_inode_item *ii;
4301         struct btrfs_path path;
4302         struct btrfs_key location;
4303         char namebuf[BTRFS_NAME_LEN] = {0};
4304         u32 total;
4305         u32 cur = 0;
4306         u32 len;
4307         u32 name_len;
4308         u32 data_len;
4309         u8 filetype;
4310         u32 mode;
4311         u64 index;
4312         int ret;
4313         int err = 0;
4314
4315         /*
4316          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4317          * ignore index check.
4318          */
4319         index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4320
4321         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4322         total = btrfs_item_size_nr(node, slot);
4323
4324         while (cur < total) {
4325                 data_len = btrfs_dir_data_len(node, di);
4326                 if (data_len)
4327                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4328                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4329                               "DIR_ITEM" : "DIR_INDEX",
4330                               key->objectid, key->offset, data_len);
4331
4332                 name_len = btrfs_dir_name_len(node, di);
4333                 if (name_len <= BTRFS_NAME_LEN) {
4334                         len = name_len;
4335                 } else {
4336                         len = BTRFS_NAME_LEN;
4337                         warning("root %llu %s[%llu %llu] name too long",
4338                                 root->objectid,
4339                                 key->type == BTRFS_DIR_ITEM_KEY ?
4340                                 "DIR_ITEM" : "DIR_INDEX",
4341                                 key->objectid, key->offset);
4342                 }
4343                 (*size) += name_len;
4344
4345                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4346                 filetype = btrfs_dir_type(node, di);
4347
4348                 btrfs_init_path(&path);
4349                 btrfs_dir_item_key_to_cpu(node, di, &location);
4350
4351                 /* Ignore related ROOT_ITEM check */
4352                 if (location.type == BTRFS_ROOT_ITEM_KEY)
4353                         goto next;
4354
4355                 /* Check relative INODE_ITEM(existence/filetype) */
4356                 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4357                 if (ret) {
4358                         err |= INODE_ITEM_MISSING;
4359                         error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4360                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4361                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4362                               key->offset, location.objectid, name_len,
4363                               namebuf, filetype);
4364                         goto next;
4365                 }
4366
4367                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4368                                     struct btrfs_inode_item);
4369                 mode = btrfs_inode_mode(path.nodes[0], ii);
4370
4371                 if (imode_to_type(mode) != filetype) {
4372                         err |= INODE_ITEM_MISMATCH;
4373                         error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4374                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4375                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4376                               key->offset, name_len, namebuf, filetype);
4377                 }
4378
4379                 /* Check relative INODE_REF/INODE_EXTREF */
4380                 location.type = BTRFS_INODE_REF_KEY;
4381                 location.offset = key->objectid;
4382                 ret = find_inode_ref(root, &location, namebuf, len,
4383                                        index, ext_ref);
4384                 err |= ret;
4385                 if (ret & INODE_REF_MISSING)
4386                         error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4387                               root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4388                               "DIR_ITEM" : "DIR_INDEX", key->objectid,
4389                               key->offset, name_len, namebuf, filetype);
4390
4391 next:
4392                 btrfs_release_path(&path);
4393                 len = sizeof(*di) + name_len + data_len;
4394                 di = (struct btrfs_dir_item *)((char *)di + len);
4395                 cur += len;
4396
4397                 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4398                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4399                               root->objectid, key->objectid, key->offset);
4400                         break;
4401                 }
4402         }
4403
4404         return err;
4405 }
4406
4407 /*
4408  * Check file extent datasum/hole, update the size of the file extents,
4409  * check and update the last offset of the file extent.
4410  *
4411  * @root:       the root of fs/file tree.
4412  * @fkey:       the key of the file extent.
4413  * @nodatasum:  INODE_NODATASUM feature.
4414  * @size:       the sum of all EXTENT_DATA items size for this inode.
4415  * @end:        the offset of the last extent.
4416  *
4417  * Return 0 if no error occurred.
4418  */
4419 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4420                              struct extent_buffer *node, int slot,
4421                              unsigned int nodatasum, u64 *size, u64 *end)
4422 {
4423         struct btrfs_file_extent_item *fi;
4424         u64 disk_bytenr;
4425         u64 disk_num_bytes;
4426         u64 extent_num_bytes;
4427         u64 found;
4428         unsigned int extent_type;
4429         unsigned int is_hole;
4430         int ret;
4431         int err = 0;
4432
4433         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4434
4435         extent_type = btrfs_file_extent_type(node, fi);
4436         /* Skip if file extent is inline */
4437         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4438                 struct btrfs_item *e = btrfs_item_nr(slot);
4439                 u32 item_inline_len;
4440
4441                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4442                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4443                 if (extent_num_bytes == 0 ||
4444                     extent_num_bytes != item_inline_len)
4445                         err |= FILE_EXTENT_ERROR;
4446                 *size += extent_num_bytes;
4447                 return err;
4448         }
4449
4450         /* Check extent type */
4451         if (extent_type != BTRFS_FILE_EXTENT_REG &&
4452                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4453                 err |= FILE_EXTENT_ERROR;
4454                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4455                       root->objectid, fkey->objectid, fkey->offset);
4456                 return err;
4457         }
4458
4459         /* Check REG_EXTENT/PREALLOC_EXTENT */
4460         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4461         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4462         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4463         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4464
4465         /* Check EXTENT_DATA datasum */
4466         ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4467         if (found > 0 && nodatasum) {
4468                 err |= ODD_CSUM_ITEM;
4469                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4470                       root->objectid, fkey->objectid, fkey->offset);
4471         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4472                    !is_hole &&
4473                    (ret < 0 || found == 0 || found < disk_num_bytes)) {
4474                 err |= CSUM_ITEM_MISSING;
4475                 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4476                       root->objectid, fkey->objectid, fkey->offset);
4477         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4478                 err |= ODD_CSUM_ITEM;
4479                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4480                       root->objectid, fkey->objectid, fkey->offset);
4481         }
4482
4483         /* Check EXTENT_DATA hole */
4484         if (no_holes && is_hole) {
4485                 err |= FILE_EXTENT_ERROR;
4486                 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4487                       root->objectid, fkey->objectid, fkey->offset);
4488         } else if (!no_holes && *end != fkey->offset) {
4489                 err |= FILE_EXTENT_ERROR;
4490                 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4491                       root->objectid, fkey->objectid, fkey->offset);
4492         }
4493
4494         *end += extent_num_bytes;
4495         if (!is_hole)
4496                 *size += extent_num_bytes;
4497
4498         return err;
4499 }
4500
4501 /*
4502  * Check INODE_ITEM and related ITEMs (the same inode number)
4503  * 1. check link count
4504  * 2. check inode ref/extref
4505  * 3. check dir item/index
4506  *
4507  * @ext_ref:    the EXTENDED_IREF feature
4508  *
4509  * Return 0 if no error occurred.
4510  * Return >0 for error or hit the traversal is done(by error bitmap)
4511  */
4512 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4513                             unsigned int ext_ref)
4514 {
4515         struct extent_buffer *node;
4516         struct btrfs_inode_item *ii;
4517         struct btrfs_key key;
4518         u64 inode_id;
4519         u32 mode;
4520         u64 nlink;
4521         u64 nbytes;
4522         u64 isize;
4523         u64 size = 0;
4524         u64 refs = 0;
4525         u64 extent_end = 0;
4526         u64 extent_size = 0;
4527         unsigned int dir;
4528         unsigned int nodatasum;
4529         int slot;
4530         int ret;
4531         int err = 0;
4532
4533         node = path->nodes[0];
4534         slot = path->slots[0];
4535
4536         btrfs_item_key_to_cpu(node, &key, slot);
4537         inode_id = key.objectid;
4538
4539         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4540                 ret = btrfs_next_item(root, path);
4541                 if (ret > 0)
4542                         err |= LAST_ITEM;
4543                 return err;
4544         }
4545
4546         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4547         isize = btrfs_inode_size(node, ii);
4548         nbytes = btrfs_inode_nbytes(node, ii);
4549         mode = btrfs_inode_mode(node, ii);
4550         dir = imode_to_type(mode) == BTRFS_FT_DIR;
4551         nlink = btrfs_inode_nlink(node, ii);
4552         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4553
4554         while (1) {
4555                 ret = btrfs_next_item(root, path);
4556                 if (ret < 0) {
4557                         /* out will fill 'err' rusing current statistics */
4558                         goto out;
4559                 } else if (ret > 0) {
4560                         err |= LAST_ITEM;
4561                         goto out;
4562                 }
4563
4564                 node = path->nodes[0];
4565                 slot = path->slots[0];
4566                 btrfs_item_key_to_cpu(node, &key, slot);
4567                 if (key.objectid != inode_id)
4568                         goto out;
4569
4570                 switch (key.type) {
4571                 case BTRFS_INODE_REF_KEY:
4572                         ret = check_inode_ref(root, &key, node, slot, &refs,
4573                                               mode);
4574                         err |= ret;
4575                         break;
4576                 case BTRFS_INODE_EXTREF_KEY:
4577                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4578                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
4579                                         root->objectid, key.objectid,
4580                                         key.offset);
4581                         ret = check_inode_extref(root, &key, node, slot, &refs,
4582                                                  mode);
4583                         err |= ret;
4584                         break;
4585                 case BTRFS_DIR_ITEM_KEY:
4586                 case BTRFS_DIR_INDEX_KEY:
4587                         if (!dir) {
4588                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4589                                         root->objectid, inode_id,
4590                                         imode_to_type(mode), key.objectid,
4591                                         key.offset);
4592                         }
4593                         ret = check_dir_item(root, &key, node, slot, &size,
4594                                              ext_ref);
4595                         err |= ret;
4596                         break;
4597                 case BTRFS_EXTENT_DATA_KEY:
4598                         if (dir) {
4599                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4600                                         root->objectid, inode_id, key.objectid,
4601                                         key.offset);
4602                         }
4603                         ret = check_file_extent(root, &key, node, slot,
4604                                                 nodatasum, &extent_size,
4605                                                 &extent_end);
4606                         err |= ret;
4607                         break;
4608                 case BTRFS_XATTR_ITEM_KEY:
4609                         break;
4610                 default:
4611                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4612                               key.objectid, key.type, key.offset);
4613                 }
4614         }
4615
4616 out:
4617         /* verify INODE_ITEM nlink/isize/nbytes */
4618         if (dir) {
4619                 if (nlink != 1) {
4620                         err |= LINK_COUNT_ERROR;
4621                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4622                               root->objectid, inode_id, nlink);
4623                 }
4624
4625                 /*
4626                  * Just a warning, as dir inode nbytes is just an
4627                  * instructive value.
4628                  */
4629                 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4630                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4631                                 root->objectid, inode_id, root->nodesize);
4632                 }
4633
4634                 if (isize != size) {
4635                         err |= ISIZE_ERROR;
4636                         error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4637                               root->objectid, inode_id, isize, size);
4638                 }
4639         } else {
4640                 if (nlink != refs) {
4641                         err |= LINK_COUNT_ERROR;
4642                         error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4643                               root->objectid, inode_id, nlink, refs);
4644                 } else if (!nlink) {
4645                         err |= ORPHAN_ITEM;
4646                 }
4647
4648                 if (!nbytes && !no_holes && extent_end < isize) {
4649                         err |= NBYTES_ERROR;
4650                         error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4651                               root->objectid, inode_id, isize);
4652                 }
4653
4654                 if (nbytes != extent_size) {
4655                         err |= NBYTES_ERROR;
4656                         error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4657                               root->objectid, inode_id, nbytes, extent_size);
4658                 }
4659         }
4660
4661         return err;
4662 }
4663
4664 /*
4665  * Iterate all item on the tree and call check_inode_item() to check.
4666  *
4667  * @root:       the root of the tree to be checked.
4668  * @ext_ref:    the EXTENDED_IREF feature
4669  *
4670  * Return 0 if no error found.
4671  * Return <0 for error.
4672  * All internal error bitmap will be converted to -EIO, to avoid
4673  * mixing negative and postive return value.
4674  */
4675 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
4676 {
4677         struct btrfs_path *path;
4678         struct btrfs_key key;
4679         u64 inode_id;
4680         int ret, err = 0;
4681
4682         path = btrfs_alloc_path();
4683         if (!path)
4684                 return -ENOMEM;
4685
4686         key.objectid = 0;
4687         key.type = 0;
4688         key.offset = 0;
4689
4690         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4691         if (ret < 0)
4692                 goto out;
4693
4694         while (1) {
4695                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4696
4697                 /*
4698                  * All check must start with inode item, skip if not
4699                  */
4700                 if (key.type == BTRFS_INODE_ITEM_KEY) {
4701                         ret = check_inode_item(root, path, ext_ref);
4702                         err |= ret;
4703                         if (err & LAST_ITEM)
4704                                 goto out;
4705                         continue;
4706                 }
4707                 error("root %llu ITEM[%llu %u %llu] isn't INODE_ITEM, skip to next inode",
4708                       root->objectid, key.objectid, key.type,
4709                       key.offset);
4710
4711                 err |= NO_INODE_ITEM;
4712                 inode_id = key.objectid;
4713
4714                 /*
4715                  * skip to next inode
4716                  * TODO: Maybe search_slot() will be faster?
4717                  */
4718                 do {
4719                         ret = btrfs_next_item(root, path);
4720                         if (ret > 0) {
4721                                 goto out;
4722                         } else if (ret < 0) {
4723                                 err = ret;
4724                                 goto out;
4725                         }
4726                         btrfs_item_key_to_cpu(path->nodes[0], &key,
4727                                               path->slots[0]);
4728                 } while (inode_id == key.objectid);
4729         }
4730
4731 out:
4732         err &= ~LAST_ITEM;
4733         if (err && !ret)
4734                 ret = -EIO;
4735         btrfs_free_path(path);
4736         return ret;
4737 }
4738
4739 /*
4740  * Find the relative ref for root_ref and root_backref.
4741  *
4742  * @root:       the root of the root tree.
4743  * @ref_key:    the key of the root ref.
4744  *
4745  * Return 0 if no error occurred.
4746  */
4747 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4748                           struct extent_buffer *node, int slot)
4749 {
4750         struct btrfs_path *path;
4751         struct btrfs_key key;
4752         struct btrfs_root_ref *ref;
4753         struct btrfs_root_ref *backref;
4754         char ref_name[BTRFS_NAME_LEN] = {0};
4755         char backref_name[BTRFS_NAME_LEN] = {0};
4756         u64 ref_dirid;
4757         u64 ref_seq;
4758         u32 ref_namelen;
4759         u64 backref_dirid;
4760         u64 backref_seq;
4761         u32 backref_namelen;
4762         u32 len;
4763         int ret;
4764         int err = 0;
4765
4766         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
4767         ref_dirid = btrfs_root_ref_dirid(node, ref);
4768         ref_seq = btrfs_root_ref_sequence(node, ref);
4769         ref_namelen = btrfs_root_ref_name_len(node, ref);
4770
4771         if (ref_namelen <= BTRFS_NAME_LEN) {
4772                 len = ref_namelen;
4773         } else {
4774                 len = BTRFS_NAME_LEN;
4775                 warning("%s[%llu %llu] ref_name too long",
4776                         ref_key->type == BTRFS_ROOT_REF_KEY ?
4777                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
4778                         ref_key->offset);
4779         }
4780         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
4781
4782         /* Find relative root_ref */
4783         key.objectid = ref_key->offset;
4784         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
4785         key.offset = ref_key->objectid;
4786
4787         path = btrfs_alloc_path();
4788         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4789         if (ret) {
4790                 err |= ROOT_REF_MISSING;
4791                 error("%s[%llu %llu] couldn't find relative ref",
4792                       ref_key->type == BTRFS_ROOT_REF_KEY ?
4793                       "ROOT_REF" : "ROOT_BACKREF",
4794                       ref_key->objectid, ref_key->offset);
4795                 goto out;
4796         }
4797
4798         backref = btrfs_item_ptr(path->nodes[0], path->slots[0],
4799                                  struct btrfs_root_ref);
4800         backref_dirid = btrfs_root_ref_dirid(path->nodes[0], backref);
4801         backref_seq = btrfs_root_ref_sequence(path->nodes[0], backref);
4802         backref_namelen = btrfs_root_ref_name_len(path->nodes[0], backref);
4803
4804         if (backref_namelen <= BTRFS_NAME_LEN) {
4805                 len = backref_namelen;
4806         } else {
4807                 len = BTRFS_NAME_LEN;
4808                 warning("%s[%llu %llu] ref_name too long",
4809                         key.type == BTRFS_ROOT_REF_KEY ?
4810                         "ROOT_REF" : "ROOT_BACKREF",
4811                         key.objectid, key.offset);
4812         }
4813         read_extent_buffer(path->nodes[0], backref_name,
4814                            (unsigned long)(backref + 1), len);
4815
4816         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
4817             ref_namelen != backref_namelen ||
4818             strncmp(ref_name, backref_name, len)) {
4819                 err |= ROOT_REF_MISMATCH;
4820                 error("%s[%llu %llu] mismatch relative ref",
4821                       ref_key->type == BTRFS_ROOT_REF_KEY ?
4822                       "ROOT_REF" : "ROOT_BACKREF",
4823                       ref_key->objectid, ref_key->offset);
4824         }
4825 out:
4826         btrfs_free_path(path);
4827         return err;
4828 }
4829
4830 /*
4831  * Check all fs/file tree in low_memory mode.
4832  *
4833  * 1. for fs tree root item, call check_fs_root_v2()
4834  * 2. for fs tree root ref/backref, call check_root_ref()
4835  *
4836  * Return 0 if no error occurred.
4837  */
4838 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
4839 {
4840         struct btrfs_root *tree_root = fs_info->tree_root;
4841         struct btrfs_root *cur_root = NULL;
4842         struct btrfs_path *path;
4843         struct btrfs_key key;
4844         struct extent_buffer *node;
4845         unsigned int ext_ref;
4846         int slot;
4847         int ret;
4848         int err = 0;
4849
4850         ext_ref = btrfs_fs_incompat(fs_info,
4851                                     BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF);
4852
4853         path = btrfs_alloc_path();
4854         if (!path)
4855                 return -ENOMEM;
4856
4857         key.objectid = BTRFS_FS_TREE_OBJECTID;
4858         key.offset = 0;
4859         key.type = BTRFS_ROOT_ITEM_KEY;
4860
4861         ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
4862         if (ret < 0) {
4863                 err = ret;
4864                 goto out;
4865         } else if (ret > 0) {
4866                 err = -ENOENT;
4867                 goto out;
4868         }
4869
4870         while (1) {
4871                 node = path->nodes[0];
4872                 slot = path->slots[0];
4873                 btrfs_item_key_to_cpu(node, &key, slot);
4874                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
4875                         goto out;
4876                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4877                     fs_root_objectid(key.objectid)) {
4878                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4879                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
4880                                                                        &key);
4881                         } else {
4882                                 key.offset = (u64)-1;
4883                                 cur_root = btrfs_read_fs_root(fs_info, &key);
4884                         }
4885
4886                         if (IS_ERR(cur_root)) {
4887                                 error("Fail to read fs/subvol tree: %lld",
4888                                       key.objectid);
4889                                 err = -EIO;
4890                                 goto next;
4891                         }
4892
4893                         ret = check_fs_root_v2(cur_root, ext_ref);
4894                         err |= ret;
4895
4896                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4897                                 btrfs_free_fs_root(cur_root);
4898                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4899                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
4900                         ret = check_root_ref(tree_root, &key, node, slot);
4901                         err |= ret;
4902                 }
4903 next:
4904                 ret = btrfs_next_item(tree_root, path);
4905                 if (ret > 0)
4906                         goto out;
4907                 if (ret < 0) {
4908                         err = ret;
4909                         goto out;
4910                 }
4911         }
4912
4913 out:
4914         btrfs_free_path(path);
4915         return err;
4916 }
4917
4918 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4919 {
4920         struct list_head *cur = rec->backrefs.next;
4921         struct extent_backref *back;
4922         struct tree_backref *tback;
4923         struct data_backref *dback;
4924         u64 found = 0;
4925         int err = 0;
4926
4927         while(cur != &rec->backrefs) {
4928                 back = to_extent_backref(cur);
4929                 cur = cur->next;
4930                 if (!back->found_extent_tree) {
4931                         err = 1;
4932                         if (!print_errs)
4933                                 goto out;
4934                         if (back->is_data) {
4935                                 dback = to_data_backref(back);
4936                                 fprintf(stderr, "Backref %llu %s %llu"
4937                                         " owner %llu offset %llu num_refs %lu"
4938                                         " not found in extent tree\n",
4939                                         (unsigned long long)rec->start,
4940                                         back->full_backref ?
4941                                         "parent" : "root",
4942                                         back->full_backref ?
4943                                         (unsigned long long)dback->parent:
4944                                         (unsigned long long)dback->root,
4945                                         (unsigned long long)dback->owner,
4946                                         (unsigned long long)dback->offset,
4947                                         (unsigned long)dback->num_refs);
4948                         } else {
4949                                 tback = to_tree_backref(back);
4950                                 fprintf(stderr, "Backref %llu parent %llu"
4951                                         " root %llu not found in extent tree\n",
4952                                         (unsigned long long)rec->start,
4953                                         (unsigned long long)tback->parent,
4954                                         (unsigned long long)tback->root);
4955                         }
4956                 }
4957                 if (!back->is_data && !back->found_ref) {
4958                         err = 1;
4959                         if (!print_errs)
4960                                 goto out;
4961                         tback = to_tree_backref(back);
4962                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4963                                 (unsigned long long)rec->start,
4964                                 back->full_backref ? "parent" : "root",
4965                                 back->full_backref ?
4966                                 (unsigned long long)tback->parent :
4967                                 (unsigned long long)tback->root, back);
4968                 }
4969                 if (back->is_data) {
4970                         dback = to_data_backref(back);
4971                         if (dback->found_ref != dback->num_refs) {
4972                                 err = 1;
4973                                 if (!print_errs)
4974                                         goto out;
4975                                 fprintf(stderr, "Incorrect local backref count"
4976                                         " on %llu %s %llu owner %llu"
4977                                         " offset %llu found %u wanted %u back %p\n",
4978                                         (unsigned long long)rec->start,
4979                                         back->full_backref ?
4980                                         "parent" : "root",
4981                                         back->full_backref ?
4982                                         (unsigned long long)dback->parent:
4983                                         (unsigned long long)dback->root,
4984                                         (unsigned long long)dback->owner,
4985                                         (unsigned long long)dback->offset,
4986                                         dback->found_ref, dback->num_refs, back);
4987                         }
4988                         if (dback->disk_bytenr != rec->start) {
4989                                 err = 1;
4990                                 if (!print_errs)
4991                                         goto out;
4992                                 fprintf(stderr, "Backref disk bytenr does not"
4993                                         " match extent record, bytenr=%llu, "
4994                                         "ref bytenr=%llu\n",
4995                                         (unsigned long long)rec->start,
4996                                         (unsigned long long)dback->disk_bytenr);
4997                         }
4998
4999                         if (dback->bytes != rec->nr) {
5000                                 err = 1;
5001                                 if (!print_errs)
5002                                         goto out;
5003                                 fprintf(stderr, "Backref bytes do not match "
5004                                         "extent backref, bytenr=%llu, ref "
5005                                         "bytes=%llu, backref bytes=%llu\n",
5006                                         (unsigned long long)rec->start,
5007                                         (unsigned long long)rec->nr,
5008                                         (unsigned long long)dback->bytes);
5009                         }
5010                 }
5011                 if (!back->is_data) {
5012                         found += 1;
5013                 } else {
5014                         dback = to_data_backref(back);
5015                         found += dback->found_ref;
5016                 }
5017         }
5018         if (found != rec->refs) {
5019                 err = 1;
5020                 if (!print_errs)
5021                         goto out;
5022                 fprintf(stderr, "Incorrect global backref count "
5023                         "on %llu found %llu wanted %llu\n",
5024                         (unsigned long long)rec->start,
5025                         (unsigned long long)found,
5026                         (unsigned long long)rec->refs);
5027         }
5028 out:
5029         return err;
5030 }
5031
5032 static int free_all_extent_backrefs(struct extent_record *rec)
5033 {
5034         struct extent_backref *back;
5035         struct list_head *cur;
5036         while (!list_empty(&rec->backrefs)) {
5037                 cur = rec->backrefs.next;
5038                 back = to_extent_backref(cur);
5039                 list_del(cur);
5040                 free(back);
5041         }
5042         return 0;
5043 }
5044
5045 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
5046                                      struct cache_tree *extent_cache)
5047 {
5048         struct cache_extent *cache;
5049         struct extent_record *rec;
5050
5051         while (1) {
5052                 cache = first_cache_extent(extent_cache);
5053                 if (!cache)
5054                         break;
5055                 rec = container_of(cache, struct extent_record, cache);
5056                 remove_cache_extent(extent_cache, cache);
5057                 free_all_extent_backrefs(rec);
5058                 free(rec);
5059         }
5060 }
5061
5062 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5063                                  struct extent_record *rec)
5064 {
5065         if (rec->content_checked && rec->owner_ref_checked &&
5066             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5067             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5068             !rec->bad_full_backref && !rec->crossing_stripes &&
5069             !rec->wrong_chunk_type) {
5070                 remove_cache_extent(extent_cache, &rec->cache);
5071                 free_all_extent_backrefs(rec);
5072                 list_del_init(&rec->list);
5073                 free(rec);
5074         }
5075         return 0;
5076 }
5077
5078 static int check_owner_ref(struct btrfs_root *root,
5079                             struct extent_record *rec,
5080                             struct extent_buffer *buf)
5081 {
5082         struct extent_backref *node;
5083         struct tree_backref *back;
5084         struct btrfs_root *ref_root;
5085         struct btrfs_key key;
5086         struct btrfs_path path;
5087         struct extent_buffer *parent;
5088         int level;
5089         int found = 0;
5090         int ret;
5091
5092         list_for_each_entry(node, &rec->backrefs, list) {
5093                 if (node->is_data)
5094                         continue;
5095                 if (!node->found_ref)
5096                         continue;
5097                 if (node->full_backref)
5098                         continue;
5099                 back = to_tree_backref(node);
5100                 if (btrfs_header_owner(buf) == back->root)
5101                         return 0;
5102         }
5103         BUG_ON(rec->is_root);
5104
5105         /* try to find the block by search corresponding fs tree */
5106         key.objectid = btrfs_header_owner(buf);
5107         key.type = BTRFS_ROOT_ITEM_KEY;
5108         key.offset = (u64)-1;
5109
5110         ref_root = btrfs_read_fs_root(root->fs_info, &key);
5111         if (IS_ERR(ref_root))
5112                 return 1;
5113
5114         level = btrfs_header_level(buf);
5115         if (level == 0)
5116                 btrfs_item_key_to_cpu(buf, &key, 0);
5117         else
5118                 btrfs_node_key_to_cpu(buf, &key, 0);
5119
5120         btrfs_init_path(&path);
5121         path.lowest_level = level + 1;
5122         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5123         if (ret < 0)
5124                 return 0;
5125
5126         parent = path.nodes[level + 1];
5127         if (parent && buf->start == btrfs_node_blockptr(parent,
5128                                                         path.slots[level + 1]))
5129                 found = 1;
5130
5131         btrfs_release_path(&path);
5132         return found ? 0 : 1;
5133 }
5134
5135 static int is_extent_tree_record(struct extent_record *rec)
5136 {
5137         struct list_head *cur = rec->backrefs.next;
5138         struct extent_backref *node;
5139         struct tree_backref *back;
5140         int is_extent = 0;
5141
5142         while(cur != &rec->backrefs) {
5143                 node = to_extent_backref(cur);
5144                 cur = cur->next;
5145                 if (node->is_data)
5146                         return 0;
5147                 back = to_tree_backref(node);
5148                 if (node->full_backref)
5149                         return 0;
5150                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5151                         is_extent = 1;
5152         }
5153         return is_extent;
5154 }
5155
5156
5157 static int record_bad_block_io(struct btrfs_fs_info *info,
5158                                struct cache_tree *extent_cache,
5159                                u64 start, u64 len)
5160 {
5161         struct extent_record *rec;
5162         struct cache_extent *cache;
5163         struct btrfs_key key;
5164
5165         cache = lookup_cache_extent(extent_cache, start, len);
5166         if (!cache)
5167                 return 0;
5168
5169         rec = container_of(cache, struct extent_record, cache);
5170         if (!is_extent_tree_record(rec))
5171                 return 0;
5172
5173         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5174         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5175 }
5176
5177 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5178                        struct extent_buffer *buf, int slot)
5179 {
5180         if (btrfs_header_level(buf)) {
5181                 struct btrfs_key_ptr ptr1, ptr2;
5182
5183                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5184                                    sizeof(struct btrfs_key_ptr));
5185                 read_extent_buffer(buf, &ptr2,
5186                                    btrfs_node_key_ptr_offset(slot + 1),
5187                                    sizeof(struct btrfs_key_ptr));
5188                 write_extent_buffer(buf, &ptr1,
5189                                     btrfs_node_key_ptr_offset(slot + 1),
5190                                     sizeof(struct btrfs_key_ptr));
5191                 write_extent_buffer(buf, &ptr2,
5192                                     btrfs_node_key_ptr_offset(slot),
5193                                     sizeof(struct btrfs_key_ptr));
5194                 if (slot == 0) {
5195                         struct btrfs_disk_key key;
5196                         btrfs_node_key(buf, &key, 0);
5197                         btrfs_fixup_low_keys(root, path, &key,
5198                                              btrfs_header_level(buf) + 1);
5199                 }
5200         } else {
5201                 struct btrfs_item *item1, *item2;
5202                 struct btrfs_key k1, k2;
5203                 char *item1_data, *item2_data;
5204                 u32 item1_offset, item2_offset, item1_size, item2_size;
5205
5206                 item1 = btrfs_item_nr(slot);
5207                 item2 = btrfs_item_nr(slot + 1);
5208                 btrfs_item_key_to_cpu(buf, &k1, slot);
5209                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5210                 item1_offset = btrfs_item_offset(buf, item1);
5211                 item2_offset = btrfs_item_offset(buf, item2);
5212                 item1_size = btrfs_item_size(buf, item1);
5213                 item2_size = btrfs_item_size(buf, item2);
5214
5215                 item1_data = malloc(item1_size);
5216                 if (!item1_data)
5217                         return -ENOMEM;
5218                 item2_data = malloc(item2_size);
5219                 if (!item2_data) {
5220                         free(item1_data);
5221                         return -ENOMEM;
5222                 }
5223
5224                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5225                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5226
5227                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5228                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5229                 free(item1_data);
5230                 free(item2_data);
5231
5232                 btrfs_set_item_offset(buf, item1, item2_offset);
5233                 btrfs_set_item_offset(buf, item2, item1_offset);
5234                 btrfs_set_item_size(buf, item1, item2_size);
5235                 btrfs_set_item_size(buf, item2, item1_size);
5236
5237                 path->slots[0] = slot;
5238                 btrfs_set_item_key_unsafe(root, path, &k2);
5239                 path->slots[0] = slot + 1;
5240                 btrfs_set_item_key_unsafe(root, path, &k1);
5241         }
5242         return 0;
5243 }
5244
5245 static int fix_key_order(struct btrfs_trans_handle *trans,
5246                          struct btrfs_root *root,
5247                          struct btrfs_path *path)
5248 {
5249         struct extent_buffer *buf;
5250         struct btrfs_key k1, k2;
5251         int i;
5252         int level = path->lowest_level;
5253         int ret = -EIO;
5254
5255         buf = path->nodes[level];
5256         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5257                 if (level) {
5258                         btrfs_node_key_to_cpu(buf, &k1, i);
5259                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
5260                 } else {
5261                         btrfs_item_key_to_cpu(buf, &k1, i);
5262                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
5263                 }
5264                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5265                         continue;
5266                 ret = swap_values(root, path, buf, i);
5267                 if (ret)
5268                         break;
5269                 btrfs_mark_buffer_dirty(buf);
5270                 i = 0;
5271         }
5272         return ret;
5273 }
5274
5275 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5276                              struct btrfs_root *root,
5277                              struct btrfs_path *path,
5278                              struct extent_buffer *buf, int slot)
5279 {
5280         struct btrfs_key key;
5281         int nritems = btrfs_header_nritems(buf);
5282
5283         btrfs_item_key_to_cpu(buf, &key, slot);
5284
5285         /* These are all the keys we can deal with missing. */
5286         if (key.type != BTRFS_DIR_INDEX_KEY &&
5287             key.type != BTRFS_EXTENT_ITEM_KEY &&
5288             key.type != BTRFS_METADATA_ITEM_KEY &&
5289             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5290             key.type != BTRFS_EXTENT_DATA_REF_KEY)
5291                 return -1;
5292
5293         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5294                (unsigned long long)key.objectid, key.type,
5295                (unsigned long long)key.offset, slot, buf->start);
5296         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5297                               btrfs_item_nr_offset(slot + 1),
5298                               sizeof(struct btrfs_item) *
5299                               (nritems - slot - 1));
5300         btrfs_set_header_nritems(buf, nritems - 1);
5301         if (slot == 0) {
5302                 struct btrfs_disk_key disk_key;
5303
5304                 btrfs_item_key(buf, &disk_key, 0);
5305                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5306         }
5307         btrfs_mark_buffer_dirty(buf);
5308         return 0;
5309 }
5310
5311 static int fix_item_offset(struct btrfs_trans_handle *trans,
5312                            struct btrfs_root *root,
5313                            struct btrfs_path *path)
5314 {
5315         struct extent_buffer *buf;
5316         int i;
5317         int ret = 0;
5318
5319         /* We should only get this for leaves */
5320         BUG_ON(path->lowest_level);
5321         buf = path->nodes[0];
5322 again:
5323         for (i = 0; i < btrfs_header_nritems(buf); i++) {
5324                 unsigned int shift = 0, offset;
5325
5326                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5327                     BTRFS_LEAF_DATA_SIZE(root)) {
5328                         if (btrfs_item_end_nr(buf, i) >
5329                             BTRFS_LEAF_DATA_SIZE(root)) {
5330                                 ret = delete_bogus_item(trans, root, path,
5331                                                         buf, i);
5332                                 if (!ret)
5333                                         goto again;
5334                                 fprintf(stderr, "item is off the end of the "
5335                                         "leaf, can't fix\n");
5336                                 ret = -EIO;
5337                                 break;
5338                         }
5339                         shift = BTRFS_LEAF_DATA_SIZE(root) -
5340                                 btrfs_item_end_nr(buf, i);
5341                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5342                            btrfs_item_offset_nr(buf, i - 1)) {
5343                         if (btrfs_item_end_nr(buf, i) >
5344                             btrfs_item_offset_nr(buf, i - 1)) {
5345                                 ret = delete_bogus_item(trans, root, path,
5346                                                         buf, i);
5347                                 if (!ret)
5348                                         goto again;
5349                                 fprintf(stderr, "items overlap, can't fix\n");
5350                                 ret = -EIO;
5351                                 break;
5352                         }
5353                         shift = btrfs_item_offset_nr(buf, i - 1) -
5354                                 btrfs_item_end_nr(buf, i);
5355                 }
5356                 if (!shift)
5357                         continue;
5358
5359                 printf("Shifting item nr %d by %u bytes in block %llu\n",
5360                        i, shift, (unsigned long long)buf->start);
5361                 offset = btrfs_item_offset_nr(buf, i);
5362                 memmove_extent_buffer(buf,
5363                                       btrfs_leaf_data(buf) + offset + shift,
5364                                       btrfs_leaf_data(buf) + offset,
5365                                       btrfs_item_size_nr(buf, i));
5366                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5367                                       offset + shift);
5368                 btrfs_mark_buffer_dirty(buf);
5369         }
5370
5371         /*
5372          * We may have moved things, in which case we want to exit so we don't
5373          * write those changes out.  Once we have proper abort functionality in
5374          * progs this can be changed to something nicer.
5375          */
5376         BUG_ON(ret);
5377         return ret;
5378 }
5379
5380 /*
5381  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
5382  * then just return -EIO.
5383  */
5384 static int try_to_fix_bad_block(struct btrfs_root *root,
5385                                 struct extent_buffer *buf,
5386                                 enum btrfs_tree_block_status status)
5387 {
5388         struct btrfs_trans_handle *trans;
5389         struct ulist *roots;
5390         struct ulist_node *node;
5391         struct btrfs_root *search_root;
5392         struct btrfs_path path;
5393         struct ulist_iterator iter;
5394         struct btrfs_key root_key, key;
5395         int ret;
5396
5397         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5398             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5399                 return -EIO;
5400
5401         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5402         if (ret)
5403                 return -EIO;
5404
5405         btrfs_init_path(&path);
5406         ULIST_ITER_INIT(&iter);
5407         while ((node = ulist_next(roots, &iter))) {
5408                 root_key.objectid = node->val;
5409                 root_key.type = BTRFS_ROOT_ITEM_KEY;
5410                 root_key.offset = (u64)-1;
5411
5412                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5413                 if (IS_ERR(root)) {
5414                         ret = -EIO;
5415                         break;
5416                 }
5417
5418
5419                 trans = btrfs_start_transaction(search_root, 0);
5420                 if (IS_ERR(trans)) {
5421                         ret = PTR_ERR(trans);
5422                         break;
5423                 }
5424
5425                 path.lowest_level = btrfs_header_level(buf);
5426                 path.skip_check_block = 1;
5427                 if (path.lowest_level)
5428                         btrfs_node_key_to_cpu(buf, &key, 0);
5429                 else
5430                         btrfs_item_key_to_cpu(buf, &key, 0);
5431                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5432                 if (ret) {
5433                         ret = -EIO;
5434                         btrfs_commit_transaction(trans, search_root);
5435                         break;
5436                 }
5437                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5438                         ret = fix_key_order(trans, search_root, &path);
5439                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5440                         ret = fix_item_offset(trans, search_root, &path);
5441                 if (ret) {
5442                         btrfs_commit_transaction(trans, search_root);
5443                         break;
5444                 }
5445                 btrfs_release_path(&path);
5446                 btrfs_commit_transaction(trans, search_root);
5447         }
5448         ulist_free(roots);
5449         btrfs_release_path(&path);
5450         return ret;
5451 }
5452
5453 static int check_block(struct btrfs_root *root,
5454                        struct cache_tree *extent_cache,
5455                        struct extent_buffer *buf, u64 flags)
5456 {
5457         struct extent_record *rec;
5458         struct cache_extent *cache;
5459         struct btrfs_key key;
5460         enum btrfs_tree_block_status status;
5461         int ret = 0;
5462         int level;
5463
5464         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5465         if (!cache)
5466                 return 1;
5467         rec = container_of(cache, struct extent_record, cache);
5468         rec->generation = btrfs_header_generation(buf);
5469
5470         level = btrfs_header_level(buf);
5471         if (btrfs_header_nritems(buf) > 0) {
5472
5473                 if (level == 0)
5474                         btrfs_item_key_to_cpu(buf, &key, 0);
5475                 else
5476                         btrfs_node_key_to_cpu(buf, &key, 0);
5477
5478                 rec->info_objectid = key.objectid;
5479         }
5480         rec->info_level = level;
5481
5482         if (btrfs_is_leaf(buf))
5483                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5484         else
5485                 status = btrfs_check_node(root, &rec->parent_key, buf);
5486
5487         if (status != BTRFS_TREE_BLOCK_CLEAN) {
5488                 if (repair)
5489                         status = try_to_fix_bad_block(root, buf, status);
5490                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5491                         ret = -EIO;
5492                         fprintf(stderr, "bad block %llu\n",
5493                                 (unsigned long long)buf->start);
5494                 } else {
5495                         /*
5496                          * Signal to callers we need to start the scan over
5497                          * again since we'll have cowed blocks.
5498                          */
5499                         ret = -EAGAIN;
5500                 }
5501         } else {
5502                 rec->content_checked = 1;
5503                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5504                         rec->owner_ref_checked = 1;
5505                 else {
5506                         ret = check_owner_ref(root, rec, buf);
5507                         if (!ret)
5508                                 rec->owner_ref_checked = 1;
5509                 }
5510         }
5511         if (!ret)
5512                 maybe_free_extent_rec(extent_cache, rec);
5513         return ret;
5514 }
5515
5516 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5517                                                 u64 parent, u64 root)
5518 {
5519         struct list_head *cur = rec->backrefs.next;
5520         struct extent_backref *node;
5521         struct tree_backref *back;
5522
5523         while(cur != &rec->backrefs) {
5524                 node = to_extent_backref(cur);
5525                 cur = cur->next;
5526                 if (node->is_data)
5527                         continue;
5528                 back = to_tree_backref(node);
5529                 if (parent > 0) {
5530                         if (!node->full_backref)
5531                                 continue;
5532                         if (parent == back->parent)
5533                                 return back;
5534                 } else {
5535                         if (node->full_backref)
5536                                 continue;
5537                         if (back->root == root)
5538                                 return back;
5539                 }
5540         }
5541         return NULL;
5542 }
5543
5544 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5545                                                 u64 parent, u64 root)
5546 {
5547         struct tree_backref *ref = malloc(sizeof(*ref));
5548
5549         if (!ref)
5550                 return NULL;
5551         memset(&ref->node, 0, sizeof(ref->node));
5552         if (parent > 0) {
5553                 ref->parent = parent;
5554                 ref->node.full_backref = 1;
5555         } else {
5556                 ref->root = root;
5557                 ref->node.full_backref = 0;
5558         }
5559         list_add_tail(&ref->node.list, &rec->backrefs);
5560
5561         return ref;
5562 }
5563
5564 static struct data_backref *find_data_backref(struct extent_record *rec,
5565                                                 u64 parent, u64 root,
5566                                                 u64 owner, u64 offset,
5567                                                 int found_ref,
5568                                                 u64 disk_bytenr, u64 bytes)
5569 {
5570         struct list_head *cur = rec->backrefs.next;
5571         struct extent_backref *node;
5572         struct data_backref *back;
5573
5574         while(cur != &rec->backrefs) {
5575                 node = to_extent_backref(cur);
5576                 cur = cur->next;
5577                 if (!node->is_data)
5578                         continue;
5579                 back = to_data_backref(node);
5580                 if (parent > 0) {
5581                         if (!node->full_backref)
5582                                 continue;
5583                         if (parent == back->parent)
5584                                 return back;
5585                 } else {
5586                         if (node->full_backref)
5587                                 continue;
5588                         if (back->root == root && back->owner == owner &&
5589                             back->offset == offset) {
5590                                 if (found_ref && node->found_ref &&
5591                                     (back->bytes != bytes ||
5592                                     back->disk_bytenr != disk_bytenr))
5593                                         continue;
5594                                 return back;
5595                         }
5596                 }
5597         }
5598         return NULL;
5599 }
5600
5601 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5602                                                 u64 parent, u64 root,
5603                                                 u64 owner, u64 offset,
5604                                                 u64 max_size)
5605 {
5606         struct data_backref *ref = malloc(sizeof(*ref));
5607
5608         if (!ref)
5609                 return NULL;
5610         memset(&ref->node, 0, sizeof(ref->node));
5611         ref->node.is_data = 1;
5612
5613         if (parent > 0) {
5614                 ref->parent = parent;
5615                 ref->owner = 0;
5616                 ref->offset = 0;
5617                 ref->node.full_backref = 1;
5618         } else {
5619                 ref->root = root;
5620                 ref->owner = owner;
5621                 ref->offset = offset;
5622                 ref->node.full_backref = 0;
5623         }
5624         ref->bytes = max_size;
5625         ref->found_ref = 0;
5626         ref->num_refs = 0;
5627         list_add_tail(&ref->node.list, &rec->backrefs);
5628         if (max_size > rec->max_size)
5629                 rec->max_size = max_size;
5630         return ref;
5631 }
5632
5633 /* Check if the type of extent matches with its chunk */
5634 static void check_extent_type(struct extent_record *rec)
5635 {
5636         struct btrfs_block_group_cache *bg_cache;
5637
5638         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5639         if (!bg_cache)
5640                 return;
5641
5642         /* data extent, check chunk directly*/
5643         if (!rec->metadata) {
5644                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5645                         rec->wrong_chunk_type = 1;
5646                 return;
5647         }
5648
5649         /* metadata extent, check the obvious case first */
5650         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5651                                  BTRFS_BLOCK_GROUP_METADATA))) {
5652                 rec->wrong_chunk_type = 1;
5653                 return;
5654         }
5655
5656         /*
5657          * Check SYSTEM extent, as it's also marked as metadata, we can only
5658          * make sure it's a SYSTEM extent by its backref
5659          */
5660         if (!list_empty(&rec->backrefs)) {
5661                 struct extent_backref *node;
5662                 struct tree_backref *tback;
5663                 u64 bg_type;
5664
5665                 node = to_extent_backref(rec->backrefs.next);
5666                 if (node->is_data) {
5667                         /* tree block shouldn't have data backref */
5668                         rec->wrong_chunk_type = 1;
5669                         return;
5670                 }
5671                 tback = container_of(node, struct tree_backref, node);
5672
5673                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5674                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5675                 else
5676                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
5677                 if (!(bg_cache->flags & bg_type))
5678                         rec->wrong_chunk_type = 1;
5679         }
5680 }
5681
5682 /*
5683  * Allocate a new extent record, fill default values from @tmpl and insert int
5684  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5685  * the cache, otherwise it fails.
5686  */
5687 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5688                 struct extent_record *tmpl)
5689 {
5690         struct extent_record *rec;
5691         int ret = 0;
5692
5693         rec = malloc(sizeof(*rec));
5694         if (!rec)
5695                 return -ENOMEM;
5696         rec->start = tmpl->start;
5697         rec->max_size = tmpl->max_size;
5698         rec->nr = max(tmpl->nr, tmpl->max_size);
5699         rec->found_rec = tmpl->found_rec;
5700         rec->content_checked = tmpl->content_checked;
5701         rec->owner_ref_checked = tmpl->owner_ref_checked;
5702         rec->num_duplicates = 0;
5703         rec->metadata = tmpl->metadata;
5704         rec->flag_block_full_backref = FLAG_UNSET;
5705         rec->bad_full_backref = 0;
5706         rec->crossing_stripes = 0;
5707         rec->wrong_chunk_type = 0;
5708         rec->is_root = tmpl->is_root;
5709         rec->refs = tmpl->refs;
5710         rec->extent_item_refs = tmpl->extent_item_refs;
5711         rec->parent_generation = tmpl->parent_generation;
5712         INIT_LIST_HEAD(&rec->backrefs);
5713         INIT_LIST_HEAD(&rec->dups);
5714         INIT_LIST_HEAD(&rec->list);
5715         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
5716         rec->cache.start = tmpl->start;
5717         rec->cache.size = tmpl->nr;
5718         ret = insert_cache_extent(extent_cache, &rec->cache);
5719         if (ret) {
5720                 free(rec);
5721                 return ret;
5722         }
5723         bytes_used += rec->nr;
5724
5725         if (tmpl->metadata)
5726                 rec->crossing_stripes = check_crossing_stripes(global_info,
5727                                 rec->start, global_info->tree_root->nodesize);
5728         check_extent_type(rec);
5729         return ret;
5730 }
5731
5732 /*
5733  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5734  * some are hints:
5735  * - refs              - if found, increase refs
5736  * - is_root           - if found, set
5737  * - content_checked   - if found, set
5738  * - owner_ref_checked - if found, set
5739  *
5740  * If not found, create a new one, initialize and insert.
5741  */
5742 static int add_extent_rec(struct cache_tree *extent_cache,
5743                 struct extent_record *tmpl)
5744 {
5745         struct extent_record *rec;
5746         struct cache_extent *cache;
5747         int ret = 0;
5748         int dup = 0;
5749
5750         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5751         if (cache) {
5752                 rec = container_of(cache, struct extent_record, cache);
5753                 if (tmpl->refs)
5754                         rec->refs++;
5755                 if (rec->nr == 1)
5756                         rec->nr = max(tmpl->nr, tmpl->max_size);
5757
5758                 /*
5759                  * We need to make sure to reset nr to whatever the extent
5760                  * record says was the real size, this way we can compare it to
5761                  * the backrefs.
5762                  */
5763                 if (tmpl->found_rec) {
5764                         if (tmpl->start != rec->start || rec->found_rec) {
5765                                 struct extent_record *tmp;
5766
5767                                 dup = 1;
5768                                 if (list_empty(&rec->list))
5769                                         list_add_tail(&rec->list,
5770                                                       &duplicate_extents);
5771
5772                                 /*
5773                                  * We have to do this song and dance in case we
5774                                  * find an extent record that falls inside of
5775                                  * our current extent record but does not have
5776                                  * the same objectid.
5777                                  */
5778                                 tmp = malloc(sizeof(*tmp));
5779                                 if (!tmp)
5780                                         return -ENOMEM;
5781                                 tmp->start = tmpl->start;
5782                                 tmp->max_size = tmpl->max_size;
5783                                 tmp->nr = tmpl->nr;
5784                                 tmp->found_rec = 1;
5785                                 tmp->metadata = tmpl->metadata;
5786                                 tmp->extent_item_refs = tmpl->extent_item_refs;
5787                                 INIT_LIST_HEAD(&tmp->list);
5788                                 list_add_tail(&tmp->list, &rec->dups);
5789                                 rec->num_duplicates++;
5790                         } else {
5791                                 rec->nr = tmpl->nr;
5792                                 rec->found_rec = 1;
5793                         }
5794                 }
5795
5796                 if (tmpl->extent_item_refs && !dup) {
5797                         if (rec->extent_item_refs) {
5798                                 fprintf(stderr, "block %llu rec "
5799                                         "extent_item_refs %llu, passed %llu\n",
5800                                         (unsigned long long)tmpl->start,
5801                                         (unsigned long long)
5802                                                         rec->extent_item_refs,
5803                                         (unsigned long long)tmpl->extent_item_refs);
5804                         }
5805                         rec->extent_item_refs = tmpl->extent_item_refs;
5806                 }
5807                 if (tmpl->is_root)
5808                         rec->is_root = 1;
5809                 if (tmpl->content_checked)
5810                         rec->content_checked = 1;
5811                 if (tmpl->owner_ref_checked)
5812                         rec->owner_ref_checked = 1;
5813                 memcpy(&rec->parent_key, &tmpl->parent_key,
5814                                 sizeof(tmpl->parent_key));
5815                 if (tmpl->parent_generation)
5816                         rec->parent_generation = tmpl->parent_generation;
5817                 if (rec->max_size < tmpl->max_size)
5818                         rec->max_size = tmpl->max_size;
5819
5820                 /*
5821                  * A metadata extent can't cross stripe_len boundary, otherwise
5822                  * kernel scrub won't be able to handle it.
5823                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5824                  * it.
5825                  */
5826                 if (tmpl->metadata)
5827                         rec->crossing_stripes = check_crossing_stripes(
5828                                         global_info, rec->start,
5829                                         global_info->tree_root->nodesize);
5830                 check_extent_type(rec);
5831                 maybe_free_extent_rec(extent_cache, rec);
5832                 return ret;
5833         }
5834
5835         ret = add_extent_rec_nolookup(extent_cache, tmpl);
5836
5837         return ret;
5838 }
5839
5840 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5841                             u64 parent, u64 root, int found_ref)
5842 {
5843         struct extent_record *rec;
5844         struct tree_backref *back;
5845         struct cache_extent *cache;
5846         int ret;
5847
5848         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5849         if (!cache) {
5850                 struct extent_record tmpl;
5851
5852                 memset(&tmpl, 0, sizeof(tmpl));
5853                 tmpl.start = bytenr;
5854                 tmpl.nr = 1;
5855                 tmpl.metadata = 1;
5856
5857                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5858                 if (ret)
5859                         return ret;
5860
5861                 /* really a bug in cache_extent implement now */
5862                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5863                 if (!cache)
5864                         return -ENOENT;
5865         }
5866
5867         rec = container_of(cache, struct extent_record, cache);
5868         if (rec->start != bytenr) {
5869                 /*
5870                  * Several cause, from unaligned bytenr to over lapping extents
5871                  */
5872                 return -EEXIST;
5873         }
5874
5875         back = find_tree_backref(rec, parent, root);
5876         if (!back) {
5877                 back = alloc_tree_backref(rec, parent, root);
5878                 if (!back)
5879                         return -ENOMEM;
5880         }
5881
5882         if (found_ref) {
5883                 if (back->node.found_ref) {
5884                         fprintf(stderr, "Extent back ref already exists "
5885                                 "for %llu parent %llu root %llu \n",
5886                                 (unsigned long long)bytenr,
5887                                 (unsigned long long)parent,
5888                                 (unsigned long long)root);
5889                 }
5890                 back->node.found_ref = 1;
5891         } else {
5892                 if (back->node.found_extent_tree) {
5893                         fprintf(stderr, "Extent back ref already exists "
5894                                 "for %llu parent %llu root %llu \n",
5895                                 (unsigned long long)bytenr,
5896                                 (unsigned long long)parent,
5897                                 (unsigned long long)root);
5898                 }
5899                 back->node.found_extent_tree = 1;
5900         }
5901         check_extent_type(rec);
5902         maybe_free_extent_rec(extent_cache, rec);
5903         return 0;
5904 }
5905
5906 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5907                             u64 parent, u64 root, u64 owner, u64 offset,
5908                             u32 num_refs, int found_ref, u64 max_size)
5909 {
5910         struct extent_record *rec;
5911         struct data_backref *back;
5912         struct cache_extent *cache;
5913         int ret;
5914
5915         cache = lookup_cache_extent(extent_cache, bytenr, 1);
5916         if (!cache) {
5917                 struct extent_record tmpl;
5918
5919                 memset(&tmpl, 0, sizeof(tmpl));
5920                 tmpl.start = bytenr;
5921                 tmpl.nr = 1;
5922                 tmpl.max_size = max_size;
5923
5924                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5925                 if (ret)
5926                         return ret;
5927
5928                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5929                 if (!cache)
5930                         abort();
5931         }
5932
5933         rec = container_of(cache, struct extent_record, cache);
5934         if (rec->max_size < max_size)
5935                 rec->max_size = max_size;
5936
5937         /*
5938          * If found_ref is set then max_size is the real size and must match the
5939          * existing refs.  So if we have already found a ref then we need to
5940          * make sure that this ref matches the existing one, otherwise we need
5941          * to add a new backref so we can notice that the backrefs don't match
5942          * and we need to figure out who is telling the truth.  This is to
5943          * account for that awful fsync bug I introduced where we'd end up with
5944          * a btrfs_file_extent_item that would have its length include multiple
5945          * prealloc extents or point inside of a prealloc extent.
5946          */
5947         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5948                                  bytenr, max_size);
5949         if (!back) {
5950                 back = alloc_data_backref(rec, parent, root, owner, offset,
5951                                           max_size);
5952                 BUG_ON(!back);
5953         }
5954
5955         if (found_ref) {
5956                 BUG_ON(num_refs != 1);
5957                 if (back->node.found_ref)
5958                         BUG_ON(back->bytes != max_size);
5959                 back->node.found_ref = 1;
5960                 back->found_ref += 1;
5961                 back->bytes = max_size;
5962                 back->disk_bytenr = bytenr;
5963                 rec->refs += 1;
5964                 rec->content_checked = 1;
5965                 rec->owner_ref_checked = 1;
5966         } else {
5967                 if (back->node.found_extent_tree) {
5968                         fprintf(stderr, "Extent back ref already exists "
5969                                 "for %llu parent %llu root %llu "
5970                                 "owner %llu offset %llu num_refs %lu\n",
5971                                 (unsigned long long)bytenr,
5972                                 (unsigned long long)parent,
5973                                 (unsigned long long)root,
5974                                 (unsigned long long)owner,
5975                                 (unsigned long long)offset,
5976                                 (unsigned long)num_refs);
5977                 }
5978                 back->num_refs = num_refs;
5979                 back->node.found_extent_tree = 1;
5980         }
5981         maybe_free_extent_rec(extent_cache, rec);
5982         return 0;
5983 }
5984
5985 static int add_pending(struct cache_tree *pending,
5986                        struct cache_tree *seen, u64 bytenr, u32 size)
5987 {
5988         int ret;
5989         ret = add_cache_extent(seen, bytenr, size);
5990         if (ret)
5991                 return ret;
5992         add_cache_extent(pending, bytenr, size);
5993         return 0;
5994 }
5995
5996 static int pick_next_pending(struct cache_tree *pending,
5997                         struct cache_tree *reada,
5998                         struct cache_tree *nodes,
5999                         u64 last, struct block_info *bits, int bits_nr,
6000                         int *reada_bits)
6001 {
6002         unsigned long node_start = last;
6003         struct cache_extent *cache;
6004         int ret;
6005
6006         cache = search_cache_extent(reada, 0);
6007         if (cache) {
6008                 bits[0].start = cache->start;
6009                 bits[0].size = cache->size;
6010                 *reada_bits = 1;
6011                 return 1;
6012         }
6013         *reada_bits = 0;
6014         if (node_start > 32768)
6015                 node_start -= 32768;
6016
6017         cache = search_cache_extent(nodes, node_start);
6018         if (!cache)
6019                 cache = search_cache_extent(nodes, 0);
6020
6021         if (!cache) {
6022                  cache = search_cache_extent(pending, 0);
6023                  if (!cache)
6024                          return 0;
6025                  ret = 0;
6026                  do {
6027                          bits[ret].start = cache->start;
6028                          bits[ret].size = cache->size;
6029                          cache = next_cache_extent(cache);
6030                          ret++;
6031                  } while (cache && ret < bits_nr);
6032                  return ret;
6033         }
6034
6035         ret = 0;
6036         do {
6037                 bits[ret].start = cache->start;
6038                 bits[ret].size = cache->size;
6039                 cache = next_cache_extent(cache);
6040                 ret++;
6041         } while (cache && ret < bits_nr);
6042
6043         if (bits_nr - ret > 8) {
6044                 u64 lookup = bits[0].start + bits[0].size;
6045                 struct cache_extent *next;
6046                 next = search_cache_extent(pending, lookup);
6047                 while(next) {
6048                         if (next->start - lookup > 32768)
6049                                 break;
6050                         bits[ret].start = next->start;
6051                         bits[ret].size = next->size;
6052                         lookup = next->start + next->size;
6053                         ret++;
6054                         if (ret == bits_nr)
6055                                 break;
6056                         next = next_cache_extent(next);
6057                         if (!next)
6058                                 break;
6059                 }
6060         }
6061         return ret;
6062 }
6063
6064 static void free_chunk_record(struct cache_extent *cache)
6065 {
6066         struct chunk_record *rec;
6067
6068         rec = container_of(cache, struct chunk_record, cache);
6069         list_del_init(&rec->list);
6070         list_del_init(&rec->dextents);
6071         free(rec);
6072 }
6073
6074 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6075 {
6076         cache_tree_free_extents(chunk_cache, free_chunk_record);
6077 }
6078
6079 static void free_device_record(struct rb_node *node)
6080 {
6081         struct device_record *rec;
6082
6083         rec = container_of(node, struct device_record, node);
6084         free(rec);
6085 }
6086
6087 FREE_RB_BASED_TREE(device_cache, free_device_record);
6088
6089 int insert_block_group_record(struct block_group_tree *tree,
6090                               struct block_group_record *bg_rec)
6091 {
6092         int ret;
6093
6094         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6095         if (ret)
6096                 return ret;
6097
6098         list_add_tail(&bg_rec->list, &tree->block_groups);
6099         return 0;
6100 }
6101
6102 static void free_block_group_record(struct cache_extent *cache)
6103 {
6104         struct block_group_record *rec;
6105
6106         rec = container_of(cache, struct block_group_record, cache);
6107         list_del_init(&rec->list);
6108         free(rec);
6109 }
6110
6111 void free_block_group_tree(struct block_group_tree *tree)
6112 {
6113         cache_tree_free_extents(&tree->tree, free_block_group_record);
6114 }
6115
6116 int insert_device_extent_record(struct device_extent_tree *tree,
6117                                 struct device_extent_record *de_rec)
6118 {
6119         int ret;
6120
6121         /*
6122          * Device extent is a bit different from the other extents, because
6123          * the extents which belong to the different devices may have the
6124          * same start and size, so we need use the special extent cache
6125          * search/insert functions.
6126          */
6127         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6128         if (ret)
6129                 return ret;
6130
6131         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6132         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6133         return 0;
6134 }
6135
6136 static void free_device_extent_record(struct cache_extent *cache)
6137 {
6138         struct device_extent_record *rec;
6139
6140         rec = container_of(cache, struct device_extent_record, cache);
6141         if (!list_empty(&rec->chunk_list))
6142                 list_del_init(&rec->chunk_list);
6143         if (!list_empty(&rec->device_list))
6144                 list_del_init(&rec->device_list);
6145         free(rec);
6146 }
6147
6148 void free_device_extent_tree(struct device_extent_tree *tree)
6149 {
6150         cache_tree_free_extents(&tree->tree, free_device_extent_record);
6151 }
6152
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by the v0 extent ref item at @slot of @leaf
 * in @extent_cache.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is added as a
 * tree backref; any other ref is added as a data backref carrying the v0
 * reference count.
 *
 * Returns the result of the add_tree_backref()/add_data_backref() call.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
6173
6174 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6175                                             struct btrfs_key *key,
6176                                             int slot)
6177 {
6178         struct btrfs_chunk *ptr;
6179         struct chunk_record *rec;
6180         int num_stripes, i;
6181
6182         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6183         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6184
6185         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6186         if (!rec) {
6187                 fprintf(stderr, "memory allocation failed\n");
6188                 exit(-1);
6189         }
6190
6191         INIT_LIST_HEAD(&rec->list);
6192         INIT_LIST_HEAD(&rec->dextents);
6193         rec->bg_rec = NULL;
6194
6195         rec->cache.start = key->offset;
6196         rec->cache.size = btrfs_chunk_length(leaf, ptr);
6197
6198         rec->generation = btrfs_header_generation(leaf);
6199
6200         rec->objectid = key->objectid;
6201         rec->type = key->type;
6202         rec->offset = key->offset;
6203
6204         rec->length = rec->cache.size;
6205         rec->owner = btrfs_chunk_owner(leaf, ptr);
6206         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6207         rec->type_flags = btrfs_chunk_type(leaf, ptr);
6208         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6209         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6210         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6211         rec->num_stripes = num_stripes;
6212         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6213
6214         for (i = 0; i < rec->num_stripes; ++i) {
6215                 rec->stripes[i].devid =
6216                         btrfs_stripe_devid_nr(leaf, ptr, i);
6217                 rec->stripes[i].offset =
6218                         btrfs_stripe_offset_nr(leaf, ptr, i);
6219                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6220                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6221                                 BTRFS_UUID_SIZE);
6222         }
6223
6224         return rec;
6225 }
6226
6227 static int process_chunk_item(struct cache_tree *chunk_cache,
6228                               struct btrfs_key *key, struct extent_buffer *eb,
6229                               int slot)
6230 {
6231         struct chunk_record *rec;
6232         struct btrfs_chunk *chunk;
6233         int ret = 0;
6234
6235         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6236         /*
6237          * Do extra check for this chunk item,
6238          *
6239          * It's still possible one can craft a leaf with CHUNK_ITEM, with
6240          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6241          * and owner<->key_type check.
6242          */
6243         ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6244                                       key->offset);
6245         if (ret < 0) {
6246                 error("chunk(%llu, %llu) is not valid, ignore it",
6247                       key->offset, btrfs_chunk_length(eb, chunk));
6248                 return 0;
6249         }
6250         rec = btrfs_new_chunk_record(eb, key, slot);
6251         ret = insert_cache_extent(chunk_cache, &rec->cache);
6252         if (ret) {
6253                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6254                         rec->offset, rec->length);
6255                 free(rec);
6256         }
6257
6258         return ret;
6259 }
6260
6261 static int process_device_item(struct rb_root *dev_cache,
6262                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6263 {
6264         struct btrfs_dev_item *ptr;
6265         struct device_record *rec;
6266         int ret = 0;
6267
6268         ptr = btrfs_item_ptr(eb,
6269                 slot, struct btrfs_dev_item);
6270
6271         rec = malloc(sizeof(*rec));
6272         if (!rec) {
6273                 fprintf(stderr, "memory allocation failed\n");
6274                 return -ENOMEM;
6275         }
6276
6277         rec->devid = key->offset;
6278         rec->generation = btrfs_header_generation(eb);
6279
6280         rec->objectid = key->objectid;
6281         rec->type = key->type;
6282         rec->offset = key->offset;
6283
6284         rec->devid = btrfs_device_id(eb, ptr);
6285         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6286         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6287
6288         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6289         if (ret) {
6290                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
6291                 free(rec);
6292         }
6293
6294         return ret;
6295 }
6296
6297 struct block_group_record *
6298 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6299                              int slot)
6300 {
6301         struct btrfs_block_group_item *ptr;
6302         struct block_group_record *rec;
6303
6304         rec = calloc(1, sizeof(*rec));
6305         if (!rec) {
6306                 fprintf(stderr, "memory allocation failed\n");
6307                 exit(-1);
6308         }
6309
6310         rec->cache.start = key->objectid;
6311         rec->cache.size = key->offset;
6312
6313         rec->generation = btrfs_header_generation(leaf);
6314
6315         rec->objectid = key->objectid;
6316         rec->type = key->type;
6317         rec->offset = key->offset;
6318
6319         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6320         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6321
6322         INIT_LIST_HEAD(&rec->list);
6323
6324         return rec;
6325 }
6326
6327 static int process_block_group_item(struct block_group_tree *block_group_cache,
6328                                     struct btrfs_key *key,
6329                                     struct extent_buffer *eb, int slot)
6330 {
6331         struct block_group_record *rec;
6332         int ret = 0;
6333
6334         rec = btrfs_new_block_group_record(eb, key, slot);
6335         ret = insert_block_group_record(block_group_cache, rec);
6336         if (ret) {
6337                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6338                         rec->objectid, rec->offset);
6339                 free(rec);
6340         }
6341
6342         return ret;
6343 }
6344
6345 struct device_extent_record *
6346 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6347                                struct btrfs_key *key, int slot)
6348 {
6349         struct device_extent_record *rec;
6350         struct btrfs_dev_extent *ptr;
6351
6352         rec = calloc(1, sizeof(*rec));
6353         if (!rec) {
6354                 fprintf(stderr, "memory allocation failed\n");
6355                 exit(-1);
6356         }
6357
6358         rec->cache.objectid = key->objectid;
6359         rec->cache.start = key->offset;
6360
6361         rec->generation = btrfs_header_generation(leaf);
6362
6363         rec->objectid = key->objectid;
6364         rec->type = key->type;
6365         rec->offset = key->offset;
6366
6367         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6368         rec->chunk_objecteid =
6369                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6370         rec->chunk_offset =
6371                 btrfs_dev_extent_chunk_offset(leaf, ptr);
6372         rec->length = btrfs_dev_extent_length(leaf, ptr);
6373         rec->cache.size = rec->length;
6374
6375         INIT_LIST_HEAD(&rec->chunk_list);
6376         INIT_LIST_HEAD(&rec->device_list);
6377
6378         return rec;
6379 }
6380
6381 static int
6382 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6383                            struct btrfs_key *key, struct extent_buffer *eb,
6384                            int slot)
6385 {
6386         struct device_extent_record *rec;
6387         int ret;
6388
6389         rec = btrfs_new_device_extent_record(eb, key, slot);
6390         ret = insert_device_extent_record(dev_extent_cache, rec);
6391         if (ret) {
6392                 fprintf(stderr,
6393                         "Device extent[%llu, %llu, %llu] existed.\n",
6394                         rec->objectid, rec->offset, rec->length);
6395                 free(rec);
6396         }
6397
6398         return ret;
6399 }
6400
6401 static int process_extent_item(struct btrfs_root *root,
6402                                struct cache_tree *extent_cache,
6403                                struct extent_buffer *eb, int slot)
6404 {
6405         struct btrfs_extent_item *ei;
6406         struct btrfs_extent_inline_ref *iref;
6407         struct btrfs_extent_data_ref *dref;
6408         struct btrfs_shared_data_ref *sref;
6409         struct btrfs_key key;
6410         struct extent_record tmpl;
6411         unsigned long end;
6412         unsigned long ptr;
6413         int ret;
6414         int type;
6415         u32 item_size = btrfs_item_size_nr(eb, slot);
6416         u64 refs = 0;
6417         u64 offset;
6418         u64 num_bytes;
6419         int metadata = 0;
6420
6421         btrfs_item_key_to_cpu(eb, &key, slot);
6422
6423         if (key.type == BTRFS_METADATA_ITEM_KEY) {
6424                 metadata = 1;
6425                 num_bytes = root->nodesize;
6426         } else {
6427                 num_bytes = key.offset;
6428         }
6429
6430         if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6431                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6432                       key.objectid, root->sectorsize);
6433                 return -EIO;
6434         }
6435         if (item_size < sizeof(*ei)) {
6436 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6437                 struct btrfs_extent_item_v0 *ei0;
6438                 BUG_ON(item_size != sizeof(*ei0));
6439                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6440                 refs = btrfs_extent_refs_v0(eb, ei0);
6441 #else
6442                 BUG();
6443 #endif
6444                 memset(&tmpl, 0, sizeof(tmpl));
6445                 tmpl.start = key.objectid;
6446                 tmpl.nr = num_bytes;
6447                 tmpl.extent_item_refs = refs;
6448                 tmpl.metadata = metadata;
6449                 tmpl.found_rec = 1;
6450                 tmpl.max_size = num_bytes;
6451
6452                 return add_extent_rec(extent_cache, &tmpl);
6453         }
6454
6455         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6456         refs = btrfs_extent_refs(eb, ei);
6457         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6458                 metadata = 1;
6459         else
6460                 metadata = 0;
6461         if (metadata && num_bytes != root->nodesize) {
6462                 error("ignore invalid metadata extent, length %llu does not equal to %u",
6463                       num_bytes, root->nodesize);
6464                 return -EIO;
6465         }
6466         if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6467                 error("ignore invalid data extent, length %llu is not aligned to %u",
6468                       num_bytes, root->sectorsize);
6469                 return -EIO;
6470         }
6471
6472         memset(&tmpl, 0, sizeof(tmpl));
6473         tmpl.start = key.objectid;
6474         tmpl.nr = num_bytes;
6475         tmpl.extent_item_refs = refs;
6476         tmpl.metadata = metadata;
6477         tmpl.found_rec = 1;
6478         tmpl.max_size = num_bytes;
6479         add_extent_rec(extent_cache, &tmpl);
6480
6481         ptr = (unsigned long)(ei + 1);
6482         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6483             key.type == BTRFS_EXTENT_ITEM_KEY)
6484                 ptr += sizeof(struct btrfs_tree_block_info);
6485
6486         end = (unsigned long)ei + item_size;
6487         while (ptr < end) {
6488                 iref = (struct btrfs_extent_inline_ref *)ptr;
6489                 type = btrfs_extent_inline_ref_type(eb, iref);
6490                 offset = btrfs_extent_inline_ref_offset(eb, iref);
6491                 switch (type) {
6492                 case BTRFS_TREE_BLOCK_REF_KEY:
6493                         ret = add_tree_backref(extent_cache, key.objectid,
6494                                         0, offset, 0);
6495                         if (ret < 0)
6496                                 error("add_tree_backref failed: %s",
6497                                       strerror(-ret));
6498                         break;
6499                 case BTRFS_SHARED_BLOCK_REF_KEY:
6500                         ret = add_tree_backref(extent_cache, key.objectid,
6501                                         offset, 0, 0);
6502                         if (ret < 0)
6503                                 error("add_tree_backref failed: %s",
6504                                       strerror(-ret));
6505                         break;
6506                 case BTRFS_EXTENT_DATA_REF_KEY:
6507                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6508                         add_data_backref(extent_cache, key.objectid, 0,
6509                                         btrfs_extent_data_ref_root(eb, dref),
6510                                         btrfs_extent_data_ref_objectid(eb,
6511                                                                        dref),
6512                                         btrfs_extent_data_ref_offset(eb, dref),
6513                                         btrfs_extent_data_ref_count(eb, dref),
6514                                         0, num_bytes);
6515                         break;
6516                 case BTRFS_SHARED_DATA_REF_KEY:
6517                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
6518                         add_data_backref(extent_cache, key.objectid, offset,
6519                                         0, 0, 0,
6520                                         btrfs_shared_data_ref_count(eb, sref),
6521                                         0, num_bytes);
6522                         break;
6523                 default:
6524                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6525                                 key.objectid, key.type, num_bytes);
6526                         goto out;
6527                 }
6528                 ptr += btrfs_extent_inline_ref_size(type);
6529         }
6530         WARN_ON(ptr > end);
6531 out:
6532         return 0;
6533 }
6534
6535 static int check_cache_range(struct btrfs_root *root,
6536                              struct btrfs_block_group_cache *cache,
6537                              u64 offset, u64 bytes)
6538 {
6539         struct btrfs_free_space *entry;
6540         u64 *logical;
6541         u64 bytenr;
6542         int stripe_len;
6543         int i, nr, ret;
6544
6545         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6546                 bytenr = btrfs_sb_offset(i);
6547                 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6548                                        cache->key.objectid, bytenr, 0,
6549                                        &logical, &nr, &stripe_len);
6550                 if (ret)
6551                         return ret;
6552
6553                 while (nr--) {
6554                         if (logical[nr] + stripe_len <= offset)
6555                                 continue;
6556                         if (offset + bytes <= logical[nr])
6557                                 continue;
6558                         if (logical[nr] == offset) {
6559                                 if (stripe_len >= bytes) {
6560                                         free(logical);
6561                                         return 0;
6562                                 }
6563                                 bytes -= stripe_len;
6564                                 offset += stripe_len;
6565                         } else if (logical[nr] < offset) {
6566                                 if (logical[nr] + stripe_len >=
6567                                     offset + bytes) {
6568                                         free(logical);
6569                                         return 0;
6570                                 }
6571                                 bytes = (offset + bytes) -
6572                                         (logical[nr] + stripe_len);
6573                                 offset = logical[nr] + stripe_len;
6574                         } else {
6575                                 /*
6576                                  * Could be tricky, the super may land in the
6577                                  * middle of the area we're checking.  First
6578                                  * check the easiest case, it's at the end.
6579                                  */
6580                                 if (logical[nr] + stripe_len >=
6581                                     bytes + offset) {
6582                                         bytes = logical[nr] - offset;
6583                                         continue;
6584                                 }
6585
6586                                 /* Check the left side */
6587                                 ret = check_cache_range(root, cache,
6588                                                         offset,
6589                                                         logical[nr] - offset);
6590                                 if (ret) {
6591                                         free(logical);
6592                                         return ret;
6593                                 }
6594
6595                                 /* Now we continue with the right side */
6596                                 bytes = (offset + bytes) -
6597                                         (logical[nr] + stripe_len);
6598                                 offset = logical[nr] + stripe_len;
6599                         }
6600                 }
6601
6602                 free(logical);
6603         }
6604
6605         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6606         if (!entry) {
6607                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6608                         offset, offset+bytes);
6609                 return -EINVAL;
6610         }
6611
6612         if (entry->offset != offset) {
6613                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6614                         entry->offset);
6615                 return -EINVAL;
6616         }
6617
6618         if (entry->bytes != bytes) {
6619                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6620                         bytes, entry->bytes, offset);
6621                 return -EINVAL;
6622         }
6623
6624         unlink_free_space(cache->free_space_ctl, entry);
6625         free(entry);
6626         return 0;
6627 }
6628
6629 static int verify_space_cache(struct btrfs_root *root,
6630                               struct btrfs_block_group_cache *cache)
6631 {
6632         struct btrfs_path path;
6633         struct extent_buffer *leaf;
6634         struct btrfs_key key;
6635         u64 last;
6636         int ret = 0;
6637
6638         root = root->fs_info->extent_root;
6639
6640         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6641
6642         btrfs_init_path(&path);
6643         key.objectid = last;
6644         key.offset = 0;
6645         key.type = BTRFS_EXTENT_ITEM_KEY;
6646         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6647         if (ret < 0)
6648                 goto out;
6649         ret = 0;
6650         while (1) {
6651                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6652                         ret = btrfs_next_leaf(root, &path);
6653                         if (ret < 0)
6654                                 goto out;
6655                         if (ret > 0) {
6656                                 ret = 0;
6657                                 break;
6658                         }
6659                 }
6660                 leaf = path.nodes[0];
6661                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6662                 if (key.objectid >= cache->key.offset + cache->key.objectid)
6663                         break;
6664                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6665                     key.type != BTRFS_METADATA_ITEM_KEY) {
6666                         path.slots[0]++;
6667                         continue;
6668                 }
6669
6670                 if (last == key.objectid) {
6671                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
6672                                 last = key.objectid + key.offset;
6673                         else
6674                                 last = key.objectid + root->nodesize;
6675                         path.slots[0]++;
6676                         continue;
6677                 }
6678
6679                 ret = check_cache_range(root, cache, last,
6680                                         key.objectid - last);
6681                 if (ret)
6682                         break;
6683                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6684                         last = key.objectid + key.offset;
6685                 else
6686                         last = key.objectid + root->nodesize;
6687                 path.slots[0]++;
6688         }
6689
6690         if (last < cache->key.objectid + cache->key.offset)
6691                 ret = check_cache_range(root, cache, last,
6692                                         cache->key.objectid +
6693                                         cache->key.offset - last);
6694
6695 out:
6696         btrfs_release_path(&path);
6697
6698         if (!ret &&
6699             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6700                 fprintf(stderr, "There are still entries left in the space "
6701                         "cache\n");
6702                 ret = -EINVAL;
6703         }
6704
6705         return ret;
6706 }
6707
6708 static int check_space_cache(struct btrfs_root *root)
6709 {
6710         struct btrfs_block_group_cache *cache;
6711         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6712         int ret;
6713         int error = 0;
6714
6715         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6716             btrfs_super_generation(root->fs_info->super_copy) !=
6717             btrfs_super_cache_generation(root->fs_info->super_copy)) {
6718                 printf("cache and super generation don't match, space cache "
6719                        "will be invalidated\n");
6720                 return 0;
6721         }
6722
6723         if (ctx.progress_enabled) {
6724                 ctx.tp = TASK_FREE_SPACE;
6725                 task_start(ctx.info);
6726         }
6727
6728         while (1) {
6729                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
6730                 if (!cache)
6731                         break;
6732
6733                 start = cache->key.objectid + cache->key.offset;
6734                 if (!cache->free_space_ctl) {
6735                         if (btrfs_init_free_space_ctl(cache,
6736                                                       root->sectorsize)) {
6737                                 ret = -ENOMEM;
6738                                 break;
6739                         }
6740                 } else {
6741                         btrfs_remove_free_space_cache(cache);
6742                 }
6743
6744                 if (btrfs_fs_compat_ro(root->fs_info,
6745                                        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6746                         ret = exclude_super_stripes(root, cache);
6747                         if (ret) {
6748                                 fprintf(stderr, "could not exclude super stripes: %s\n",
6749                                         strerror(-ret));
6750                                 error++;
6751                                 continue;
6752                         }
6753                         ret = load_free_space_tree(root->fs_info, cache);
6754                         free_excluded_extents(root, cache);
6755                         if (ret < 0) {
6756                                 fprintf(stderr, "could not load free space tree: %s\n",
6757                                         strerror(-ret));
6758                                 error++;
6759                                 continue;
6760                         }
6761                         error += ret;
6762                 } else {
6763                         ret = load_free_space_cache(root->fs_info, cache);
6764                         if (!ret)
6765                                 continue;
6766                 }
6767
6768                 ret = verify_space_cache(root, cache);
6769                 if (ret) {
6770                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
6771                                 cache->key.objectid);
6772                         error++;
6773                 }
6774         }
6775
6776         task_stop(ctx.info);
6777
6778         return error ? -EINVAL : 0;
6779 }
6780
6781 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6782                         u64 num_bytes, unsigned long leaf_offset,
6783                         struct extent_buffer *eb) {
6784
6785         u64 offset = 0;
6786         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6787         char *data;
6788         unsigned long csum_offset;
6789         u32 csum;
6790         u32 csum_expected;
6791         u64 read_len;
6792         u64 data_checked = 0;
6793         u64 tmp;
6794         int ret = 0;
6795         int mirror;
6796         int num_copies;
6797
6798         if (num_bytes % root->sectorsize)
6799                 return -EINVAL;
6800
6801         data = malloc(num_bytes);
6802         if (!data)
6803                 return -ENOMEM;
6804
6805         while (offset < num_bytes) {
6806                 mirror = 0;
6807 again:
6808                 read_len = num_bytes - offset;
6809                 /* read as much space once a time */
6810                 ret = read_extent_data(root, data + offset,
6811                                 bytenr + offset, &read_len, mirror);
6812                 if (ret)
6813                         goto out;
6814                 data_checked = 0;
6815                 /* verify every 4k data's checksum */
6816                 while (data_checked < read_len) {
6817                         csum = ~(u32)0;
6818                         tmp = offset + data_checked;
6819
6820                         csum = btrfs_csum_data(NULL, (char *)data + tmp,
6821                                                csum, root->sectorsize);
6822                         btrfs_csum_final(csum, (u8 *)&csum);
6823
6824                         csum_offset = leaf_offset +
6825                                  tmp / root->sectorsize * csum_size;
6826                         read_extent_buffer(eb, (char *)&csum_expected,
6827                                            csum_offset, csum_size);
6828                         /* try another mirror */
6829                         if (csum != csum_expected) {
6830                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6831                                                 mirror, bytenr + tmp,
6832                                                 csum, csum_expected);
6833                                 num_copies = btrfs_num_copies(
6834                                                 &root->fs_info->mapping_tree,
6835                                                 bytenr, num_bytes);
6836                                 if (mirror < num_copies - 1) {
6837                                         mirror += 1;
6838                                         goto again;
6839                                 }
6840                         }
6841                         data_checked += root->sectorsize;
6842                 }
6843                 offset += read_len;
6844         }
6845 out:
6846         free(data);
6847         return ret;
6848 }
6849
6850 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6851                                u64 num_bytes)
6852 {
6853         struct btrfs_path path;
6854         struct extent_buffer *leaf;
6855         struct btrfs_key key;
6856         int ret;
6857
6858         btrfs_init_path(&path);
6859         key.objectid = bytenr;
6860         key.type = BTRFS_EXTENT_ITEM_KEY;
6861         key.offset = (u64)-1;
6862
6863 again:
6864         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6865                                 0, 0);
6866         if (ret < 0) {
6867                 fprintf(stderr, "Error looking up extent record %d\n", ret);
6868                 btrfs_release_path(&path);
6869                 return ret;
6870         } else if (ret) {
6871                 if (path.slots[0] > 0) {
6872                         path.slots[0]--;
6873                 } else {
6874                         ret = btrfs_prev_leaf(root, &path);
6875                         if (ret < 0) {
6876                                 goto out;
6877                         } else if (ret > 0) {
6878                                 ret = 0;
6879                                 goto out;
6880                         }
6881                 }
6882         }
6883
6884         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6885
6886         /*
6887          * Block group items come before extent items if they have the same
6888          * bytenr, so walk back one more just in case.  Dear future traveller,
6889          * first congrats on mastering time travel.  Now if it's not too much
6890          * trouble could you go back to 2006 and tell Chris to make the
6891          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6892          * EXTENT_ITEM_KEY please?
6893          */
6894         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6895                 if (path.slots[0] > 0) {
6896                         path.slots[0]--;
6897                 } else {
6898                         ret = btrfs_prev_leaf(root, &path);
6899                         if (ret < 0) {
6900                                 goto out;
6901                         } else if (ret > 0) {
6902                                 ret = 0;
6903                                 goto out;
6904                         }
6905                 }
6906                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6907         }
6908
6909         while (num_bytes) {
6910                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6911                         ret = btrfs_next_leaf(root, &path);
6912                         if (ret < 0) {
6913                                 fprintf(stderr, "Error going to next leaf "
6914                                         "%d\n", ret);
6915                                 btrfs_release_path(&path);
6916                                 return ret;
6917                         } else if (ret) {
6918                                 break;
6919                         }
6920                 }
6921                 leaf = path.nodes[0];
6922                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6923                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6924                         path.slots[0]++;
6925                         continue;
6926                 }
6927                 if (key.objectid + key.offset < bytenr) {
6928                         path.slots[0]++;
6929                         continue;
6930                 }
6931                 if (key.objectid > bytenr + num_bytes)
6932                         break;
6933
6934                 if (key.objectid == bytenr) {
6935                         if (key.offset >= num_bytes) {
6936                                 num_bytes = 0;
6937                                 break;
6938                         }
6939                         num_bytes -= key.offset;
6940                         bytenr += key.offset;
6941                 } else if (key.objectid < bytenr) {
6942                         if (key.objectid + key.offset >= bytenr + num_bytes) {
6943                                 num_bytes = 0;
6944                                 break;
6945                         }
6946                         num_bytes = (bytenr + num_bytes) -
6947                                 (key.objectid + key.offset);
6948                         bytenr = key.objectid + key.offset;
6949                 } else {
6950                         if (key.objectid + key.offset < bytenr + num_bytes) {
6951                                 u64 new_start = key.objectid + key.offset;
6952                                 u64 new_bytes = bytenr + num_bytes - new_start;
6953
6954                                 /*
6955                                  * Weird case, the extent is in the middle of
6956                                  * our range, we'll have to search one side
6957                                  * and then the other.  Not sure if this happens
6958                                  * in real life, but no harm in coding it up
6959                                  * anyway just in case.
6960                                  */
6961                                 btrfs_release_path(&path);
6962                                 ret = check_extent_exists(root, new_start,
6963                                                           new_bytes);
6964                                 if (ret) {
6965                                         fprintf(stderr, "Right section didn't "
6966                                                 "have a record\n");
6967                                         break;
6968                                 }
6969                                 num_bytes = key.objectid - bytenr;
6970                                 goto again;
6971                         }
6972                         num_bytes = key.objectid - bytenr;
6973                 }
6974                 path.slots[0]++;
6975         }
6976         ret = 0;
6977
6978 out:
6979         if (num_bytes && !ret) {
6980                 fprintf(stderr, "There are no extents for csum range "
6981                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6982                 ret = 1;
6983         }
6984
6985         btrfs_release_path(&path);
6986         return ret;
6987 }
6988
6989 static int check_csums(struct btrfs_root *root)
6990 {
6991         struct btrfs_path path;
6992         struct extent_buffer *leaf;
6993         struct btrfs_key key;
6994         u64 offset = 0, num_bytes = 0;
6995         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6996         int errors = 0;
6997         int ret;
6998         u64 data_len;
6999         unsigned long leaf_offset;
7000
7001         root = root->fs_info->csum_root;
7002         if (!extent_buffer_uptodate(root->node)) {
7003                 fprintf(stderr, "No valid csum tree found\n");
7004                 return -ENOENT;
7005         }
7006
7007         btrfs_init_path(&path);
7008         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7009         key.type = BTRFS_EXTENT_CSUM_KEY;
7010         key.offset = 0;
7011         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7012         if (ret < 0) {
7013                 fprintf(stderr, "Error searching csum tree %d\n", ret);
7014                 btrfs_release_path(&path);
7015                 return ret;
7016         }
7017
7018         if (ret > 0 && path.slots[0])
7019                 path.slots[0]--;
7020         ret = 0;
7021
7022         while (1) {
7023                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7024                         ret = btrfs_next_leaf(root, &path);
7025                         if (ret < 0) {
7026                                 fprintf(stderr, "Error going to next leaf "
7027                                         "%d\n", ret);
7028                                 break;
7029                         }
7030                         if (ret)
7031                                 break;
7032                 }
7033                 leaf = path.nodes[0];
7034
7035                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7036                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7037                         path.slots[0]++;
7038                         continue;
7039                 }
7040
7041                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7042                               csum_size) * root->sectorsize;
7043                 if (!check_data_csum)
7044                         goto skip_csum_check;
7045                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7046                 ret = check_extent_csums(root, key.offset, data_len,
7047                                          leaf_offset, leaf);
7048                 if (ret)
7049                         break;
7050 skip_csum_check:
7051                 if (!num_bytes) {
7052                         offset = key.offset;
7053                 } else if (key.offset != offset + num_bytes) {
7054                         ret = check_extent_exists(root, offset, num_bytes);
7055                         if (ret) {
7056                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7057                                         "there is no extent record\n",
7058                                         offset, offset+num_bytes);
7059                                 errors++;
7060                         }
7061                         offset = key.offset;
7062                         num_bytes = 0;
7063                 }
7064                 num_bytes += data_len;
7065                 path.slots[0]++;
7066         }
7067
7068         btrfs_release_path(&path);
7069         return errors;
7070 }
7071
7072 static int is_dropped_key(struct btrfs_key *key,
7073                           struct btrfs_key *drop_key) {
7074         if (key->objectid < drop_key->objectid)
7075                 return 1;
7076         else if (key->objectid == drop_key->objectid) {
7077                 if (key->type < drop_key->type)
7078                         return 1;
7079                 else if (key->type == drop_key->type) {
7080                         if (key->offset < drop_key->offset)
7081                                 return 1;
7082                 }
7083         }
7084         return 0;
7085 }
7086
7087 /*
7088  * Here are the rules for FULL_BACKREF.
7089  *
7090  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7091  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7092  *      FULL_BACKREF set.
7093  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
7094  *    if it happened after the relocation occurred since we'll have dropped the
7095  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7096  *    have no real way to know for sure.
7097  *
7098  * We process the blocks one root at a time, and we start from the lowest root
7099  * objectid and go to the highest.  So we can just lookup the owner backref for
7100  * the record and if we don't find it then we know it doesn't exist and we have
7101  * a FULL BACKREF.
7102  *
7103  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7104  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7105  * be set or not and then we can check later once we've gathered all the refs.
7106  */
7107 static int calc_extent_flag(struct btrfs_root *root,
7108                            struct cache_tree *extent_cache,
7109                            struct extent_buffer *buf,
7110                            struct root_item_record *ri,
7111                            u64 *flags)
7112 {
7113         struct extent_record *rec;
7114         struct cache_extent *cache;
7115         struct tree_backref *tback;
7116         u64 owner = 0;
7117
7118         cache = lookup_cache_extent(extent_cache, buf->start, 1);
7119         /* we have added this extent before */
7120         if (!cache)
7121                 return -ENOENT;
7122
7123         rec = container_of(cache, struct extent_record, cache);
7124
7125         /*
7126          * Except file/reloc tree, we can not have
7127          * FULL BACKREF MODE
7128          */
7129         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7130                 goto normal;
7131         /*
7132          * root node
7133          */
7134         if (buf->start == ri->bytenr)
7135                 goto normal;
7136
7137         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7138                 goto full_backref;
7139
7140         owner = btrfs_header_owner(buf);
7141         if (owner == ri->objectid)
7142                 goto normal;
7143
7144         tback = find_tree_backref(rec, 0, owner);
7145         if (!tback)
7146                 goto full_backref;
7147 normal:
7148         *flags = 0;
7149         if (rec->flag_block_full_backref != FLAG_UNSET &&
7150             rec->flag_block_full_backref != 0)
7151                 rec->bad_full_backref = 1;
7152         return 0;
7153 full_backref:
7154         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7155         if (rec->flag_block_full_backref != FLAG_UNSET &&
7156             rec->flag_block_full_backref != 1)
7157                 rec->bad_full_backref = 1;
7158         return 0;
7159 }
7160
7161 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7162 {
7163         fprintf(stderr, "Invalid key type(");
7164         print_key_type(stderr, 0, key_type);
7165         fprintf(stderr, ") found in root(");
7166         print_objectid(stderr, rootid, 0);
7167         fprintf(stderr, ")\n");
7168 }
7169
7170 /*
7171  * Check if the key is valid with its extent buffer.
7172  *
7173  * This is a early check in case invalid key exists in a extent buffer
7174  * This is not comprehensive yet, but should prevent wrong key/item passed
7175  * further
7176  */
7177 static int check_type_with_root(u64 rootid, u8 key_type)
7178 {
7179         switch (key_type) {
7180         /* Only valid in chunk tree */
7181         case BTRFS_DEV_ITEM_KEY:
7182         case BTRFS_CHUNK_ITEM_KEY:
7183                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7184                         goto err;
7185                 break;
7186         /* valid in csum and log tree */
7187         case BTRFS_CSUM_TREE_OBJECTID:
7188                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7189                       is_fstree(rootid)))
7190                         goto err;
7191                 break;
7192         case BTRFS_EXTENT_ITEM_KEY:
7193         case BTRFS_METADATA_ITEM_KEY:
7194         case BTRFS_BLOCK_GROUP_ITEM_KEY:
7195                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7196                         goto err;
7197                 break;
7198         case BTRFS_ROOT_ITEM_KEY:
7199                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7200                         goto err;
7201                 break;
7202         case BTRFS_DEV_EXTENT_KEY:
7203                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7204                         goto err;
7205                 break;
7206         }
7207         return 0;
7208 err:
7209         report_mismatch_key_root(key_type, rootid);
7210         return -EINVAL;
7211 }
7212
7213 static int run_next_block(struct btrfs_root *root,
7214                           struct block_info *bits,
7215                           int bits_nr,
7216                           u64 *last,
7217                           struct cache_tree *pending,
7218                           struct cache_tree *seen,
7219                           struct cache_tree *reada,
7220                           struct cache_tree *nodes,
7221                           struct cache_tree *extent_cache,
7222                           struct cache_tree *chunk_cache,
7223                           struct rb_root *dev_cache,
7224                           struct block_group_tree *block_group_cache,
7225                           struct device_extent_tree *dev_extent_cache,
7226                           struct root_item_record *ri)
7227 {
7228         struct extent_buffer *buf;
7229         struct extent_record *rec = NULL;
7230         u64 bytenr;
7231         u32 size;
7232         u64 parent;
7233         u64 owner;
7234         u64 flags;
7235         u64 ptr;
7236         u64 gen = 0;
7237         int ret = 0;
7238         int i;
7239         int nritems;
7240         struct btrfs_key key;
7241         struct cache_extent *cache;
7242         int reada_bits;
7243
7244         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7245                                     bits_nr, &reada_bits);
7246         if (nritems == 0)
7247                 return 1;
7248
7249         if (!reada_bits) {
7250                 for(i = 0; i < nritems; i++) {
7251                         ret = add_cache_extent(reada, bits[i].start,
7252                                                bits[i].size);
7253                         if (ret == -EEXIST)
7254                                 continue;
7255
7256                         /* fixme, get the parent transid */
7257                         readahead_tree_block(root, bits[i].start,
7258                                              bits[i].size, 0);
7259                 }
7260         }
7261         *last = bits[0].start;
7262         bytenr = bits[0].start;
7263         size = bits[0].size;
7264
7265         cache = lookup_cache_extent(pending, bytenr, size);
7266         if (cache) {
7267                 remove_cache_extent(pending, cache);
7268                 free(cache);
7269         }
7270         cache = lookup_cache_extent(reada, bytenr, size);
7271         if (cache) {
7272                 remove_cache_extent(reada, cache);
7273                 free(cache);
7274         }
7275         cache = lookup_cache_extent(nodes, bytenr, size);
7276         if (cache) {
7277                 remove_cache_extent(nodes, cache);
7278                 free(cache);
7279         }
7280         cache = lookup_cache_extent(extent_cache, bytenr, size);
7281         if (cache) {
7282                 rec = container_of(cache, struct extent_record, cache);
7283                 gen = rec->parent_generation;
7284         }
7285
7286         /* fixme, get the real parent transid */
7287         buf = read_tree_block(root, bytenr, size, gen);
7288         if (!extent_buffer_uptodate(buf)) {
7289                 record_bad_block_io(root->fs_info,
7290                                     extent_cache, bytenr, size);
7291                 goto out;
7292         }
7293
7294         nritems = btrfs_header_nritems(buf);
7295
7296         flags = 0;
7297         if (!init_extent_tree) {
7298                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7299                                        btrfs_header_level(buf), 1, NULL,
7300                                        &flags);
7301                 if (ret < 0) {
7302                         ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7303                         if (ret < 0) {
7304                                 fprintf(stderr, "Couldn't calc extent flags\n");
7305                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7306                         }
7307                 }
7308         } else {
7309                 flags = 0;
7310                 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7311                 if (ret < 0) {
7312                         fprintf(stderr, "Couldn't calc extent flags\n");
7313                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7314                 }
7315         }
7316
7317         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7318                 if (ri != NULL &&
7319                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7320                     ri->objectid == btrfs_header_owner(buf)) {
7321                         /*
7322                          * Ok we got to this block from it's original owner and
7323                          * we have FULL_BACKREF set.  Relocation can leave
7324                          * converted blocks over so this is altogether possible,
7325                          * however it's not possible if the generation > the
7326                          * last snapshot, so check for this case.
7327                          */
7328                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7329                             btrfs_header_generation(buf) > ri->last_snapshot) {
7330                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7331                                 rec->bad_full_backref = 1;
7332                         }
7333                 }
7334         } else {
7335                 if (ri != NULL &&
7336                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7337                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7338                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7339                         rec->bad_full_backref = 1;
7340                 }
7341         }
7342
7343         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7344                 rec->flag_block_full_backref = 1;
7345                 parent = bytenr;
7346                 owner = 0;
7347         } else {
7348                 rec->flag_block_full_backref = 0;
7349                 parent = 0;
7350                 owner = btrfs_header_owner(buf);
7351         }
7352
7353         ret = check_block(root, extent_cache, buf, flags);
7354         if (ret)
7355                 goto out;
7356
7357         if (btrfs_is_leaf(buf)) {
7358                 btree_space_waste += btrfs_leaf_free_space(root, buf);
7359                 for (i = 0; i < nritems; i++) {
7360                         struct btrfs_file_extent_item *fi;
7361                         btrfs_item_key_to_cpu(buf, &key, i);
7362                         /*
7363                          * Check key type against the leaf owner.
7364                          * Could filter quite a lot of early error if
7365                          * owner is correct
7366                          */
7367                         if (check_type_with_root(btrfs_header_owner(buf),
7368                                                  key.type)) {
7369                                 fprintf(stderr, "ignoring invalid key\n");
7370                                 continue;
7371                         }
7372                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7373                                 process_extent_item(root, extent_cache, buf,
7374                                                     i);
7375                                 continue;
7376                         }
7377                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7378                                 process_extent_item(root, extent_cache, buf,
7379                                                     i);
7380                                 continue;
7381                         }
7382                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7383                                 total_csum_bytes +=
7384                                         btrfs_item_size_nr(buf, i);
7385                                 continue;
7386                         }
7387                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7388                                 process_chunk_item(chunk_cache, &key, buf, i);
7389                                 continue;
7390                         }
7391                         if (key.type == BTRFS_DEV_ITEM_KEY) {
7392                                 process_device_item(dev_cache, &key, buf, i);
7393                                 continue;
7394                         }
7395                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7396                                 process_block_group_item(block_group_cache,
7397                                         &key, buf, i);
7398                                 continue;
7399                         }
7400                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
7401                                 process_device_extent_item(dev_extent_cache,
7402                                         &key, buf, i);
7403                                 continue;
7404
7405                         }
7406                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7407 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7408                                 process_extent_ref_v0(extent_cache, buf, i);
7409 #else
7410                                 BUG();
7411 #endif
7412                                 continue;
7413                         }
7414
7415                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7416                                 ret = add_tree_backref(extent_cache,
7417                                                 key.objectid, 0, key.offset, 0);
7418                                 if (ret < 0)
7419                                         error("add_tree_backref failed: %s",
7420                                               strerror(-ret));
7421                                 continue;
7422                         }
7423                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7424                                 ret = add_tree_backref(extent_cache,
7425                                                 key.objectid, key.offset, 0, 0);
7426                                 if (ret < 0)
7427                                         error("add_tree_backref failed: %s",
7428                                               strerror(-ret));
7429                                 continue;
7430                         }
7431                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7432                                 struct btrfs_extent_data_ref *ref;
7433                                 ref = btrfs_item_ptr(buf, i,
7434                                                 struct btrfs_extent_data_ref);
7435                                 add_data_backref(extent_cache,
7436                                         key.objectid, 0,
7437                                         btrfs_extent_data_ref_root(buf, ref),
7438                                         btrfs_extent_data_ref_objectid(buf,
7439                                                                        ref),
7440                                         btrfs_extent_data_ref_offset(buf, ref),
7441                                         btrfs_extent_data_ref_count(buf, ref),
7442                                         0, root->sectorsize);
7443                                 continue;
7444                         }
7445                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7446                                 struct btrfs_shared_data_ref *ref;
7447                                 ref = btrfs_item_ptr(buf, i,
7448                                                 struct btrfs_shared_data_ref);
7449                                 add_data_backref(extent_cache,
7450                                         key.objectid, key.offset, 0, 0, 0,
7451                                         btrfs_shared_data_ref_count(buf, ref),
7452                                         0, root->sectorsize);
7453                                 continue;
7454                         }
7455                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7456                                 struct bad_item *bad;
7457
7458                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7459                                         continue;
7460                                 if (!owner)
7461                                         continue;
7462                                 bad = malloc(sizeof(struct bad_item));
7463                                 if (!bad)
7464                                         continue;
7465                                 INIT_LIST_HEAD(&bad->list);
7466                                 memcpy(&bad->key, &key,
7467                                        sizeof(struct btrfs_key));
7468                                 bad->root_id = owner;
7469                                 list_add_tail(&bad->list, &delete_items);
7470                                 continue;
7471                         }
7472                         if (key.type != BTRFS_EXTENT_DATA_KEY)
7473                                 continue;
7474                         fi = btrfs_item_ptr(buf, i,
7475                                             struct btrfs_file_extent_item);
7476                         if (btrfs_file_extent_type(buf, fi) ==
7477                             BTRFS_FILE_EXTENT_INLINE)
7478                                 continue;
7479                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7480                                 continue;
7481
7482                         data_bytes_allocated +=
7483                                 btrfs_file_extent_disk_num_bytes(buf, fi);
7484                         if (data_bytes_allocated < root->sectorsize) {
7485                                 abort();
7486                         }
7487                         data_bytes_referenced +=
7488                                 btrfs_file_extent_num_bytes(buf, fi);
7489                         add_data_backref(extent_cache,
7490                                 btrfs_file_extent_disk_bytenr(buf, fi),
7491                                 parent, owner, key.objectid, key.offset -
7492                                 btrfs_file_extent_offset(buf, fi), 1, 1,
7493                                 btrfs_file_extent_disk_num_bytes(buf, fi));
7494                 }
7495         } else {
7496                 int level;
7497                 struct btrfs_key first_key;
7498
7499                 first_key.objectid = 0;
7500
7501                 if (nritems > 0)
7502                         btrfs_item_key_to_cpu(buf, &first_key, 0);
7503                 level = btrfs_header_level(buf);
7504                 for (i = 0; i < nritems; i++) {
7505                         struct extent_record tmpl;
7506
7507                         ptr = btrfs_node_blockptr(buf, i);
7508                         size = root->nodesize;
7509                         btrfs_node_key_to_cpu(buf, &key, i);
7510                         if (ri != NULL) {
7511                                 if ((level == ri->drop_level)
7512                                     && is_dropped_key(&key, &ri->drop_key)) {
7513                                         continue;
7514                                 }
7515                         }
7516
7517                         memset(&tmpl, 0, sizeof(tmpl));
7518                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7519                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7520                         tmpl.start = ptr;
7521                         tmpl.nr = size;
7522                         tmpl.refs = 1;
7523                         tmpl.metadata = 1;
7524                         tmpl.max_size = size;
7525                         ret = add_extent_rec(extent_cache, &tmpl);
7526                         if (ret < 0)
7527                                 goto out;
7528
7529                         ret = add_tree_backref(extent_cache, ptr, parent,
7530                                         owner, 1);
7531                         if (ret < 0) {
7532                                 error("add_tree_backref failed: %s",
7533                                       strerror(-ret));
7534                                 continue;
7535                         }
7536
7537                         if (level > 1) {
7538                                 add_pending(nodes, seen, ptr, size);
7539                         } else {
7540                                 add_pending(pending, seen, ptr, size);
7541                         }
7542                 }
7543                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7544                                       nritems) * sizeof(struct btrfs_key_ptr);
7545         }
7546         total_btree_bytes += buf->len;
7547         if (fs_root_objectid(btrfs_header_owner(buf)))
7548                 total_fs_tree_bytes += buf->len;
7549         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7550                 total_extent_tree_bytes += buf->len;
7551         if (!found_old_backref &&
7552             btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7553             btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7554             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7555                 found_old_backref = 1;
7556 out:
7557         free_extent_buffer(buf);
7558         return ret;
7559 }
7560
7561 static int add_root_to_pending(struct extent_buffer *buf,
7562                                struct cache_tree *extent_cache,
7563                                struct cache_tree *pending,
7564                                struct cache_tree *seen,
7565                                struct cache_tree *nodes,
7566                                u64 objectid)
7567 {
7568         struct extent_record tmpl;
7569         int ret;
7570
7571         if (btrfs_header_level(buf) > 0)
7572                 add_pending(nodes, seen, buf->start, buf->len);
7573         else
7574                 add_pending(pending, seen, buf->start, buf->len);
7575
7576         memset(&tmpl, 0, sizeof(tmpl));
7577         tmpl.start = buf->start;
7578         tmpl.nr = buf->len;
7579         tmpl.is_root = 1;
7580         tmpl.refs = 1;
7581         tmpl.metadata = 1;
7582         tmpl.max_size = buf->len;
7583         add_extent_rec(extent_cache, &tmpl);
7584
7585         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7586             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7587                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7588                                 0, 1);
7589         else
7590                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7591                                 1);
7592         return ret;
7593 }
7594
7595 /* as we fix the tree, we might be deleting blocks that
7596  * we're tracking for repair.  This hook makes sure we
7597  * remove any backrefs for blocks as we are fixing them.
7598  */
7599 static int free_extent_hook(struct btrfs_trans_handle *trans,
7600                             struct btrfs_root *root,
7601                             u64 bytenr, u64 num_bytes, u64 parent,
7602                             u64 root_objectid, u64 owner, u64 offset,
7603                             int refs_to_drop)
7604 {
7605         struct extent_record *rec;
7606         struct cache_extent *cache;
7607         int is_data;
7608         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7609
7610         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7611         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7612         if (!cache)
7613                 return 0;
7614
7615         rec = container_of(cache, struct extent_record, cache);
7616         if (is_data) {
7617                 struct data_backref *back;
7618                 back = find_data_backref(rec, parent, root_objectid, owner,
7619                                          offset, 1, bytenr, num_bytes);
7620                 if (!back)
7621                         goto out;
7622                 if (back->node.found_ref) {
7623                         back->found_ref -= refs_to_drop;
7624                         if (rec->refs)
7625                                 rec->refs -= refs_to_drop;
7626                 }
7627                 if (back->node.found_extent_tree) {
7628                         back->num_refs -= refs_to_drop;
7629                         if (rec->extent_item_refs)
7630                                 rec->extent_item_refs -= refs_to_drop;
7631                 }
7632                 if (back->found_ref == 0)
7633                         back->node.found_ref = 0;
7634                 if (back->num_refs == 0)
7635                         back->node.found_extent_tree = 0;
7636
7637                 if (!back->node.found_extent_tree && back->node.found_ref) {
7638                         list_del(&back->node.list);
7639                         free(back);
7640                 }
7641         } else {
7642                 struct tree_backref *back;
7643                 back = find_tree_backref(rec, parent, root_objectid);
7644                 if (!back)
7645                         goto out;
7646                 if (back->node.found_ref) {
7647                         if (rec->refs)
7648                                 rec->refs--;
7649                         back->node.found_ref = 0;
7650                 }
7651                 if (back->node.found_extent_tree) {
7652                         if (rec->extent_item_refs)
7653                                 rec->extent_item_refs--;
7654                         back->node.found_extent_tree = 0;
7655                 }
7656                 if (!back->node.found_extent_tree && back->node.found_ref) {
7657                         list_del(&back->node.list);
7658                         free(back);
7659                 }
7660         }
7661         maybe_free_extent_rec(extent_cache, rec);
7662 out:
7663         return 0;
7664 }
7665
7666 static int delete_extent_records(struct btrfs_trans_handle *trans,
7667                                  struct btrfs_root *root,
7668                                  struct btrfs_path *path,
7669                                  u64 bytenr, u64 new_len)
7670 {
7671         struct btrfs_key key;
7672         struct btrfs_key found_key;
7673         struct extent_buffer *leaf;
7674         int ret;
7675         int slot;
7676
7677
7678         key.objectid = bytenr;
7679         key.type = (u8)-1;
7680         key.offset = (u64)-1;
7681
7682         while(1) {
7683                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7684                                         &key, path, 0, 1);
7685                 if (ret < 0)
7686                         break;
7687
7688                 if (ret > 0) {
7689                         ret = 0;
7690                         if (path->slots[0] == 0)
7691                                 break;
7692                         path->slots[0]--;
7693                 }
7694                 ret = 0;
7695
7696                 leaf = path->nodes[0];
7697                 slot = path->slots[0];
7698
7699                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7700                 if (found_key.objectid != bytenr)
7701                         break;
7702
7703                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7704                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
7705                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7706                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7707                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7708                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7709                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7710                         btrfs_release_path(path);
7711                         if (found_key.type == 0) {
7712                                 if (found_key.offset == 0)
7713                                         break;
7714                                 key.offset = found_key.offset - 1;
7715                                 key.type = found_key.type;
7716                         }
7717                         key.type = found_key.type - 1;
7718                         key.offset = (u64)-1;
7719                         continue;
7720                 }
7721
7722                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7723                         found_key.objectid, found_key.type, found_key.offset);
7724
7725                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7726                 if (ret)
7727                         break;
7728                 btrfs_release_path(path);
7729
7730                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7731                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
7732                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7733                                 found_key.offset : root->nodesize;
7734
7735                         ret = btrfs_update_block_group(trans, root, bytenr,
7736                                                        bytes, 0, 0);
7737                         if (ret)
7738                                 break;
7739                 }
7740         }
7741
7742         btrfs_release_path(path);
7743         return ret;
7744 }
7745
7746 /*
7747  * for a single backref, this will allocate a new extent
7748  * and add the backref to it.
7749  */
7750 static int record_extent(struct btrfs_trans_handle *trans,
7751                          struct btrfs_fs_info *info,
7752                          struct btrfs_path *path,
7753                          struct extent_record *rec,
7754                          struct extent_backref *back,
7755                          int allocated, u64 flags)
7756 {
7757         int ret;
7758         struct btrfs_root *extent_root = info->extent_root;
7759         struct extent_buffer *leaf;
7760         struct btrfs_key ins_key;
7761         struct btrfs_extent_item *ei;
7762         struct data_backref *dback;
7763         struct btrfs_tree_block_info *bi;
7764
7765         if (!back->is_data)
7766                 rec->max_size = max_t(u64, rec->max_size,
7767                                     info->extent_root->nodesize);
7768
7769         if (!allocated) {
7770                 u32 item_size = sizeof(*ei);
7771
7772                 if (!back->is_data)
7773                         item_size += sizeof(*bi);
7774
7775                 ins_key.objectid = rec->start;
7776                 ins_key.offset = rec->max_size;
7777                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7778
7779                 ret = btrfs_insert_empty_item(trans, extent_root, path,
7780                                         &ins_key, item_size);
7781                 if (ret)
7782                         goto fail;
7783
7784                 leaf = path->nodes[0];
7785                 ei = btrfs_item_ptr(leaf, path->slots[0],
7786                                     struct btrfs_extent_item);
7787
7788                 btrfs_set_extent_refs(leaf, ei, 0);
7789                 btrfs_set_extent_generation(leaf, ei, rec->generation);
7790
7791                 if (back->is_data) {
7792                         btrfs_set_extent_flags(leaf, ei,
7793                                                BTRFS_EXTENT_FLAG_DATA);
7794                 } else {
7795                         struct btrfs_disk_key copy_key;;
7796
7797                         bi = (struct btrfs_tree_block_info *)(ei + 1);
7798                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
7799                                              sizeof(*bi));
7800
7801                         btrfs_set_disk_key_objectid(&copy_key,
7802                                                     rec->info_objectid);
7803                         btrfs_set_disk_key_type(&copy_key, 0);
7804                         btrfs_set_disk_key_offset(&copy_key, 0);
7805
7806                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7807                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
7808
7809                         btrfs_set_extent_flags(leaf, ei,
7810                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
7811                 }
7812
7813                 btrfs_mark_buffer_dirty(leaf);
7814                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7815                                                rec->max_size, 1, 0);
7816                 if (ret)
7817                         goto fail;
7818                 btrfs_release_path(path);
7819         }
7820
7821         if (back->is_data) {
7822                 u64 parent;
7823                 int i;
7824
7825                 dback = to_data_backref(back);
7826                 if (back->full_backref)
7827                         parent = dback->parent;
7828                 else
7829                         parent = 0;
7830
7831                 for (i = 0; i < dback->found_ref; i++) {
7832                         /* if parent != 0, we're doing a full backref
7833                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7834                          * just makes the backref allocator create a data
7835                          * backref
7836                          */
7837                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
7838                                                    rec->start, rec->max_size,
7839                                                    parent,
7840                                                    dback->root,
7841                                                    parent ?
7842                                                    BTRFS_FIRST_FREE_OBJECTID :
7843                                                    dback->owner,
7844                                                    dback->offset);
7845                         if (ret)
7846                                 break;
7847                 }
7848                 fprintf(stderr, "adding new data backref"
7849                                 " on %llu %s %llu owner %llu"
7850                                 " offset %llu found %d\n",
7851                                 (unsigned long long)rec->start,
7852                                 back->full_backref ?
7853                                 "parent" : "root",
7854                                 back->full_backref ?
7855                                 (unsigned long long)parent :
7856                                 (unsigned long long)dback->root,
7857                                 (unsigned long long)dback->owner,
7858                                 (unsigned long long)dback->offset,
7859                                 dback->found_ref);
7860         } else {
7861                 u64 parent;
7862                 struct tree_backref *tback;
7863
7864                 tback = to_tree_backref(back);
7865                 if (back->full_backref)
7866                         parent = tback->parent;
7867                 else
7868                         parent = 0;
7869
7870                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7871                                            rec->start, rec->max_size,
7872                                            parent, tback->root, 0, 0);
7873                 fprintf(stderr, "adding new tree backref on "
7874                         "start %llu len %llu parent %llu root %llu\n",
7875                         rec->start, rec->max_size, parent, tback->root);
7876         }
7877 fail:
7878         btrfs_release_path(path);
7879         return ret;
7880 }
7881
7882 static struct extent_entry *find_entry(struct list_head *entries,
7883                                        u64 bytenr, u64 bytes)
7884 {
7885         struct extent_entry *entry = NULL;
7886
7887         list_for_each_entry(entry, entries, list) {
7888                 if (entry->bytenr == bytenr && entry->bytes == bytes)
7889                         return entry;
7890         }
7891
7892         return NULL;
7893 }
7894
7895 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7896 {
7897         struct extent_entry *entry, *best = NULL, *prev = NULL;
7898
7899         list_for_each_entry(entry, entries, list) {
7900                 /*
7901                  * If there are as many broken entries as entries then we know
7902                  * not to trust this particular entry.
7903                  */
7904                 if (entry->broken == entry->count)
7905                         continue;
7906
7907                 /*
7908                  * Special case, when there are only two entries and 'best' is
7909                  * the first one
7910                  */
7911                 if (!prev) {
7912                         best = entry;
7913                         prev = entry;
7914                         continue;
7915                 }
7916
7917                 /*
7918                  * If our current entry == best then we can't be sure our best
7919                  * is really the best, so we need to keep searching.
7920                  */
7921                 if (best && best->count == entry->count) {
7922                         prev = entry;
7923                         best = NULL;
7924                         continue;
7925                 }
7926
7927                 /* Prev == entry, not good enough, have to keep searching */
7928                 if (!prev->broken && prev->count == entry->count)
7929                         continue;
7930
7931                 if (!best)
7932                         best = (prev->count > entry->count) ? prev : entry;
7933                 else if (best->count < entry->count)
7934                         best = entry;
7935                 prev = entry;
7936         }
7937
7938         return best;
7939 }
7940
7941 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7942                       struct data_backref *dback, struct extent_entry *entry)
7943 {
7944         struct btrfs_trans_handle *trans;
7945         struct btrfs_root *root;
7946         struct btrfs_file_extent_item *fi;
7947         struct extent_buffer *leaf;
7948         struct btrfs_key key;
7949         u64 bytenr, bytes;
7950         int ret, err;
7951
7952         key.objectid = dback->root;
7953         key.type = BTRFS_ROOT_ITEM_KEY;
7954         key.offset = (u64)-1;
7955         root = btrfs_read_fs_root(info, &key);
7956         if (IS_ERR(root)) {
7957                 fprintf(stderr, "Couldn't find root for our ref\n");
7958                 return -EINVAL;
7959         }
7960
7961         /*
7962          * The backref points to the original offset of the extent if it was
7963          * split, so we need to search down to the offset we have and then walk
7964          * forward until we find the backref we're looking for.
7965          */
7966         key.objectid = dback->owner;
7967         key.type = BTRFS_EXTENT_DATA_KEY;
7968         key.offset = dback->offset;
7969         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7970         if (ret < 0) {
7971                 fprintf(stderr, "Error looking up ref %d\n", ret);
7972                 return ret;
7973         }
7974
7975         while (1) {
7976                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7977                         ret = btrfs_next_leaf(root, path);
7978                         if (ret) {
7979                                 fprintf(stderr, "Couldn't find our ref, next\n");
7980                                 return -EINVAL;
7981                         }
7982                 }
7983                 leaf = path->nodes[0];
7984                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7985                 if (key.objectid != dback->owner ||
7986                     key.type != BTRFS_EXTENT_DATA_KEY) {
7987                         fprintf(stderr, "Couldn't find our ref, search\n");
7988                         return -EINVAL;
7989                 }
7990                 fi = btrfs_item_ptr(leaf, path->slots[0],
7991                                     struct btrfs_file_extent_item);
7992                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7993                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7994
7995                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7996                         break;
7997                 path->slots[0]++;
7998         }
7999
8000         btrfs_release_path(path);
8001
8002         trans = btrfs_start_transaction(root, 1);
8003         if (IS_ERR(trans))
8004                 return PTR_ERR(trans);
8005
8006         /*
8007          * Ok we have the key of the file extent we want to fix, now we can cow
8008          * down to the thing and fix it.
8009          */
8010         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8011         if (ret < 0) {
8012                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8013                         key.objectid, key.type, key.offset, ret);
8014                 goto out;
8015         }
8016         if (ret > 0) {
8017                 fprintf(stderr, "Well that's odd, we just found this key "
8018                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8019                         key.offset);
8020                 ret = -EINVAL;
8021                 goto out;
8022         }
8023         leaf = path->nodes[0];
8024         fi = btrfs_item_ptr(leaf, path->slots[0],
8025                             struct btrfs_file_extent_item);
8026
8027         if (btrfs_file_extent_compression(leaf, fi) &&
8028             dback->disk_bytenr != entry->bytenr) {
8029                 fprintf(stderr, "Ref doesn't match the record start and is "
8030                         "compressed, please take a btrfs-image of this file "
8031                         "system and send it to a btrfs developer so they can "
8032                         "complete this functionality for bytenr %Lu\n",
8033                         dback->disk_bytenr);
8034                 ret = -EINVAL;
8035                 goto out;
8036         }
8037
8038         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8039                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8040         } else if (dback->disk_bytenr > entry->bytenr) {
8041                 u64 off_diff, offset;
8042
8043                 off_diff = dback->disk_bytenr - entry->bytenr;
8044                 offset = btrfs_file_extent_offset(leaf, fi);
8045                 if (dback->disk_bytenr + offset +
8046                     btrfs_file_extent_num_bytes(leaf, fi) >
8047                     entry->bytenr + entry->bytes) {
8048                         fprintf(stderr, "Ref is past the entry end, please "
8049                                 "take a btrfs-image of this file system and "
8050                                 "send it to a btrfs developer, ref %Lu\n",
8051                                 dback->disk_bytenr);
8052                         ret = -EINVAL;
8053                         goto out;
8054                 }
8055                 offset += off_diff;
8056                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8057                 btrfs_set_file_extent_offset(leaf, fi, offset);
8058         } else if (dback->disk_bytenr < entry->bytenr) {
8059                 u64 offset;
8060
8061                 offset = btrfs_file_extent_offset(leaf, fi);
8062                 if (dback->disk_bytenr + offset < entry->bytenr) {
8063                         fprintf(stderr, "Ref is before the entry start, please"
8064                                 " take a btrfs-image of this file system and "
8065                                 "send it to a btrfs developer, ref %Lu\n",
8066                                 dback->disk_bytenr);
8067                         ret = -EINVAL;
8068                         goto out;
8069                 }
8070
8071                 offset += dback->disk_bytenr;
8072                 offset -= entry->bytenr;
8073                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8074                 btrfs_set_file_extent_offset(leaf, fi, offset);
8075         }
8076
8077         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8078
8079         /*
8080          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8081          * only do this if we aren't using compression, otherwise it's a
8082          * trickier case.
8083          */
8084         if (!btrfs_file_extent_compression(leaf, fi))
8085                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8086         else
8087                 printf("ram bytes may be wrong?\n");
8088         btrfs_mark_buffer_dirty(leaf);
8089 out:
8090         err = btrfs_commit_transaction(trans, root);
8091         btrfs_release_path(path);
8092         return ret ? ret : err;
8093 }
8094
8095 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8096                            struct extent_record *rec)
8097 {
8098         struct extent_backref *back;
8099         struct data_backref *dback;
8100         struct extent_entry *entry, *best = NULL;
8101         LIST_HEAD(entries);
8102         int nr_entries = 0;
8103         int broken_entries = 0;
8104         int ret = 0;
8105         short mismatch = 0;
8106
8107         /*
8108          * Metadata is easy and the backrefs should always agree on bytenr and
8109          * size, if not we've got bigger issues.
8110          */
8111         if (rec->metadata)
8112                 return 0;
8113
8114         list_for_each_entry(back, &rec->backrefs, list) {
8115                 if (back->full_backref || !back->is_data)
8116                         continue;
8117
8118                 dback = to_data_backref(back);
8119
8120                 /*
8121                  * We only pay attention to backrefs that we found a real
8122                  * backref for.
8123                  */
8124                 if (dback->found_ref == 0)
8125                         continue;
8126
8127                 /*
8128                  * For now we only catch when the bytes don't match, not the
8129                  * bytenr.  We can easily do this at the same time, but I want
8130                  * to have a fs image to test on before we just add repair
8131                  * functionality willy-nilly so we know we won't screw up the
8132                  * repair.
8133                  */
8134
8135                 entry = find_entry(&entries, dback->disk_bytenr,
8136                                    dback->bytes);
8137                 if (!entry) {
8138                         entry = malloc(sizeof(struct extent_entry));
8139                         if (!entry) {
8140                                 ret = -ENOMEM;
8141                                 goto out;
8142                         }
8143                         memset(entry, 0, sizeof(*entry));
8144                         entry->bytenr = dback->disk_bytenr;
8145                         entry->bytes = dback->bytes;
8146                         list_add_tail(&entry->list, &entries);
8147                         nr_entries++;
8148                 }
8149
8150                 /*
8151                  * If we only have on entry we may think the entries agree when
8152                  * in reality they don't so we have to do some extra checking.
8153                  */
8154                 if (dback->disk_bytenr != rec->start ||
8155                     dback->bytes != rec->nr || back->broken)
8156                         mismatch = 1;
8157
8158                 if (back->broken) {
8159                         entry->broken++;
8160                         broken_entries++;
8161                 }
8162
8163                 entry->count++;
8164         }
8165
8166         /* Yay all the backrefs agree, carry on good sir */
8167         if (nr_entries <= 1 && !mismatch)
8168                 goto out;
8169
8170         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8171                 "%Lu\n", rec->start);
8172
8173         /*
8174          * First we want to see if the backrefs can agree amongst themselves who
8175          * is right, so figure out which one of the entries has the highest
8176          * count.
8177          */
8178         best = find_most_right_entry(&entries);
8179
8180         /*
8181          * Ok so we may have an even split between what the backrefs think, so
8182          * this is where we use the extent ref to see what it thinks.
8183          */
8184         if (!best) {
8185                 entry = find_entry(&entries, rec->start, rec->nr);
8186                 if (!entry && (!broken_entries || !rec->found_rec)) {
8187                         fprintf(stderr, "Backrefs don't agree with each other "
8188                                 "and extent record doesn't agree with anybody,"
8189                                 " so we can't fix bytenr %Lu bytes %Lu\n",
8190                                 rec->start, rec->nr);
8191                         ret = -EINVAL;
8192                         goto out;
8193                 } else if (!entry) {
8194                         /*
8195                          * Ok our backrefs were broken, we'll assume this is the
8196                          * correct value and add an entry for this range.
8197                          */
8198                         entry = malloc(sizeof(struct extent_entry));
8199                         if (!entry) {
8200                                 ret = -ENOMEM;
8201                                 goto out;
8202                         }
8203                         memset(entry, 0, sizeof(*entry));
8204                         entry->bytenr = rec->start;
8205                         entry->bytes = rec->nr;
8206                         list_add_tail(&entry->list, &entries);
8207                         nr_entries++;
8208                 }
8209                 entry->count++;
8210                 best = find_most_right_entry(&entries);
8211                 if (!best) {
8212                         fprintf(stderr, "Backrefs and extent record evenly "
8213                                 "split on who is right, this is going to "
8214                                 "require user input to fix bytenr %Lu bytes "
8215                                 "%Lu\n", rec->start, rec->nr);
8216                         ret = -EINVAL;
8217                         goto out;
8218                 }
8219         }
8220
8221         /*
8222          * I don't think this can happen currently as we'll abort() if we catch
8223          * this case higher up, but in case somebody removes that we still can't
8224          * deal with it properly here yet, so just bail out of that's the case.
8225          */
8226         if (best->bytenr != rec->start) {
8227                 fprintf(stderr, "Extent start and backref starts don't match, "
8228                         "please use btrfs-image on this file system and send "
8229                         "it to a btrfs developer so they can make fsck fix "
8230                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
8231                         rec->start, rec->nr);
8232                 ret = -EINVAL;
8233                 goto out;
8234         }
8235
8236         /*
8237          * Ok great we all agreed on an extent record, let's go find the real
8238          * references and fix up the ones that don't match.
8239          */
8240         list_for_each_entry(back, &rec->backrefs, list) {
8241                 if (back->full_backref || !back->is_data)
8242                         continue;
8243
8244                 dback = to_data_backref(back);
8245
8246                 /*
8247                  * Still ignoring backrefs that don't have a real ref attached
8248                  * to them.
8249                  */
8250                 if (dback->found_ref == 0)
8251                         continue;
8252
8253                 if (dback->bytes == best->bytes &&
8254                     dback->disk_bytenr == best->bytenr)
8255                         continue;
8256
8257                 ret = repair_ref(info, path, dback, best);
8258                 if (ret)
8259                         goto out;
8260         }
8261
8262         /*
8263          * Ok we messed with the actual refs, which means we need to drop our
8264          * entire cache and go back and rescan.  I know this is a huge pain and
8265          * adds a lot of extra work, but it's the only way to be safe.  Once all
8266          * the backrefs agree we may not need to do anything to the extent
8267          * record itself.
8268          */
8269         ret = -EAGAIN;
8270 out:
8271         while (!list_empty(&entries)) {
8272                 entry = list_entry(entries.next, struct extent_entry, list);
8273                 list_del_init(&entry->list);
8274                 free(entry);
8275         }
8276         return ret;
8277 }
8278
8279 static int process_duplicates(struct btrfs_root *root,
8280                               struct cache_tree *extent_cache,
8281                               struct extent_record *rec)
8282 {
8283         struct extent_record *good, *tmp;
8284         struct cache_extent *cache;
8285         int ret;
8286
8287         /*
8288          * If we found a extent record for this extent then return, or if we
8289          * have more than one duplicate we are likely going to need to delete
8290          * something.
8291          */
8292         if (rec->found_rec || rec->num_duplicates > 1)
8293                 return 0;
8294
8295         /* Shouldn't happen but just in case */
8296         BUG_ON(!rec->num_duplicates);
8297
8298         /*
8299          * So this happens if we end up with a backref that doesn't match the
8300          * actual extent entry.  So either the backref is bad or the extent
8301          * entry is bad.  Either way we want to have the extent_record actually
8302          * reflect what we found in the extent_tree, so we need to take the
8303          * duplicate out and use that as the extent_record since the only way we
8304          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8305          */
8306         remove_cache_extent(extent_cache, &rec->cache);
8307
8308         good = to_extent_record(rec->dups.next);
8309         list_del_init(&good->list);
8310         INIT_LIST_HEAD(&good->backrefs);
8311         INIT_LIST_HEAD(&good->dups);
8312         good->cache.start = good->start;
8313         good->cache.size = good->nr;
8314         good->content_checked = 0;
8315         good->owner_ref_checked = 0;
8316         good->num_duplicates = 0;
8317         good->refs = rec->refs;
8318         list_splice_init(&rec->backrefs, &good->backrefs);
8319         while (1) {
8320                 cache = lookup_cache_extent(extent_cache, good->start,
8321                                             good->nr);
8322                 if (!cache)
8323                         break;
8324                 tmp = container_of(cache, struct extent_record, cache);
8325
8326                 /*
8327                  * If we find another overlapping extent and it's found_rec is
8328                  * set then it's a duplicate and we need to try and delete
8329                  * something.
8330                  */
8331                 if (tmp->found_rec || tmp->num_duplicates > 0) {
8332                         if (list_empty(&good->list))
8333                                 list_add_tail(&good->list,
8334                                               &duplicate_extents);
8335                         good->num_duplicates += tmp->num_duplicates + 1;
8336                         list_splice_init(&tmp->dups, &good->dups);
8337                         list_del_init(&tmp->list);
8338                         list_add_tail(&tmp->list, &good->dups);
8339                         remove_cache_extent(extent_cache, &tmp->cache);
8340                         continue;
8341                 }
8342
8343                 /*
8344                  * Ok we have another non extent item backed extent rec, so lets
8345                  * just add it to this extent and carry on like we did above.
8346                  */
8347                 good->refs += tmp->refs;
8348                 list_splice_init(&tmp->backrefs, &good->backrefs);
8349                 remove_cache_extent(extent_cache, &tmp->cache);
8350                 free(tmp);
8351         }
8352         ret = insert_cache_extent(extent_cache, &good->cache);
8353         BUG_ON(ret);
8354         free(rec);
8355         return good->num_duplicates ? 0 : 1;
8356 }
8357
8358 static int delete_duplicate_records(struct btrfs_root *root,
8359                                     struct extent_record *rec)
8360 {
8361         struct btrfs_trans_handle *trans;
8362         LIST_HEAD(delete_list);
8363         struct btrfs_path path;
8364         struct extent_record *tmp, *good, *n;
8365         int nr_del = 0;
8366         int ret = 0, err;
8367         struct btrfs_key key;
8368
8369         btrfs_init_path(&path);
8370
8371         good = rec;
8372         /* Find the record that covers all of the duplicates. */
8373         list_for_each_entry(tmp, &rec->dups, list) {
8374                 if (good->start < tmp->start)
8375                         continue;
8376                 if (good->nr > tmp->nr)
8377                         continue;
8378
8379                 if (tmp->start + tmp->nr < good->start + good->nr) {
8380                         fprintf(stderr, "Ok we have overlapping extents that "
8381                                 "aren't completely covered by each other, this "
8382                                 "is going to require more careful thought.  "
8383                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8384                                 tmp->start, tmp->nr, good->start, good->nr);
8385                         abort();
8386                 }
8387                 good = tmp;
8388         }
8389
8390         if (good != rec)
8391                 list_add_tail(&rec->list, &delete_list);
8392
8393         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8394                 if (tmp == good)
8395                         continue;
8396                 list_move_tail(&tmp->list, &delete_list);
8397         }
8398
8399         root = root->fs_info->extent_root;
8400         trans = btrfs_start_transaction(root, 1);
8401         if (IS_ERR(trans)) {
8402                 ret = PTR_ERR(trans);
8403                 goto out;
8404         }
8405
8406         list_for_each_entry(tmp, &delete_list, list) {
8407                 if (tmp->found_rec == 0)
8408                         continue;
8409                 key.objectid = tmp->start;
8410                 key.type = BTRFS_EXTENT_ITEM_KEY;
8411                 key.offset = tmp->nr;
8412
8413                 /* Shouldn't happen but just in case */
8414                 if (tmp->metadata) {
8415                         fprintf(stderr, "Well this shouldn't happen, extent "
8416                                 "record overlaps but is metadata? "
8417                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8418                         abort();
8419                 }
8420
8421                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8422                 if (ret) {
8423                         if (ret > 0)
8424                                 ret = -EINVAL;
8425                         break;
8426                 }
8427                 ret = btrfs_del_item(trans, root, &path);
8428                 if (ret)
8429                         break;
8430                 btrfs_release_path(&path);
8431                 nr_del++;
8432         }
8433         err = btrfs_commit_transaction(trans, root);
8434         if (err && !ret)
8435                 ret = err;
8436 out:
8437         while (!list_empty(&delete_list)) {
8438                 tmp = to_extent_record(delete_list.next);
8439                 list_del_init(&tmp->list);
8440                 if (tmp == rec)
8441                         continue;
8442                 free(tmp);
8443         }
8444
8445         while (!list_empty(&rec->dups)) {
8446                 tmp = to_extent_record(rec->dups.next);
8447                 list_del_init(&tmp->list);
8448                 free(tmp);
8449         }
8450
8451         btrfs_release_path(&path);
8452
8453         if (!ret && !nr_del)
8454                 rec->num_duplicates = 0;
8455
8456         return ret ? ret : nr_del;
8457 }
8458
8459 static int find_possible_backrefs(struct btrfs_fs_info *info,
8460                                   struct btrfs_path *path,
8461                                   struct cache_tree *extent_cache,
8462                                   struct extent_record *rec)
8463 {
8464         struct btrfs_root *root;
8465         struct extent_backref *back;
8466         struct data_backref *dback;
8467         struct cache_extent *cache;
8468         struct btrfs_file_extent_item *fi;
8469         struct btrfs_key key;
8470         u64 bytenr, bytes;
8471         int ret;
8472
8473         list_for_each_entry(back, &rec->backrefs, list) {
8474                 /* Don't care about full backrefs (poor unloved backrefs) */
8475                 if (back->full_backref || !back->is_data)
8476                         continue;
8477
8478                 dback = to_data_backref(back);
8479
8480                 /* We found this one, we don't need to do a lookup */
8481                 if (dback->found_ref)
8482                         continue;
8483
8484                 key.objectid = dback->root;
8485                 key.type = BTRFS_ROOT_ITEM_KEY;
8486                 key.offset = (u64)-1;
8487
8488                 root = btrfs_read_fs_root(info, &key);
8489
8490                 /* No root, definitely a bad ref, skip */
8491                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8492                         continue;
8493                 /* Other err, exit */
8494                 if (IS_ERR(root))
8495                         return PTR_ERR(root);
8496
8497                 key.objectid = dback->owner;
8498                 key.type = BTRFS_EXTENT_DATA_KEY;
8499                 key.offset = dback->offset;
8500                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8501                 if (ret) {
8502                         btrfs_release_path(path);
8503                         if (ret < 0)
8504                                 return ret;
8505                         /* Didn't find it, we can carry on */
8506                         ret = 0;
8507                         continue;
8508                 }
8509
8510                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8511                                     struct btrfs_file_extent_item);
8512                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8513                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8514                 btrfs_release_path(path);
8515                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8516                 if (cache) {
8517                         struct extent_record *tmp;
8518                         tmp = container_of(cache, struct extent_record, cache);
8519
8520                         /*
8521                          * If we found an extent record for the bytenr for this
8522                          * particular backref then we can't add it to our
8523                          * current extent record.  We only want to add backrefs
8524                          * that don't have a corresponding extent item in the
8525                          * extent tree since they likely belong to this record
8526                          * and we need to fix it if it doesn't match bytenrs.
8527                          */
8528                         if  (tmp->found_rec)
8529                                 continue;
8530                 }
8531
8532                 dback->found_ref += 1;
8533                 dback->disk_bytenr = bytenr;
8534                 dback->bytes = bytes;
8535
8536                 /*
8537                  * Set this so the verify backref code knows not to trust the
8538                  * values in this backref.
8539                  */
8540                 back->broken = 1;
8541         }
8542
8543         return 0;
8544 }
8545
8546 /*
8547  * Record orphan data ref into corresponding root.
8548  *
8549  * Return 0 if the extent item contains data ref and recorded.
8550  * Return 1 if the extent item contains no useful data ref
8551  *   On that case, it may contains only shared_dataref or metadata backref
8552  *   or the file extent exists(this should be handled by the extent bytenr
8553  *   recovery routine)
8554  * Return <0 if something goes wrong.
8555  */
8556 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8557                                       struct extent_record *rec)
8558 {
8559         struct btrfs_key key;
8560         struct btrfs_root *dest_root;
8561         struct extent_backref *back;
8562         struct data_backref *dback;
8563         struct orphan_data_extent *orphan;
8564         struct btrfs_path path;
8565         int recorded_data_ref = 0;
8566         int ret = 0;
8567
8568         if (rec->metadata)
8569                 return 1;
8570         btrfs_init_path(&path);
8571         list_for_each_entry(back, &rec->backrefs, list) {
8572                 if (back->full_backref || !back->is_data ||
8573                     !back->found_extent_tree)
8574                         continue;
8575                 dback = to_data_backref(back);
8576                 if (dback->found_ref)
8577                         continue;
8578                 key.objectid = dback->root;
8579                 key.type = BTRFS_ROOT_ITEM_KEY;
8580                 key.offset = (u64)-1;
8581
8582                 dest_root = btrfs_read_fs_root(fs_info, &key);
8583
8584                 /* For non-exist root we just skip it */
8585                 if (IS_ERR(dest_root) || !dest_root)
8586                         continue;
8587
8588                 key.objectid = dback->owner;
8589                 key.type = BTRFS_EXTENT_DATA_KEY;
8590                 key.offset = dback->offset;
8591
8592                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8593                 btrfs_release_path(&path);
8594                 /*
8595                  * For ret < 0, it's OK since the fs-tree may be corrupted,
8596                  * we need to record it for inode/file extent rebuild.
8597                  * For ret > 0, we record it only for file extent rebuild.
8598                  * For ret == 0, the file extent exists but only bytenr
8599                  * mismatch, let the original bytenr fix routine to handle,
8600                  * don't record it.
8601                  */
8602                 if (ret == 0)
8603                         continue;
8604                 ret = 0;
8605                 orphan = malloc(sizeof(*orphan));
8606                 if (!orphan) {
8607                         ret = -ENOMEM;
8608                         goto out;
8609                 }
8610                 INIT_LIST_HEAD(&orphan->list);
8611                 orphan->root = dback->root;
8612                 orphan->objectid = dback->owner;
8613                 orphan->offset = dback->offset;
8614                 orphan->disk_bytenr = rec->cache.start;
8615                 orphan->disk_len = rec->cache.size;
8616                 list_add(&dest_root->orphan_data_extents, &orphan->list);
8617                 recorded_data_ref = 1;
8618         }
8619 out:
8620         btrfs_release_path(&path);
8621         if (!ret)
8622                 return !recorded_data_ref;
8623         else
8624                 return ret;
8625 }
8626
8627 /*
8628  * when an incorrect extent item is found, this will delete
8629  * all of the existing entries for it and recreate them
8630  * based on what the tree scan found.
8631  */
8632 static int fixup_extent_refs(struct btrfs_fs_info *info,
8633                              struct cache_tree *extent_cache,
8634                              struct extent_record *rec)
8635 {
8636         struct btrfs_trans_handle *trans = NULL;
8637         int ret;
8638         struct btrfs_path path;
8639         struct list_head *cur = rec->backrefs.next;
8640         struct cache_extent *cache;
8641         struct extent_backref *back;
8642         int allocated = 0;
8643         u64 flags = 0;
8644
8645         if (rec->flag_block_full_backref)
8646                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8647
8648         btrfs_init_path(&path);
8649         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8650                 /*
8651                  * Sometimes the backrefs themselves are so broken they don't
8652                  * get attached to any meaningful rec, so first go back and
8653                  * check any of our backrefs that we couldn't find and throw
8654                  * them into the list if we find the backref so that
8655                  * verify_backrefs can figure out what to do.
8656                  */
8657                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8658                 if (ret < 0)
8659                         goto out;
8660         }
8661
8662         /* step one, make sure all of the backrefs agree */
8663         ret = verify_backrefs(info, &path, rec);
8664         if (ret < 0)
8665                 goto out;
8666
8667         trans = btrfs_start_transaction(info->extent_root, 1);
8668         if (IS_ERR(trans)) {
8669                 ret = PTR_ERR(trans);
8670                 goto out;
8671         }
8672
8673         /* step two, delete all the existing records */
8674         ret = delete_extent_records(trans, info->extent_root, &path,
8675                                     rec->start, rec->max_size);
8676
8677         if (ret < 0)
8678                 goto out;
8679
8680         /* was this block corrupt?  If so, don't add references to it */
8681         cache = lookup_cache_extent(info->corrupt_blocks,
8682                                     rec->start, rec->max_size);
8683         if (cache) {
8684                 ret = 0;
8685                 goto out;
8686         }
8687
8688         /* step three, recreate all the refs we did find */
8689         while(cur != &rec->backrefs) {
8690                 back = to_extent_backref(cur);
8691                 cur = cur->next;
8692
8693                 /*
8694                  * if we didn't find any references, don't create a
8695                  * new extent record
8696                  */
8697                 if (!back->found_ref)
8698                         continue;
8699
8700                 rec->bad_full_backref = 0;
8701                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8702                 allocated = 1;
8703
8704                 if (ret)
8705                         goto out;
8706         }
8707 out:
8708         if (trans) {
8709                 int err = btrfs_commit_transaction(trans, info->extent_root);
8710                 if (!ret)
8711                         ret = err;
8712         }
8713
8714         btrfs_release_path(&path);
8715         return ret;
8716 }
8717
8718 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8719                               struct extent_record *rec)
8720 {
8721         struct btrfs_trans_handle *trans;
8722         struct btrfs_root *root = fs_info->extent_root;
8723         struct btrfs_path path;
8724         struct btrfs_extent_item *ei;
8725         struct btrfs_key key;
8726         u64 flags;
8727         int ret = 0;
8728
8729         key.objectid = rec->start;
8730         if (rec->metadata) {
8731                 key.type = BTRFS_METADATA_ITEM_KEY;
8732                 key.offset = rec->info_level;
8733         } else {
8734                 key.type = BTRFS_EXTENT_ITEM_KEY;
8735                 key.offset = rec->max_size;
8736         }
8737
8738         trans = btrfs_start_transaction(root, 0);
8739         if (IS_ERR(trans))
8740                 return PTR_ERR(trans);
8741
8742         btrfs_init_path(&path);
8743         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8744         if (ret < 0) {
8745                 btrfs_release_path(&path);
8746                 btrfs_commit_transaction(trans, root);
8747                 return ret;
8748         } else if (ret) {
8749                 fprintf(stderr, "Didn't find extent for %llu\n",
8750                         (unsigned long long)rec->start);
8751                 btrfs_release_path(&path);
8752                 btrfs_commit_transaction(trans, root);
8753                 return -ENOENT;
8754         }
8755
8756         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8757                             struct btrfs_extent_item);
8758         flags = btrfs_extent_flags(path.nodes[0], ei);
8759         if (rec->flag_block_full_backref) {
8760                 fprintf(stderr, "setting full backref on %llu\n",
8761                         (unsigned long long)key.objectid);
8762                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8763         } else {
8764                 fprintf(stderr, "clearing full backref on %llu\n",
8765                         (unsigned long long)key.objectid);
8766                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8767         }
8768         btrfs_set_extent_flags(path.nodes[0], ei, flags);
8769         btrfs_mark_buffer_dirty(path.nodes[0]);
8770         btrfs_release_path(&path);
8771         return btrfs_commit_transaction(trans, root);
8772 }
8773
8774 /* right now we only prune from the extent allocation tree */
8775 static int prune_one_block(struct btrfs_trans_handle *trans,
8776                            struct btrfs_fs_info *info,
8777                            struct btrfs_corrupt_block *corrupt)
8778 {
8779         int ret;
8780         struct btrfs_path path;
8781         struct extent_buffer *eb;
8782         u64 found;
8783         int slot;
8784         int nritems;
8785         int level = corrupt->level + 1;
8786
8787         btrfs_init_path(&path);
8788 again:
8789         /* we want to stop at the parent to our busted block */
8790         path.lowest_level = level;
8791
8792         ret = btrfs_search_slot(trans, info->extent_root,
8793                                 &corrupt->key, &path, -1, 1);
8794
8795         if (ret < 0)
8796                 goto out;
8797
8798         eb = path.nodes[level];
8799         if (!eb) {
8800                 ret = -ENOENT;
8801                 goto out;
8802         }
8803
8804         /*
8805          * hopefully the search gave us the block we want to prune,
8806          * lets try that first
8807          */
8808         slot = path.slots[level];
8809         found =  btrfs_node_blockptr(eb, slot);
8810         if (found == corrupt->cache.start)
8811                 goto del_ptr;
8812
8813         nritems = btrfs_header_nritems(eb);
8814
8815         /* the search failed, lets scan this node and hope we find it */
8816         for (slot = 0; slot < nritems; slot++) {
8817                 found =  btrfs_node_blockptr(eb, slot);
8818                 if (found == corrupt->cache.start)
8819                         goto del_ptr;
8820         }
8821         /*
8822          * we couldn't find the bad block.  TODO, search all the nodes for pointers
8823          * to this block
8824          */
8825         if (eb == info->extent_root->node) {
8826                 ret = -ENOENT;
8827                 goto out;
8828         } else {
8829                 level++;
8830                 btrfs_release_path(&path);
8831                 goto again;
8832         }
8833
8834 del_ptr:
8835         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8836         ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8837
8838 out:
8839         btrfs_release_path(&path);
8840         return ret;
8841 }
8842
8843 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8844 {
8845         struct btrfs_trans_handle *trans = NULL;
8846         struct cache_extent *cache;
8847         struct btrfs_corrupt_block *corrupt;
8848
8849         while (1) {
8850                 cache = search_cache_extent(info->corrupt_blocks, 0);
8851                 if (!cache)
8852                         break;
8853                 if (!trans) {
8854                         trans = btrfs_start_transaction(info->extent_root, 1);
8855                         if (IS_ERR(trans))
8856                                 return PTR_ERR(trans);
8857                 }
8858                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8859                 prune_one_block(trans, info, corrupt);
8860                 remove_cache_extent(info->corrupt_blocks, cache);
8861         }
8862         if (trans)
8863                 return btrfs_commit_transaction(trans, info->extent_root);
8864         return 0;
8865 }
8866
8867 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8868 {
8869         struct btrfs_block_group_cache *cache;
8870         u64 start, end;
8871         int ret;
8872
8873         while (1) {
8874                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8875                                             &start, &end, EXTENT_DIRTY);
8876                 if (ret)
8877                         break;
8878                 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8879                                    GFP_NOFS);
8880         }
8881
8882         start = 0;
8883         while (1) {
8884                 cache = btrfs_lookup_first_block_group(fs_info, start);
8885                 if (!cache)
8886                         break;
8887                 if (cache->cached)
8888                         cache->cached = 0;
8889                 start = cache->key.objectid + cache->key.offset;
8890         }
8891 }
8892
8893 static int check_extent_refs(struct btrfs_root *root,
8894                              struct cache_tree *extent_cache)
8895 {
8896         struct extent_record *rec;
8897         struct cache_extent *cache;
8898         int err = 0;
8899         int ret = 0;
8900         int fixed = 0;
8901         int had_dups = 0;
8902         int recorded = 0;
8903
8904         if (repair) {
8905                 /*
8906                  * if we're doing a repair, we have to make sure
8907                  * we don't allocate from the problem extents.
8908                  * In the worst case, this will be all the
8909                  * extents in the FS
8910                  */
8911                 cache = search_cache_extent(extent_cache, 0);
8912                 while(cache) {
8913                         rec = container_of(cache, struct extent_record, cache);
8914                         set_extent_dirty(root->fs_info->excluded_extents,
8915                                          rec->start,
8916                                          rec->start + rec->max_size - 1,
8917                                          GFP_NOFS);
8918                         cache = next_cache_extent(cache);
8919                 }
8920
8921                 /* pin down all the corrupted blocks too */
8922                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8923                 while(cache) {
8924                         set_extent_dirty(root->fs_info->excluded_extents,
8925                                          cache->start,
8926                                          cache->start + cache->size - 1,
8927                                          GFP_NOFS);
8928                         cache = next_cache_extent(cache);
8929                 }
8930                 prune_corrupt_blocks(root->fs_info);
8931                 reset_cached_block_groups(root->fs_info);
8932         }
8933
8934         reset_cached_block_groups(root->fs_info);
8935
8936         /*
8937          * We need to delete any duplicate entries we find first otherwise we
8938          * could mess up the extent tree when we have backrefs that actually
8939          * belong to a different extent item and not the weird duplicate one.
8940          */
8941         while (repair && !list_empty(&duplicate_extents)) {
8942                 rec = to_extent_record(duplicate_extents.next);
8943                 list_del_init(&rec->list);
8944
8945                 /* Sometimes we can find a backref before we find an actual
8946                  * extent, so we need to process it a little bit to see if there
8947                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8948                  * if this is a backref screwup.  If we need to delete stuff
8949                  * process_duplicates() will return 0, otherwise it will return
8950                  * 1 and we move on to the next record.
8951                  */
8952                 if (process_duplicates(root, extent_cache, rec))
8953                         continue;
8954                 ret = delete_duplicate_records(root, rec);
8955                 if (ret < 0)
8956                         return ret;
8957                 /*
8958                  * delete_duplicate_records will return the number of entries
8959                  * deleted, so if it's greater than 0 then we know we actually
8960                  * did something and we need to remove.
8961                  */
8962                 if (ret)
8963                         had_dups = 1;
8964         }
8965
8966         if (had_dups)
8967                 return -EAGAIN;
8968
8969         while(1) {
8970                 int cur_err = 0;
8971
8972                 fixed = 0;
8973                 recorded = 0;
8974                 cache = search_cache_extent(extent_cache, 0);
8975                 if (!cache)
8976                         break;
8977                 rec = container_of(cache, struct extent_record, cache);
8978                 if (rec->num_duplicates) {
8979                         fprintf(stderr, "extent item %llu has multiple extent "
8980                                 "items\n", (unsigned long long)rec->start);
8981                         err = 1;
8982                         cur_err = 1;
8983                 }
8984
8985                 if (rec->refs != rec->extent_item_refs) {
8986                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
8987                                 (unsigned long long)rec->start,
8988                                 (unsigned long long)rec->nr);
8989                         fprintf(stderr, "extent item %llu, found %llu\n",
8990                                 (unsigned long long)rec->extent_item_refs,
8991                                 (unsigned long long)rec->refs);
8992                         ret = record_orphan_data_extents(root->fs_info, rec);
8993                         if (ret < 0)
8994                                 goto repair_abort;
8995                         if (ret == 0) {
8996                                 recorded = 1;
8997                         } else {
8998                                 /*
8999                                  * we can't use the extent to repair file
9000                                  * extent, let the fallback method handle it.
9001                                  */
9002                                 if (!fixed && repair) {
9003                                         ret = fixup_extent_refs(
9004                                                         root->fs_info,
9005                                                         extent_cache, rec);
9006                                         if (ret)
9007                                                 goto repair_abort;
9008                                         fixed = 1;
9009                                 }
9010                         }
9011                         err = 1;
9012                         cur_err = 1;
9013                 }
9014                 if (all_backpointers_checked(rec, 1)) {
9015                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9016                                 (unsigned long long)rec->start,
9017                                 (unsigned long long)rec->nr);
9018
9019                         if (!fixed && !recorded && repair) {
9020                                 ret = fixup_extent_refs(root->fs_info,
9021                                                         extent_cache, rec);
9022                                 if (ret)
9023                                         goto repair_abort;
9024                                 fixed = 1;
9025                         }
9026                         cur_err = 1;
9027                         err = 1;
9028                 }
9029                 if (!rec->owner_ref_checked) {
9030                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9031                                 (unsigned long long)rec->start,
9032                                 (unsigned long long)rec->nr);
9033                         if (!fixed && !recorded && repair) {
9034                                 ret = fixup_extent_refs(root->fs_info,
9035                                                         extent_cache, rec);
9036                                 if (ret)
9037                                         goto repair_abort;
9038                                 fixed = 1;
9039                         }
9040                         err = 1;
9041                         cur_err = 1;
9042                 }
9043                 if (rec->bad_full_backref) {
9044                         fprintf(stderr, "bad full backref, on [%llu]\n",
9045                                 (unsigned long long)rec->start);
9046                         if (repair) {
9047                                 ret = fixup_extent_flags(root->fs_info, rec);
9048                                 if (ret)
9049                                         goto repair_abort;
9050                                 fixed = 1;
9051                         }
9052                         err = 1;
9053                         cur_err = 1;
9054                 }
9055                 /*
9056                  * Although it's not a extent ref's problem, we reuse this
9057                  * routine for error reporting.
9058                  * No repair function yet.
9059                  */
9060                 if (rec->crossing_stripes) {
9061                         fprintf(stderr,
9062                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9063                                 rec->start, rec->start + rec->max_size);
9064                         err = 1;
9065                         cur_err = 1;
9066                 }
9067
9068                 if (rec->wrong_chunk_type) {
9069                         fprintf(stderr,
9070                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
9071                                 rec->start, rec->start + rec->max_size);
9072                         err = 1;
9073                         cur_err = 1;
9074                 }
9075
9076                 remove_cache_extent(extent_cache, cache);
9077                 free_all_extent_backrefs(rec);
9078                 if (!init_extent_tree && repair && (!cur_err || fixed))
9079                         clear_extent_dirty(root->fs_info->excluded_extents,
9080                                            rec->start,
9081                                            rec->start + rec->max_size - 1,
9082                                            GFP_NOFS);
9083                 free(rec);
9084         }
9085 repair_abort:
9086         if (repair) {
9087                 if (ret && ret != -EAGAIN) {
9088                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9089                         exit(1);
9090                 } else if (!ret) {
9091                         struct btrfs_trans_handle *trans;
9092
9093                         root = root->fs_info->extent_root;
9094                         trans = btrfs_start_transaction(root, 1);
9095                         if (IS_ERR(trans)) {
9096                                 ret = PTR_ERR(trans);
9097                                 goto repair_abort;
9098                         }
9099
9100                         btrfs_fix_block_accounting(trans, root);
9101                         ret = btrfs_commit_transaction(trans, root);
9102                         if (ret)
9103                                 goto repair_abort;
9104                 }
9105                 if (err)
9106                         fprintf(stderr, "repaired damaged extent references\n");
9107                 return ret;
9108         }
9109         return err;
9110 }
9111
9112 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9113 {
9114         u64 stripe_size;
9115
9116         if (type & BTRFS_BLOCK_GROUP_RAID0) {
9117                 stripe_size = length;
9118                 stripe_size /= num_stripes;
9119         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9120                 stripe_size = length * 2;
9121                 stripe_size /= num_stripes;
9122         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9123                 stripe_size = length;
9124                 stripe_size /= (num_stripes - 1);
9125         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9126                 stripe_size = length;
9127                 stripe_size /= (num_stripes - 2);
9128         } else {
9129                 stripe_size = length;
9130         }
9131         return stripe_size;
9132 }
9133
9134 /*
9135  * Check the chunk with its block group/dev list ref:
9136  * Return 0 if all refs seems valid.
9137  * Return 1 if part of refs seems valid, need later check for rebuild ref
9138  * like missing block group and needs to search extent tree to rebuild them.
9139  * Return -1 if essential refs are missing and unable to rebuild.
9140  */
9141 static int check_chunk_refs(struct chunk_record *chunk_rec,
9142                             struct block_group_tree *block_group_cache,
9143                             struct device_extent_tree *dev_extent_cache,
9144                             int silent)
9145 {
9146         struct cache_extent *block_group_item;
9147         struct block_group_record *block_group_rec;
9148         struct cache_extent *dev_extent_item;
9149         struct device_extent_record *dev_extent_rec;
9150         u64 devid;
9151         u64 offset;
9152         u64 length;
9153         int metadump_v2 = 0;
9154         int i;
9155         int ret = 0;
9156
9157         block_group_item = lookup_cache_extent(&block_group_cache->tree,
9158                                                chunk_rec->offset,
9159                                                chunk_rec->length);
9160         if (block_group_item) {
9161                 block_group_rec = container_of(block_group_item,
9162                                                struct block_group_record,
9163                                                cache);
9164                 if (chunk_rec->length != block_group_rec->offset ||
9165                     chunk_rec->offset != block_group_rec->objectid ||
9166                     (!metadump_v2 &&
9167                      chunk_rec->type_flags != block_group_rec->flags)) {
9168                         if (!silent)
9169                                 fprintf(stderr,
9170                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9171                                         chunk_rec->objectid,
9172                                         chunk_rec->type,
9173                                         chunk_rec->offset,
9174                                         chunk_rec->length,
9175                                         chunk_rec->offset,
9176                                         chunk_rec->type_flags,
9177                                         block_group_rec->objectid,
9178                                         block_group_rec->type,
9179                                         block_group_rec->offset,
9180                                         block_group_rec->offset,
9181                                         block_group_rec->objectid,
9182                                         block_group_rec->flags);
9183                         ret = -1;
9184                 } else {
9185                         list_del_init(&block_group_rec->list);
9186                         chunk_rec->bg_rec = block_group_rec;
9187                 }
9188         } else {
9189                 if (!silent)
9190                         fprintf(stderr,
9191                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9192                                 chunk_rec->objectid,
9193                                 chunk_rec->type,
9194                                 chunk_rec->offset,
9195                                 chunk_rec->length,
9196                                 chunk_rec->offset,
9197                                 chunk_rec->type_flags);
9198                 ret = 1;
9199         }
9200
9201         if (metadump_v2)
9202                 return ret;
9203
9204         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9205                                     chunk_rec->num_stripes);
9206         for (i = 0; i < chunk_rec->num_stripes; ++i) {
9207                 devid = chunk_rec->stripes[i].devid;
9208                 offset = chunk_rec->stripes[i].offset;
9209                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9210                                                        devid, offset, length);
9211                 if (dev_extent_item) {
9212                         dev_extent_rec = container_of(dev_extent_item,
9213                                                 struct device_extent_record,
9214                                                 cache);
9215                         if (dev_extent_rec->objectid != devid ||
9216                             dev_extent_rec->offset != offset ||
9217                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
9218                             dev_extent_rec->length != length) {
9219                                 if (!silent)
9220                                         fprintf(stderr,
9221                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9222                                                 chunk_rec->objectid,
9223                                                 chunk_rec->type,
9224                                                 chunk_rec->offset,
9225                                                 chunk_rec->stripes[i].devid,
9226                                                 chunk_rec->stripes[i].offset,
9227                                                 dev_extent_rec->objectid,
9228                                                 dev_extent_rec->offset,
9229                                                 dev_extent_rec->length);
9230                                 ret = -1;
9231                         } else {
9232                                 list_move(&dev_extent_rec->chunk_list,
9233                                           &chunk_rec->dextents);
9234                         }
9235                 } else {
9236                         if (!silent)
9237                                 fprintf(stderr,
9238                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9239                                         chunk_rec->objectid,
9240                                         chunk_rec->type,
9241                                         chunk_rec->offset,
9242                                         chunk_rec->stripes[i].devid,
9243                                         chunk_rec->stripes[i].offset);
9244                         ret = -1;
9245                 }
9246         }
9247         return ret;
9248 }
9249
9250 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9251 int check_chunks(struct cache_tree *chunk_cache,
9252                  struct block_group_tree *block_group_cache,
9253                  struct device_extent_tree *dev_extent_cache,
9254                  struct list_head *good, struct list_head *bad,
9255                  struct list_head *rebuild, int silent)
9256 {
9257         struct cache_extent *chunk_item;
9258         struct chunk_record *chunk_rec;
9259         struct block_group_record *bg_rec;
9260         struct device_extent_record *dext_rec;
9261         int err;
9262         int ret = 0;
9263
9264         chunk_item = first_cache_extent(chunk_cache);
9265         while (chunk_item) {
9266                 chunk_rec = container_of(chunk_item, struct chunk_record,
9267                                          cache);
9268                 err = check_chunk_refs(chunk_rec, block_group_cache,
9269                                        dev_extent_cache, silent);
9270                 if (err < 0)
9271                         ret = err;
9272                 if (err == 0 && good)
9273                         list_add_tail(&chunk_rec->list, good);
9274                 if (err > 0 && rebuild)
9275                         list_add_tail(&chunk_rec->list, rebuild);
9276                 if (err < 0 && bad)
9277                         list_add_tail(&chunk_rec->list, bad);
9278                 chunk_item = next_cache_extent(chunk_item);
9279         }
9280
9281         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9282                 if (!silent)
9283                         fprintf(stderr,
9284                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9285                                 bg_rec->objectid,
9286                                 bg_rec->offset,
9287                                 bg_rec->flags);
9288                 if (!ret)
9289                         ret = 1;
9290         }
9291
9292         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9293                             chunk_list) {
9294                 if (!silent)
9295                         fprintf(stderr,
9296                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9297                                 dext_rec->objectid,
9298                                 dext_rec->offset,
9299                                 dext_rec->length);
9300                 if (!ret)
9301                         ret = 1;
9302         }
9303         return ret;
9304 }
9305
9306
9307 static int check_device_used(struct device_record *dev_rec,
9308                              struct device_extent_tree *dext_cache)
9309 {
9310         struct cache_extent *cache;
9311         struct device_extent_record *dev_extent_rec;
9312         u64 total_byte = 0;
9313
9314         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9315         while (cache) {
9316                 dev_extent_rec = container_of(cache,
9317                                               struct device_extent_record,
9318                                               cache);
9319                 if (dev_extent_rec->objectid != dev_rec->devid)
9320                         break;
9321
9322                 list_del_init(&dev_extent_rec->device_list);
9323                 total_byte += dev_extent_rec->length;
9324                 cache = next_cache_extent(cache);
9325         }
9326
9327         if (total_byte != dev_rec->byte_used) {
9328                 fprintf(stderr,
9329                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9330                         total_byte, dev_rec->byte_used, dev_rec->objectid,
9331                         dev_rec->type, dev_rec->offset);
9332                 return -1;
9333         } else {
9334                 return 0;
9335         }
9336 }
9337
9338 /* check btrfs_dev_item -> btrfs_dev_extent */
9339 static int check_devices(struct rb_root *dev_cache,
9340                          struct device_extent_tree *dev_extent_cache)
9341 {
9342         struct rb_node *dev_node;
9343         struct device_record *dev_rec;
9344         struct device_extent_record *dext_rec;
9345         int err;
9346         int ret = 0;
9347
9348         dev_node = rb_first(dev_cache);
9349         while (dev_node) {
9350                 dev_rec = container_of(dev_node, struct device_record, node);
9351                 err = check_device_used(dev_rec, dev_extent_cache);
9352                 if (err)
9353                         ret = err;
9354
9355                 dev_node = rb_next(dev_node);
9356         }
9357         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9358                             device_list) {
9359                 fprintf(stderr,
9360                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9361                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
9362                 if (!ret)
9363                         ret = 1;
9364         }
9365         return ret;
9366 }
9367
9368 static int add_root_item_to_list(struct list_head *head,
9369                                   u64 objectid, u64 bytenr, u64 last_snapshot,
9370                                   u8 level, u8 drop_level,
9371                                   int level_size, struct btrfs_key *drop_key)
9372 {
9373
9374         struct root_item_record *ri_rec;
9375         ri_rec = malloc(sizeof(*ri_rec));
9376         if (!ri_rec)
9377                 return -ENOMEM;
9378         ri_rec->bytenr = bytenr;
9379         ri_rec->objectid = objectid;
9380         ri_rec->level = level;
9381         ri_rec->level_size = level_size;
9382         ri_rec->drop_level = drop_level;
9383         ri_rec->last_snapshot = last_snapshot;
9384         if (drop_key)
9385                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9386         list_add_tail(&ri_rec->list, head);
9387
9388         return 0;
9389 }
9390
9391 static void free_root_item_list(struct list_head *list)
9392 {
9393         struct root_item_record *ri_rec;
9394
9395         while (!list_empty(list)) {
9396                 ri_rec = list_first_entry(list, struct root_item_record,
9397                                           list);
9398                 list_del_init(&ri_rec->list);
9399                 free(ri_rec);
9400         }
9401 }
9402
9403 static int deal_root_from_list(struct list_head *list,
9404                                struct btrfs_root *root,
9405                                struct block_info *bits,
9406                                int bits_nr,
9407                                struct cache_tree *pending,
9408                                struct cache_tree *seen,
9409                                struct cache_tree *reada,
9410                                struct cache_tree *nodes,
9411                                struct cache_tree *extent_cache,
9412                                struct cache_tree *chunk_cache,
9413                                struct rb_root *dev_cache,
9414                                struct block_group_tree *block_group_cache,
9415                                struct device_extent_tree *dev_extent_cache)
9416 {
9417         int ret = 0;
9418         u64 last;
9419
9420         while (!list_empty(list)) {
9421                 struct root_item_record *rec;
9422                 struct extent_buffer *buf;
9423                 rec = list_entry(list->next,
9424                                  struct root_item_record, list);
9425                 last = 0;
9426                 buf = read_tree_block(root->fs_info->tree_root,
9427                                       rec->bytenr, rec->level_size, 0);
9428                 if (!extent_buffer_uptodate(buf)) {
9429                         free_extent_buffer(buf);
9430                         ret = -EIO;
9431                         break;
9432                 }
9433                 ret = add_root_to_pending(buf, extent_cache, pending,
9434                                     seen, nodes, rec->objectid);
9435                 if (ret < 0)
9436                         break;
9437                 /*
9438                  * To rebuild extent tree, we need deal with snapshot
9439                  * one by one, otherwise we deal with node firstly which
9440                  * can maximize readahead.
9441                  */
9442                 while (1) {
9443                         ret = run_next_block(root, bits, bits_nr, &last,
9444                                              pending, seen, reada, nodes,
9445                                              extent_cache, chunk_cache,
9446                                              dev_cache, block_group_cache,
9447                                              dev_extent_cache, rec);
9448                         if (ret != 0)
9449                                 break;
9450                 }
9451                 free_extent_buffer(buf);
9452                 list_del(&rec->list);
9453                 free(rec);
9454                 if (ret < 0)
9455                         break;
9456         }
9457         while (ret >= 0) {
9458                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9459                                      reada, nodes, extent_cache, chunk_cache,
9460                                      dev_cache, block_group_cache,
9461                                      dev_extent_cache, NULL);
9462                 if (ret != 0) {
9463                         if (ret > 0)
9464                                 ret = 0;
9465                         break;
9466                 }
9467         }
9468         return ret;
9469 }
9470
9471 static int check_chunks_and_extents(struct btrfs_root *root)
9472 {
9473         struct rb_root dev_cache;
9474         struct cache_tree chunk_cache;
9475         struct block_group_tree block_group_cache;
9476         struct device_extent_tree dev_extent_cache;
9477         struct cache_tree extent_cache;
9478         struct cache_tree seen;
9479         struct cache_tree pending;
9480         struct cache_tree reada;
9481         struct cache_tree nodes;
9482         struct extent_io_tree excluded_extents;
9483         struct cache_tree corrupt_blocks;
9484         struct btrfs_path path;
9485         struct btrfs_key key;
9486         struct btrfs_key found_key;
9487         int ret, err = 0;
9488         struct block_info *bits;
9489         int bits_nr;
9490         struct extent_buffer *leaf;
9491         int slot;
9492         struct btrfs_root_item ri;
9493         struct list_head dropping_trees;
9494         struct list_head normal_trees;
9495         struct btrfs_root *root1;
9496         u64 objectid;
9497         u32 level_size;
9498         u8 level;
9499
9500         dev_cache = RB_ROOT;
9501         cache_tree_init(&chunk_cache);
9502         block_group_tree_init(&block_group_cache);
9503         device_extent_tree_init(&dev_extent_cache);
9504
9505         cache_tree_init(&extent_cache);
9506         cache_tree_init(&seen);
9507         cache_tree_init(&pending);
9508         cache_tree_init(&nodes);
9509         cache_tree_init(&reada);
9510         cache_tree_init(&corrupt_blocks);
9511         extent_io_tree_init(&excluded_extents);
9512         INIT_LIST_HEAD(&dropping_trees);
9513         INIT_LIST_HEAD(&normal_trees);
9514
9515         if (repair) {
9516                 root->fs_info->excluded_extents = &excluded_extents;
9517                 root->fs_info->fsck_extent_cache = &extent_cache;
9518                 root->fs_info->free_extent_hook = free_extent_hook;
9519                 root->fs_info->corrupt_blocks = &corrupt_blocks;
9520         }
9521
9522         bits_nr = 1024;
9523         bits = malloc(bits_nr * sizeof(struct block_info));
9524         if (!bits) {
9525                 perror("malloc");
9526                 exit(1);
9527         }
9528
9529         if (ctx.progress_enabled) {
9530                 ctx.tp = TASK_EXTENTS;
9531                 task_start(ctx.info);
9532         }
9533
9534 again:
9535         root1 = root->fs_info->tree_root;
9536         level = btrfs_header_level(root1->node);
9537         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9538                                     root1->node->start, 0, level, 0,
9539                                     root1->nodesize, NULL);
9540         if (ret < 0)
9541                 goto out;
9542         root1 = root->fs_info->chunk_root;
9543         level = btrfs_header_level(root1->node);
9544         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9545                                     root1->node->start, 0, level, 0,
9546                                     root1->nodesize, NULL);
9547         if (ret < 0)
9548                 goto out;
9549         btrfs_init_path(&path);
9550         key.offset = 0;
9551         key.objectid = 0;
9552         key.type = BTRFS_ROOT_ITEM_KEY;
9553         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9554                                         &key, &path, 0, 0);
9555         if (ret < 0)
9556                 goto out;
9557         while(1) {
9558                 leaf = path.nodes[0];
9559                 slot = path.slots[0];
9560                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9561                         ret = btrfs_next_leaf(root, &path);
9562                         if (ret != 0)
9563                                 break;
9564                         leaf = path.nodes[0];
9565                         slot = path.slots[0];
9566                 }
9567                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9568                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9569                         unsigned long offset;
9570                         u64 last_snapshot;
9571
9572                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9573                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9574                         last_snapshot = btrfs_root_last_snapshot(&ri);
9575                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9576                                 level = btrfs_root_level(&ri);
9577                                 level_size = root->nodesize;
9578                                 ret = add_root_item_to_list(&normal_trees,
9579                                                 found_key.objectid,
9580                                                 btrfs_root_bytenr(&ri),
9581                                                 last_snapshot, level,
9582                                                 0, level_size, NULL);
9583                                 if (ret < 0)
9584                                         goto out;
9585                         } else {
9586                                 level = btrfs_root_level(&ri);
9587                                 level_size = root->nodesize;
9588                                 objectid = found_key.objectid;
9589                                 btrfs_disk_key_to_cpu(&found_key,
9590                                                       &ri.drop_progress);
9591                                 ret = add_root_item_to_list(&dropping_trees,
9592                                                 objectid,
9593                                                 btrfs_root_bytenr(&ri),
9594                                                 last_snapshot, level,
9595                                                 ri.drop_level,
9596                                                 level_size, &found_key);
9597                                 if (ret < 0)
9598                                         goto out;
9599                         }
9600                 }
9601                 path.slots[0]++;
9602         }
9603         btrfs_release_path(&path);
9604
9605         /*
9606          * check_block can return -EAGAIN if it fixes something, please keep
9607          * this in mind when dealing with return values from these functions, if
9608          * we get -EAGAIN we want to fall through and restart the loop.
9609          */
9610         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9611                                   &seen, &reada, &nodes, &extent_cache,
9612                                   &chunk_cache, &dev_cache, &block_group_cache,
9613                                   &dev_extent_cache);
9614         if (ret < 0) {
9615                 if (ret == -EAGAIN)
9616                         goto loop;
9617                 goto out;
9618         }
9619         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9620                                   &pending, &seen, &reada, &nodes,
9621                                   &extent_cache, &chunk_cache, &dev_cache,
9622                                   &block_group_cache, &dev_extent_cache);
9623         if (ret < 0) {
9624                 if (ret == -EAGAIN)
9625                         goto loop;
9626                 goto out;
9627         }
9628
9629         ret = check_chunks(&chunk_cache, &block_group_cache,
9630                            &dev_extent_cache, NULL, NULL, NULL, 0);
9631         if (ret) {
9632                 if (ret == -EAGAIN)
9633                         goto loop;
9634                 err = ret;
9635         }
9636
9637         ret = check_extent_refs(root, &extent_cache);
9638         if (ret < 0) {
9639                 if (ret == -EAGAIN)
9640                         goto loop;
9641                 goto out;
9642         }
9643
9644         ret = check_devices(&dev_cache, &dev_extent_cache);
9645         if (ret && err)
9646                 ret = err;
9647
9648 out:
9649         task_stop(ctx.info);
9650         if (repair) {
9651                 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9652                 extent_io_tree_cleanup(&excluded_extents);
9653                 root->fs_info->fsck_extent_cache = NULL;
9654                 root->fs_info->free_extent_hook = NULL;
9655                 root->fs_info->corrupt_blocks = NULL;
9656                 root->fs_info->excluded_extents = NULL;
9657         }
9658         free(bits);
9659         free_chunk_cache_tree(&chunk_cache);
9660         free_device_cache_tree(&dev_cache);
9661         free_block_group_tree(&block_group_cache);
9662         free_device_extent_tree(&dev_extent_cache);
9663         free_extent_cache_tree(&seen);
9664         free_extent_cache_tree(&pending);
9665         free_extent_cache_tree(&reada);
9666         free_extent_cache_tree(&nodes);
9667         return ret;
9668 loop:
9669         free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9670         free_extent_cache_tree(&seen);
9671         free_extent_cache_tree(&pending);
9672         free_extent_cache_tree(&reada);
9673         free_extent_cache_tree(&nodes);
9674         free_chunk_cache_tree(&chunk_cache);
9675         free_block_group_tree(&block_group_cache);
9676         free_device_cache_tree(&dev_cache);
9677         free_device_extent_tree(&dev_extent_cache);
9678         free_extent_record_cache(root->fs_info, &extent_cache);
9679         free_root_item_list(&normal_trees);
9680         free_root_item_list(&dropping_trees);
9681         extent_io_tree_cleanup(&excluded_extents);
9682         goto again;
9683 }
9684
9685 /*
9686  * Check backrefs of a tree block given by @bytenr or @eb.
9687  *
9688  * @root:       the root containing the @bytenr or @eb
9689  * @eb:         tree block extent buffer, can be NULL
9690  * @bytenr:     bytenr of the tree block to search
9691  * @level:      tree level of the tree block
9692  * @owner:      owner of the tree block
9693  *
9694  * Return >0 for any error found and output error message
9695  * Return 0 for no error found
9696  */
9697 static int check_tree_block_ref(struct btrfs_root *root,
9698                                 struct extent_buffer *eb, u64 bytenr,
9699                                 int level, u64 owner)
9700 {
9701         struct btrfs_key key;
9702         struct btrfs_root *extent_root = root->fs_info->extent_root;
9703         struct btrfs_path path;
9704         struct btrfs_extent_item *ei;
9705         struct btrfs_extent_inline_ref *iref;
9706         struct extent_buffer *leaf;
9707         unsigned long end;
9708         unsigned long ptr;
9709         int slot;
9710         int skinny_level;
9711         int type;
9712         u32 nodesize = root->nodesize;
9713         u32 item_size;
9714         u64 offset;
9715         int found_ref = 0;
9716         int err = 0;
9717         int ret;
9718
9719         btrfs_init_path(&path);
9720         key.objectid = bytenr;
9721         if (btrfs_fs_incompat(root->fs_info,
9722                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9723                 key.type = BTRFS_METADATA_ITEM_KEY;
9724         else
9725                 key.type = BTRFS_EXTENT_ITEM_KEY;
9726         key.offset = (u64)-1;
9727
9728         /* Search for the backref in extent tree */
9729         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9730         if (ret < 0) {
9731                 err |= BACKREF_MISSING;
9732                 goto out;
9733         }
9734         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9735         if (ret) {
9736                 err |= BACKREF_MISSING;
9737                 goto out;
9738         }
9739
9740         leaf = path.nodes[0];
9741         slot = path.slots[0];
9742         btrfs_item_key_to_cpu(leaf, &key, slot);
9743
9744         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9745
9746         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9747                 skinny_level = (int)key.offset;
9748                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9749         } else {
9750                 struct btrfs_tree_block_info *info;
9751
9752                 info = (struct btrfs_tree_block_info *)(ei + 1);
9753                 skinny_level = btrfs_tree_block_level(leaf, info);
9754                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9755         }
9756
9757         if (eb) {
9758                 u64 header_gen;
9759                 u64 extent_gen;
9760
9761                 if (!(btrfs_extent_flags(leaf, ei) &
9762                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9763                         error(
9764                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9765                                 key.objectid, nodesize,
9766                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
9767                         err = BACKREF_MISMATCH;
9768                 }
9769                 header_gen = btrfs_header_generation(eb);
9770                 extent_gen = btrfs_extent_generation(leaf, ei);
9771                 if (header_gen != extent_gen) {
9772                         error(
9773         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9774                                 key.objectid, nodesize, header_gen,
9775                                 extent_gen);
9776                         err = BACKREF_MISMATCH;
9777                 }
9778                 if (level != skinny_level) {
9779                         error(
9780                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9781                                 key.objectid, nodesize, level, skinny_level);
9782                         err = BACKREF_MISMATCH;
9783                 }
9784                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9785                         error(
9786                         "extent[%llu %u] is referred by other roots than %llu",
9787                                 key.objectid, nodesize, root->objectid);
9788                         err = BACKREF_MISMATCH;
9789                 }
9790         }
9791
9792         /*
9793          * Iterate the extent/metadata item to find the exact backref
9794          */
9795         item_size = btrfs_item_size_nr(leaf, slot);
9796         ptr = (unsigned long)iref;
9797         end = (unsigned long)ei + item_size;
9798         while (ptr < end) {
9799                 iref = (struct btrfs_extent_inline_ref *)ptr;
9800                 type = btrfs_extent_inline_ref_type(leaf, iref);
9801                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9802
9803                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9804                         (offset == root->objectid || offset == owner)) {
9805                         found_ref = 1;
9806                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9807                         /* Check if the backref points to valid referencer */
9808                         found_ref = !check_tree_block_ref(root, NULL, offset,
9809                                                           level + 1, owner);
9810                 }
9811
9812                 if (found_ref)
9813                         break;
9814                 ptr += btrfs_extent_inline_ref_size(type);
9815         }
9816
9817         /*
9818          * Inlined extent item doesn't have what we need, check
9819          * TREE_BLOCK_REF_KEY
9820          */
9821         if (!found_ref) {
9822                 btrfs_release_path(&path);
9823                 key.objectid = bytenr;
9824                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9825                 key.offset = root->objectid;
9826
9827                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9828                 if (!ret)
9829                         found_ref = 1;
9830         }
9831         if (!found_ref)
9832                 err |= BACKREF_MISSING;
9833 out:
9834         btrfs_release_path(&path);
9835         if (eb && (err & BACKREF_MISSING))
9836                 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9837                         bytenr, nodesize, owner, level);
9838         return err;
9839 }
9840
9841 /*
9842  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9843  *
9844  * Return >0 any error found and output error message
9845  * Return 0 for no error found
9846  */
9847 static int check_extent_data_item(struct btrfs_root *root,
9848                                   struct extent_buffer *eb, int slot)
9849 {
9850         struct btrfs_file_extent_item *fi;
9851         struct btrfs_path path;
9852         struct btrfs_root *extent_root = root->fs_info->extent_root;
9853         struct btrfs_key fi_key;
9854         struct btrfs_key dbref_key;
9855         struct extent_buffer *leaf;
9856         struct btrfs_extent_item *ei;
9857         struct btrfs_extent_inline_ref *iref;
9858         struct btrfs_extent_data_ref *dref;
9859         u64 owner;
9860         u64 file_extent_gen;
9861         u64 disk_bytenr;
9862         u64 disk_num_bytes;
9863         u64 extent_num_bytes;
9864         u64 extent_flags;
9865         u64 extent_gen;
9866         u32 item_size;
9867         unsigned long end;
9868         unsigned long ptr;
9869         int type;
9870         u64 ref_root;
9871         int found_dbackref = 0;
9872         int err = 0;
9873         int ret;
9874
9875         btrfs_item_key_to_cpu(eb, &fi_key, slot);
9876         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9877         file_extent_gen = btrfs_file_extent_generation(eb, fi);
9878
9879         /* Nothing to check for hole and inline data extents */
9880         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9881             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9882                 return 0;
9883
9884         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9885         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9886         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9887
9888         /* Check unaligned disk_num_bytes and num_bytes */
9889         if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9890                 error(
9891 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9892                         fi_key.objectid, fi_key.offset, disk_num_bytes,
9893                         root->sectorsize);
9894                 err |= BYTES_UNALIGNED;
9895         } else {
9896                 data_bytes_allocated += disk_num_bytes;
9897         }
9898         if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9899                 error(
9900 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9901                         fi_key.objectid, fi_key.offset, extent_num_bytes,
9902                         root->sectorsize);
9903                 err |= BYTES_UNALIGNED;
9904         } else {
9905                 data_bytes_referenced += extent_num_bytes;
9906         }
9907         owner = btrfs_header_owner(eb);
9908
9909         /* Check the extent item of the file extent in extent tree */
9910         btrfs_init_path(&path);
9911         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9912         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9913         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9914
9915         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
9916         if (ret) {
9917                 err |= BACKREF_MISSING;
9918                 goto error;
9919         }
9920
9921         leaf = path.nodes[0];
9922         slot = path.slots[0];
9923         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9924
9925         extent_flags = btrfs_extent_flags(leaf, ei);
9926         extent_gen = btrfs_extent_generation(leaf, ei);
9927
9928         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9929                 error(
9930                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9931                     disk_bytenr, disk_num_bytes,
9932                     BTRFS_EXTENT_FLAG_DATA);
9933                 err |= BACKREF_MISMATCH;
9934         }
9935
9936         if (file_extent_gen < extent_gen) {
9937                 error(
9938 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9939                         disk_bytenr, disk_num_bytes, file_extent_gen,
9940                         extent_gen);
9941                 err |= BACKREF_MISMATCH;
9942         }
9943
9944         /* Check data backref inside that extent item */
9945         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9946         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9947         ptr = (unsigned long)iref;
9948         end = (unsigned long)ei + item_size;
9949         while (ptr < end) {
9950                 iref = (struct btrfs_extent_inline_ref *)ptr;
9951                 type = btrfs_extent_inline_ref_type(leaf, iref);
9952                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9953
9954                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9955                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
9956                         if (ref_root == owner || ref_root == root->objectid)
9957                                 found_dbackref = 1;
9958                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9959                         found_dbackref = !check_tree_block_ref(root, NULL,
9960                                 btrfs_extent_inline_ref_offset(leaf, iref),
9961                                 0, owner);
9962                 }
9963
9964                 if (found_dbackref)
9965                         break;
9966                 ptr += btrfs_extent_inline_ref_size(type);
9967         }
9968
9969         /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9970         if (!found_dbackref) {
9971                 btrfs_release_path(&path);
9972
9973                 btrfs_init_path(&path);
9974                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9975                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9976                 dbref_key.offset = hash_extent_data_ref(root->objectid,
9977                                 fi_key.objectid, fi_key.offset);
9978
9979                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9980                                         &dbref_key, &path, 0, 0);
9981                 if (!ret)
9982                         found_dbackref = 1;
9983         }
9984
9985         if (!found_dbackref)
9986                 err |= BACKREF_MISSING;
9987 error:
9988         btrfs_release_path(&path);
9989         if (err & BACKREF_MISSING) {
9990                 error("data extent[%llu %llu] backref lost",
9991                       disk_bytenr, disk_num_bytes);
9992         }
9993         return err;
9994 }
9995
9996 /*
9997  * Get real tree block level for the case like shared block
9998  * Return >= 0 as tree level
9999  * Return <0 for error
10000  */
10001 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10002 {
10003         struct extent_buffer *eb;
10004         struct btrfs_path path;
10005         struct btrfs_key key;
10006         struct btrfs_extent_item *ei;
10007         u64 flags;
10008         u64 transid;
10009         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10010         u8 backref_level;
10011         u8 header_level;
10012         int ret;
10013
10014         /* Search extent tree for extent generation and level */
10015         key.objectid = bytenr;
10016         key.type = BTRFS_METADATA_ITEM_KEY;
10017         key.offset = (u64)-1;
10018
10019         btrfs_init_path(&path);
10020         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10021         if (ret < 0)
10022                 goto release_out;
10023         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10024         if (ret < 0)
10025                 goto release_out;
10026         if (ret > 0) {
10027                 ret = -ENOENT;
10028                 goto release_out;
10029         }
10030
10031         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10032         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10033                             struct btrfs_extent_item);
10034         flags = btrfs_extent_flags(path.nodes[0], ei);
10035         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10036                 ret = -ENOENT;
10037                 goto release_out;
10038         }
10039
10040         /* Get transid for later read_tree_block() check */
10041         transid = btrfs_extent_generation(path.nodes[0], ei);
10042
10043         /* Get backref level as one source */
10044         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10045                 backref_level = key.offset;
10046         } else {
10047                 struct btrfs_tree_block_info *info;
10048
10049                 info = (struct btrfs_tree_block_info *)(ei + 1);
10050                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10051         }
10052         btrfs_release_path(&path);
10053
10054         /* Get level from tree block as an alternative source */
10055         eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
10056         if (!extent_buffer_uptodate(eb)) {
10057                 free_extent_buffer(eb);
10058                 return -EIO;
10059         }
10060         header_level = btrfs_header_level(eb);
10061         free_extent_buffer(eb);
10062
10063         if (header_level != backref_level)
10064                 return -EIO;
10065         return header_level;
10066
10067 release_out:
10068         btrfs_release_path(&path);
10069         return ret;
10070 }
10071
10072 /*
10073  * Check if a tree block backref is valid (points to a valid tree block)
10074  * if level == -1, level will be resolved
10075  * Return >0 for any error found and print error message
10076  */
10077 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10078                                     u64 bytenr, int level)
10079 {
10080         struct btrfs_root *root;
10081         struct btrfs_key key;
10082         struct btrfs_path path;
10083         struct extent_buffer *eb;
10084         struct extent_buffer *node;
10085         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10086         int err = 0;
10087         int ret;
10088
10089         /* Query level for level == -1 special case */
10090         if (level == -1)
10091                 level = query_tree_block_level(fs_info, bytenr);
10092         if (level < 0) {
10093                 err |= REFERENCER_MISSING;
10094                 goto out;
10095         }
10096
10097         key.objectid = root_id;
10098         key.type = BTRFS_ROOT_ITEM_KEY;
10099         key.offset = (u64)-1;
10100
10101         root = btrfs_read_fs_root(fs_info, &key);
10102         if (IS_ERR(root)) {
10103                 err |= REFERENCER_MISSING;
10104                 goto out;
10105         }
10106
10107         /* Read out the tree block to get item/node key */
10108         eb = read_tree_block(root, bytenr, root->nodesize, 0);
10109         if (!extent_buffer_uptodate(eb)) {
10110                 err |= REFERENCER_MISSING;
10111                 free_extent_buffer(eb);
10112                 goto out;
10113         }
10114
10115         /* Empty tree, no need to check key */
10116         if (!btrfs_header_nritems(eb) && !level) {
10117                 free_extent_buffer(eb);
10118                 goto out;
10119         }
10120
10121         if (level)
10122                 btrfs_node_key_to_cpu(eb, &key, 0);
10123         else
10124                 btrfs_item_key_to_cpu(eb, &key, 0);
10125
10126         free_extent_buffer(eb);
10127
10128         btrfs_init_path(&path);
10129         path.lowest_level = level;
10130         /* Search with the first key, to ensure we can reach it */
10131         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10132         if (ret < 0) {
10133                 err |= REFERENCER_MISSING;
10134                 goto release_out;
10135         }
10136
10137         node = path.nodes[level];
10138         if (btrfs_header_bytenr(node) != bytenr) {
10139                 error(
10140         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10141                         bytenr, nodesize, bytenr,
10142                         btrfs_header_bytenr(node));
10143                 err |= REFERENCER_MISMATCH;
10144         }
10145         if (btrfs_header_level(node) != level) {
10146                 error(
10147         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10148                         bytenr, nodesize, level,
10149                         btrfs_header_level(node));
10150                 err |= REFERENCER_MISMATCH;
10151         }
10152
10153 release_out:
10154         btrfs_release_path(&path);
10155 out:
10156         if (err & REFERENCER_MISSING) {
10157                 if (level < 0)
10158                         error("extent [%llu %d] lost referencer (owner: %llu)",
10159                                 bytenr, nodesize, root_id);
10160                 else
10161                         error(
10162                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10163                                 bytenr, nodesize, root_id, level);
10164         }
10165
10166         return err;
10167 }
10168
10169 /*
10170  * Check referencer for shared block backref
10171  * If level == -1, this function will resolve the level.
10172  */
10173 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10174                                      u64 parent, u64 bytenr, int level)
10175 {
10176         struct extent_buffer *eb;
10177         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10178         u32 nr;
10179         int found_parent = 0;
10180         int i;
10181
10182         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10183         if (!extent_buffer_uptodate(eb))
10184                 goto out;
10185
10186         if (level == -1)
10187                 level = query_tree_block_level(fs_info, bytenr);
10188         if (level < 0)
10189                 goto out;
10190
10191         if (level + 1 != btrfs_header_level(eb))
10192                 goto out;
10193
10194         nr = btrfs_header_nritems(eb);
10195         for (i = 0; i < nr; i++) {
10196                 if (bytenr == btrfs_node_blockptr(eb, i)) {
10197                         found_parent = 1;
10198                         break;
10199                 }
10200         }
10201 out:
10202         free_extent_buffer(eb);
10203         if (!found_parent) {
10204                 error(
10205         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10206                         bytenr, nodesize, parent, level);
10207                 return REFERENCER_MISSING;
10208         }
10209         return 0;
10210 }
10211
10212 /*
10213  * Check referencer for normal (inlined) data ref
10214  * If len == 0, it will be resolved by searching in extent tree
10215  */
10216 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10217                                      u64 root_id, u64 objectid, u64 offset,
10218                                      u64 bytenr, u64 len, u32 count)
10219 {
10220         struct btrfs_root *root;
10221         struct btrfs_root *extent_root = fs_info->extent_root;
10222         struct btrfs_key key;
10223         struct btrfs_path path;
10224         struct extent_buffer *leaf;
10225         struct btrfs_file_extent_item *fi;
10226         u32 found_count = 0;
10227         int slot;
10228         int ret = 0;
10229
10230         if (!len) {
10231                 key.objectid = bytenr;
10232                 key.type = BTRFS_EXTENT_ITEM_KEY;
10233                 key.offset = (u64)-1;
10234
10235                 btrfs_init_path(&path);
10236                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10237                 if (ret < 0)
10238                         goto out;
10239                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10240                 if (ret)
10241                         goto out;
10242                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10243                 if (key.objectid != bytenr ||
10244                     key.type != BTRFS_EXTENT_ITEM_KEY)
10245                         goto out;
10246                 len = key.offset;
10247                 btrfs_release_path(&path);
10248         }
10249         key.objectid = root_id;
10250         key.type = BTRFS_ROOT_ITEM_KEY;
10251         key.offset = (u64)-1;
10252         btrfs_init_path(&path);
10253
10254         root = btrfs_read_fs_root(fs_info, &key);
10255         if (IS_ERR(root))
10256                 goto out;
10257
10258         key.objectid = objectid;
10259         key.type = BTRFS_EXTENT_DATA_KEY;
10260         /*
10261          * It can be nasty as data backref offset is
10262          * file offset - file extent offset, which is smaller or
10263          * equal to original backref offset.  The only special case is
10264          * overflow.  So we need to special check and do further search.
10265          */
10266         key.offset = offset & (1ULL << 63) ? 0 : offset;
10267
10268         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10269         if (ret < 0)
10270                 goto out;
10271
10272         /*
10273          * Search afterwards to get correct one
10274          * NOTE: As we must do a comprehensive check on the data backref to
10275          * make sure the dref count also matches, we must iterate all file
10276          * extents for that inode.
10277          */
10278         while (1) {
10279                 leaf = path.nodes[0];
10280                 slot = path.slots[0];
10281
10282                 btrfs_item_key_to_cpu(leaf, &key, slot);
10283                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10284                         break;
10285                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10286                 /*
10287                  * Except normal disk bytenr and disk num bytes, we still
10288                  * need to do extra check on dbackref offset as
10289                  * dbackref offset = file_offset - file_extent_offset
10290                  */
10291                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10292                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10293                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10294                     offset)
10295                         found_count++;
10296
10297                 ret = btrfs_next_item(root, &path);
10298                 if (ret)
10299                         break;
10300         }
10301 out:
10302         btrfs_release_path(&path);
10303         if (found_count != count) {
10304                 error(
10305 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10306                         bytenr, len, root_id, objectid, offset, count, found_count);
10307                 return REFERENCER_MISSING;
10308         }
10309         return 0;
10310 }
10311
10312 /*
10313  * Check if the referencer of a shared data backref exists
10314  */
10315 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10316                                      u64 parent, u64 bytenr)
10317 {
10318         struct extent_buffer *eb;
10319         struct btrfs_key key;
10320         struct btrfs_file_extent_item *fi;
10321         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10322         u32 nr;
10323         int found_parent = 0;
10324         int i;
10325
10326         eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10327         if (!extent_buffer_uptodate(eb))
10328                 goto out;
10329
10330         nr = btrfs_header_nritems(eb);
10331         for (i = 0; i < nr; i++) {
10332                 btrfs_item_key_to_cpu(eb, &key, i);
10333                 if (key.type != BTRFS_EXTENT_DATA_KEY)
10334                         continue;
10335
10336                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10337                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10338                         continue;
10339
10340                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10341                         found_parent = 1;
10342                         break;
10343                 }
10344         }
10345
10346 out:
10347         free_extent_buffer(eb);
10348         if (!found_parent) {
10349                 error("shared extent %llu referencer lost (parent: %llu)",
10350                         bytenr, parent);
10351                 return REFERENCER_MISSING;
10352         }
10353         return 0;
10354 }
10355
10356 /*
10357  * This function will check a given extent item, including its backref and
10358  * itself (like crossing stripe boundary and type)
10359  *
10360  * Since we don't use extent_record anymore, introduce new error bit
10361  */
10362 static int check_extent_item(struct btrfs_fs_info *fs_info,
10363                              struct extent_buffer *eb, int slot)
10364 {
10365         struct btrfs_extent_item *ei;
10366         struct btrfs_extent_inline_ref *iref;
10367         struct btrfs_extent_data_ref *dref;
10368         unsigned long end;
10369         unsigned long ptr;
10370         int type;
10371         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10372         u32 item_size = btrfs_item_size_nr(eb, slot);
10373         u64 flags;
10374         u64 offset;
10375         int metadata = 0;
10376         int level;
10377         struct btrfs_key key;
10378         int ret;
10379         int err = 0;
10380
10381         btrfs_item_key_to_cpu(eb, &key, slot);
10382         if (key.type == BTRFS_EXTENT_ITEM_KEY)
10383                 bytes_used += key.offset;
10384         else
10385                 bytes_used += nodesize;
10386
10387         if (item_size < sizeof(*ei)) {
10388                 /*
10389                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10390                  * old thing when on disk format is still un-determined.
10391                  * No need to care about it anymore
10392                  */
10393                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10394                 return -ENOTTY;
10395         }
10396
10397         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10398         flags = btrfs_extent_flags(eb, ei);
10399
10400         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10401                 metadata = 1;
10402         if (metadata && check_crossing_stripes(global_info, key.objectid,
10403                                                eb->len)) {
10404                 error("bad metadata [%llu, %llu) crossing stripe boundary",
10405                       key.objectid, key.objectid + nodesize);
10406                 err |= CROSSING_STRIPE_BOUNDARY;
10407         }
10408
10409         ptr = (unsigned long)(ei + 1);
10410
10411         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10412                 /* Old EXTENT_ITEM metadata */
10413                 struct btrfs_tree_block_info *info;
10414
10415                 info = (struct btrfs_tree_block_info *)ptr;
10416                 level = btrfs_tree_block_level(eb, info);
10417                 ptr += sizeof(struct btrfs_tree_block_info);
10418         } else {
10419                 /* New METADATA_ITEM */
10420                 level = key.offset;
10421         }
10422         end = (unsigned long)ei + item_size;
10423
10424         if (ptr >= end) {
10425                 err |= ITEM_SIZE_MISMATCH;
10426                 goto out;
10427         }
10428
10429         /* Now check every backref in this extent item */
10430 next:
10431         iref = (struct btrfs_extent_inline_ref *)ptr;
10432         type = btrfs_extent_inline_ref_type(eb, iref);
10433         offset = btrfs_extent_inline_ref_offset(eb, iref);
10434         switch (type) {
10435         case BTRFS_TREE_BLOCK_REF_KEY:
10436                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10437                                                level);
10438                 err |= ret;
10439                 break;
10440         case BTRFS_SHARED_BLOCK_REF_KEY:
10441                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10442                                                  level);
10443                 err |= ret;
10444                 break;
10445         case BTRFS_EXTENT_DATA_REF_KEY:
10446                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10447                 ret = check_extent_data_backref(fs_info,
10448                                 btrfs_extent_data_ref_root(eb, dref),
10449                                 btrfs_extent_data_ref_objectid(eb, dref),
10450                                 btrfs_extent_data_ref_offset(eb, dref),
10451                                 key.objectid, key.offset,
10452                                 btrfs_extent_data_ref_count(eb, dref));
10453                 err |= ret;
10454                 break;
10455         case BTRFS_SHARED_DATA_REF_KEY:
10456                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10457                 err |= ret;
10458                 break;
10459         default:
10460                 error("extent[%llu %d %llu] has unknown ref type: %d",
10461                         key.objectid, key.type, key.offset, type);
10462                 err |= UNKNOWN_TYPE;
10463                 goto out;
10464         }
10465
10466         ptr += btrfs_extent_inline_ref_size(type);
10467         if (ptr < end)
10468                 goto next;
10469
10470 out:
10471         return err;
10472 }
10473
10474 /*
10475  * Check if a dev extent item is referred correctly by its chunk
10476  */
10477 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10478                                  struct extent_buffer *eb, int slot)
10479 {
10480         struct btrfs_root *chunk_root = fs_info->chunk_root;
10481         struct btrfs_dev_extent *ptr;
10482         struct btrfs_path path;
10483         struct btrfs_key chunk_key;
10484         struct btrfs_key devext_key;
10485         struct btrfs_chunk *chunk;
10486         struct extent_buffer *l;
10487         int num_stripes;
10488         u64 length;
10489         int i;
10490         int found_chunk = 0;
10491         int ret;
10492
10493         btrfs_item_key_to_cpu(eb, &devext_key, slot);
10494         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10495         length = btrfs_dev_extent_length(eb, ptr);
10496
10497         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10498         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10499         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10500
10501         btrfs_init_path(&path);
10502         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10503         if (ret)
10504                 goto out;
10505
10506         l = path.nodes[0];
10507         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10508         if (btrfs_chunk_length(l, chunk) != length)
10509                 goto out;
10510
10511         num_stripes = btrfs_chunk_num_stripes(l, chunk);
10512         for (i = 0; i < num_stripes; i++) {
10513                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10514                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10515
10516                 if (devid == devext_key.objectid &&
10517                     offset == devext_key.offset) {
10518                         found_chunk = 1;
10519                         break;
10520                 }
10521         }
10522 out:
10523         btrfs_release_path(&path);
10524         if (!found_chunk) {
10525                 error(
10526                 "device extent[%llu, %llu, %llu] did not find the related chunk",
10527                         devext_key.objectid, devext_key.offset, length);
10528                 return REFERENCER_MISSING;
10529         }
10530         return 0;
10531 }
10532
10533 /*
10534  * Check if the used space is correct with the dev item
10535  */
10536 static int check_dev_item(struct btrfs_fs_info *fs_info,
10537                           struct extent_buffer *eb, int slot)
10538 {
10539         struct btrfs_root *dev_root = fs_info->dev_root;
10540         struct btrfs_dev_item *dev_item;
10541         struct btrfs_path path;
10542         struct btrfs_key key;
10543         struct btrfs_dev_extent *ptr;
10544         u64 dev_id;
10545         u64 used;
10546         u64 total = 0;
10547         int ret;
10548
10549         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10550         dev_id = btrfs_device_id(eb, dev_item);
10551         used = btrfs_device_bytes_used(eb, dev_item);
10552
10553         key.objectid = dev_id;
10554         key.type = BTRFS_DEV_EXTENT_KEY;
10555         key.offset = 0;
10556
10557         btrfs_init_path(&path);
10558         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10559         if (ret < 0) {
10560                 btrfs_item_key_to_cpu(eb, &key, slot);
10561                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10562                         key.objectid, key.type, key.offset);
10563                 btrfs_release_path(&path);
10564                 return REFERENCER_MISSING;
10565         }
10566
10567         /* Iterate dev_extents to calculate the used space of a device */
10568         while (1) {
10569                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10570
10571                 if (key.objectid > dev_id)
10572                         break;
10573                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10574                         goto next;
10575
10576                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10577                                      struct btrfs_dev_extent);
10578                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10579 next:
10580                 ret = btrfs_next_item(dev_root, &path);
10581                 if (ret)
10582                         break;
10583         }
10584         btrfs_release_path(&path);
10585
10586         if (used != total) {
10587                 btrfs_item_key_to_cpu(eb, &key, slot);
10588                 error(
10589 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10590                         total, used, BTRFS_ROOT_TREE_OBJECTID,
10591                         BTRFS_DEV_EXTENT_KEY, dev_id);
10592                 return ACCOUNTING_MISMATCH;
10593         }
10594         return 0;
10595 }
10596
10597 /*
10598  * Check a block group item with its referener (chunk) and its used space
10599  * with extent/metadata item
10600  */
10601 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10602                                   struct extent_buffer *eb, int slot)
10603 {
10604         struct btrfs_root *extent_root = fs_info->extent_root;
10605         struct btrfs_root *chunk_root = fs_info->chunk_root;
10606         struct btrfs_block_group_item *bi;
10607         struct btrfs_block_group_item bg_item;
10608         struct btrfs_path path;
10609         struct btrfs_key bg_key;
10610         struct btrfs_key chunk_key;
10611         struct btrfs_key extent_key;
10612         struct btrfs_chunk *chunk;
10613         struct extent_buffer *leaf;
10614         struct btrfs_extent_item *ei;
10615         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10616         u64 flags;
10617         u64 bg_flags;
10618         u64 used;
10619         u64 total = 0;
10620         int ret;
10621         int err = 0;
10622
10623         btrfs_item_key_to_cpu(eb, &bg_key, slot);
10624         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
10625         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10626         used = btrfs_block_group_used(&bg_item);
10627         bg_flags = btrfs_block_group_flags(&bg_item);
10628
10629         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10630         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10631         chunk_key.offset = bg_key.objectid;
10632
10633         btrfs_init_path(&path);
10634         /* Search for the referencer chunk */
10635         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10636         if (ret) {
10637                 error(
10638                 "block group[%llu %llu] did not find the related chunk item",
10639                         bg_key.objectid, bg_key.offset);
10640                 err |= REFERENCER_MISSING;
10641         } else {
10642                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10643                                         struct btrfs_chunk);
10644                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10645                                                 bg_key.offset) {
10646                         error(
10647         "block group[%llu %llu] related chunk item length does not match",
10648                                 bg_key.objectid, bg_key.offset);
10649                         err |= REFERENCER_MISMATCH;
10650                 }
10651         }
10652         btrfs_release_path(&path);
10653
10654         /* Search from the block group bytenr */
10655         extent_key.objectid = bg_key.objectid;
10656         extent_key.type = 0;
10657         extent_key.offset = 0;
10658
10659         btrfs_init_path(&path);
10660         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10661         if (ret < 0)
10662                 goto out;
10663
10664         /* Iterate extent tree to account used space */
10665         while (1) {
10666                 leaf = path.nodes[0];
10667                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
10668                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
10669                         break;
10670
10671                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10672                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10673                         goto next;
10674                 if (extent_key.objectid < bg_key.objectid)
10675                         goto next;
10676
10677                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10678                         total += nodesize;
10679                 else
10680                         total += extent_key.offset;
10681
10682                 ei = btrfs_item_ptr(leaf, path.slots[0],
10683                                     struct btrfs_extent_item);
10684                 flags = btrfs_extent_flags(leaf, ei);
10685                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10686                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10687                                 error(
10688                         "bad extent[%llu, %llu) type mismatch with chunk",
10689                                         extent_key.objectid,
10690                                         extent_key.objectid + extent_key.offset);
10691                                 err |= CHUNK_TYPE_MISMATCH;
10692                         }
10693                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10694                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10695                                     BTRFS_BLOCK_GROUP_METADATA))) {
10696                                 error(
10697                         "bad extent[%llu, %llu) type mismatch with chunk",
10698                                         extent_key.objectid,
10699                                         extent_key.objectid + nodesize);
10700                                 err |= CHUNK_TYPE_MISMATCH;
10701                         }
10702                 }
10703 next:
10704                 ret = btrfs_next_item(extent_root, &path);
10705                 if (ret)
10706                         break;
10707         }
10708
10709 out:
10710         btrfs_release_path(&path);
10711
10712         if (total != used) {
10713                 error(
10714                 "block group[%llu %llu] used %llu but extent items used %llu",
10715                         bg_key.objectid, bg_key.offset, used, total);
10716                 err |= ACCOUNTING_MISMATCH;
10717         }
10718         return err;
10719 }
10720
10721 /*
10722  * Check a chunk item.
10723  * Including checking all referred dev_extents and block group
10724  */
10725 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10726                             struct extent_buffer *eb, int slot)
10727 {
10728         struct btrfs_root *extent_root = fs_info->extent_root;
10729         struct btrfs_root *dev_root = fs_info->dev_root;
10730         struct btrfs_path path;
10731         struct btrfs_key chunk_key;
10732         struct btrfs_key bg_key;
10733         struct btrfs_key devext_key;
10734         struct btrfs_chunk *chunk;
10735         struct extent_buffer *leaf;
10736         struct btrfs_block_group_item *bi;
10737         struct btrfs_block_group_item bg_item;
10738         struct btrfs_dev_extent *ptr;
10739         u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10740         u64 length;
10741         u64 chunk_end;
10742         u64 type;
10743         u64 profile;
10744         int num_stripes;
10745         u64 offset;
10746         u64 objectid;
10747         int i;
10748         int ret;
10749         int err = 0;
10750
10751         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10752         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10753         length = btrfs_chunk_length(eb, chunk);
10754         chunk_end = chunk_key.offset + length;
10755         if (!IS_ALIGNED(length, sectorsize)) {
10756                 error("chunk[%llu %llu) not aligned to %u",
10757                         chunk_key.offset, chunk_end, sectorsize);
10758                 err |= BYTES_UNALIGNED;
10759                 goto out;
10760         }
10761
10762         type = btrfs_chunk_type(eb, chunk);
10763         profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10764         if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10765                 error("chunk[%llu %llu) has no chunk type",
10766                         chunk_key.offset, chunk_end);
10767                 err |= UNKNOWN_TYPE;
10768         }
10769         if (profile && (profile & (profile - 1))) {
10770                 error("chunk[%llu %llu) multiple profiles detected: %llx",
10771                         chunk_key.offset, chunk_end, profile);
10772                 err |= UNKNOWN_TYPE;
10773         }
10774
10775         bg_key.objectid = chunk_key.offset;
10776         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10777         bg_key.offset = length;
10778
10779         btrfs_init_path(&path);
10780         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10781         if (ret) {
10782                 error(
10783                 "chunk[%llu %llu) did not find the related block group item",
10784                         chunk_key.offset, chunk_end);
10785                 err |= REFERENCER_MISSING;
10786         } else{
10787                 leaf = path.nodes[0];
10788                 bi = btrfs_item_ptr(leaf, path.slots[0],
10789                                     struct btrfs_block_group_item);
10790                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
10791                                    sizeof(bg_item));
10792                 if (btrfs_block_group_flags(&bg_item) != type) {
10793                         error(
10794 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10795                                 chunk_key.offset, chunk_end, type,
10796                                 btrfs_block_group_flags(&bg_item));
10797                         err |= REFERENCER_MISSING;
10798                 }
10799         }
10800
10801         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10802         for (i = 0; i < num_stripes; i++) {
10803                 btrfs_release_path(&path);
10804                 btrfs_init_path(&path);
10805                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10806                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10807                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10808
10809                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10810                                         0, 0);
10811                 if (ret)
10812                         goto not_match_dev;
10813
10814                 leaf = path.nodes[0];
10815                 ptr = btrfs_item_ptr(leaf, path.slots[0],
10816                                      struct btrfs_dev_extent);
10817                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10818                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
10819                 if (objectid != chunk_key.objectid ||
10820                     offset != chunk_key.offset ||
10821                     btrfs_dev_extent_length(leaf, ptr) != length)
10822                         goto not_match_dev;
10823                 continue;
10824 not_match_dev:
10825                 err |= BACKREF_MISSING;
10826                 error(
10827                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10828                         chunk_key.objectid, chunk_end, i);
10829                 continue;
10830         }
10831         btrfs_release_path(&path);
10832 out:
10833         return err;
10834 }
10835
10836 /*
10837  * Main entry function to check known items and update related accounting info
10838  */
10839 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10840 {
10841         struct btrfs_fs_info *fs_info = root->fs_info;
10842         struct btrfs_key key;
10843         int slot = 0;
10844         int type;
10845         struct btrfs_extent_data_ref *dref;
10846         int ret;
10847         int err = 0;
10848
10849 next:
10850         btrfs_item_key_to_cpu(eb, &key, slot);
10851         type = key.type;
10852
10853         switch (type) {
10854         case BTRFS_EXTENT_DATA_KEY:
10855                 ret = check_extent_data_item(root, eb, slot);
10856                 err |= ret;
10857                 break;
10858         case BTRFS_BLOCK_GROUP_ITEM_KEY:
10859                 ret = check_block_group_item(fs_info, eb, slot);
10860                 err |= ret;
10861                 break;
10862         case BTRFS_DEV_ITEM_KEY:
10863                 ret = check_dev_item(fs_info, eb, slot);
10864                 err |= ret;
10865                 break;
10866         case BTRFS_CHUNK_ITEM_KEY:
10867                 ret = check_chunk_item(fs_info, eb, slot);
10868                 err |= ret;
10869                 break;
10870         case BTRFS_DEV_EXTENT_KEY:
10871                 ret = check_dev_extent_item(fs_info, eb, slot);
10872                 err |= ret;
10873                 break;
10874         case BTRFS_EXTENT_ITEM_KEY:
10875         case BTRFS_METADATA_ITEM_KEY:
10876                 ret = check_extent_item(fs_info, eb, slot);
10877                 err |= ret;
10878                 break;
10879         case BTRFS_EXTENT_CSUM_KEY:
10880                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
10881                 break;
10882         case BTRFS_TREE_BLOCK_REF_KEY:
10883                 ret = check_tree_block_backref(fs_info, key.offset,
10884                                                key.objectid, -1);
10885                 err |= ret;
10886                 break;
10887         case BTRFS_EXTENT_DATA_REF_KEY:
10888                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10889                 ret = check_extent_data_backref(fs_info,
10890                                 btrfs_extent_data_ref_root(eb, dref),
10891                                 btrfs_extent_data_ref_objectid(eb, dref),
10892                                 btrfs_extent_data_ref_offset(eb, dref),
10893                                 key.objectid, 0,
10894                                 btrfs_extent_data_ref_count(eb, dref));
10895                 err |= ret;
10896                 break;
10897         case BTRFS_SHARED_BLOCK_REF_KEY:
10898                 ret = check_shared_block_backref(fs_info, key.offset,
10899                                                  key.objectid, -1);
10900                 err |= ret;
10901                 break;
10902         case BTRFS_SHARED_DATA_REF_KEY:
10903                 ret = check_shared_data_backref(fs_info, key.offset,
10904                                                 key.objectid);
10905                 err |= ret;
10906                 break;
10907         default:
10908                 break;
10909         }
10910
10911         if (++slot < btrfs_header_nritems(eb))
10912                 goto next;
10913
10914         return err;
10915 }
10916
10917 /*
10918  * Helper function for later fs/subvol tree check.  To determine if a tree
10919  * block should be checked.
10920  * This function will ensure only the direct referencer with lowest rootid to
10921  * check a fs/subvolume tree block.
10922  *
10923  * Backref check at extent tree would detect errors like missing subvolume
10924  * tree, so we can do aggressive check to reduce duplicated checks.
10925  */
10926 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10927 {
10928         struct btrfs_root *extent_root = root->fs_info->extent_root;
10929         struct btrfs_key key;
10930         struct btrfs_path path;
10931         struct extent_buffer *leaf;
10932         int slot;
10933         struct btrfs_extent_item *ei;
10934         unsigned long ptr;
10935         unsigned long end;
10936         int type;
10937         u32 item_size;
10938         u64 offset;
10939         struct btrfs_extent_inline_ref *iref;
10940         int ret;
10941
10942         btrfs_init_path(&path);
10943         key.objectid = btrfs_header_bytenr(eb);
10944         key.type = BTRFS_METADATA_ITEM_KEY;
10945         key.offset = (u64)-1;
10946
10947         /*
10948          * Any failure in backref resolving means we can't determine
10949          * whom the tree block belongs to.
10950          * So in that case, we need to check that tree block
10951          */
10952         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10953         if (ret < 0)
10954                 goto need_check;
10955
10956         ret = btrfs_previous_extent_item(extent_root, &path,
10957                                          btrfs_header_bytenr(eb));
10958         if (ret)
10959                 goto need_check;
10960
10961         leaf = path.nodes[0];
10962         slot = path.slots[0];
10963         btrfs_item_key_to_cpu(leaf, &key, slot);
10964         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10965
10966         if (key.type == BTRFS_METADATA_ITEM_KEY) {
10967                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10968         } else {
10969                 struct btrfs_tree_block_info *info;
10970
10971                 info = (struct btrfs_tree_block_info *)(ei + 1);
10972                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10973         }
10974
10975         item_size = btrfs_item_size_nr(leaf, slot);
10976         ptr = (unsigned long)iref;
10977         end = (unsigned long)ei + item_size;
10978         while (ptr < end) {
10979                 iref = (struct btrfs_extent_inline_ref *)ptr;
10980                 type = btrfs_extent_inline_ref_type(leaf, iref);
10981                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10982
10983                 /*
10984                  * We only check the tree block if current root is
10985                  * the lowest referencer of it.
10986                  */
10987                 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10988                     offset < root->objectid) {
10989                         btrfs_release_path(&path);
10990                         return 0;
10991                 }
10992
10993                 ptr += btrfs_extent_inline_ref_size(type);
10994         }
10995         /*
10996          * Normally we should also check keyed tree block ref, but that may be
10997          * very time consuming.  Inlined ref should already make us skip a lot
10998          * of refs now.  So skip search keyed tree block ref.
10999          */
11000
11001 need_check:
11002         btrfs_release_path(&path);
11003         return 1;
11004 }
11005
11006 /*
11007  * Traversal function for tree block. We will do:
11008  * 1) Skip shared fs/subvolume tree blocks
11009  * 2) Update related bytes accounting
11010  * 3) Pre-order traversal
11011  */
11012 static int traverse_tree_block(struct btrfs_root *root,
11013                                 struct extent_buffer *node)
11014 {
11015         struct extent_buffer *eb;
11016         struct btrfs_key key;
11017         struct btrfs_key drop_key;
11018         int level;
11019         u64 nr;
11020         int i;
11021         int err = 0;
11022         int ret;
11023
11024         /*
11025          * Skip shared fs/subvolume tree block, in that case they will
11026          * be checked by referencer with lowest rootid
11027          */
11028         if (is_fstree(root->objectid) && !should_check(root, node))
11029                 return 0;
11030
11031         /* Update bytes accounting */
11032         total_btree_bytes += node->len;
11033         if (fs_root_objectid(btrfs_header_owner(node)))
11034                 total_fs_tree_bytes += node->len;
11035         if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11036                 total_extent_tree_bytes += node->len;
11037         if (!found_old_backref &&
11038             btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11039             btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11040             !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11041                 found_old_backref = 1;
11042
11043         /* pre-order tranversal, check itself first */
11044         level = btrfs_header_level(node);
11045         ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11046                                    btrfs_header_level(node),
11047                                    btrfs_header_owner(node));
11048         err |= ret;
11049         if (err)
11050                 error(
11051         "check %s failed root %llu bytenr %llu level %d, force continue check",
11052                         level ? "node":"leaf", root->objectid,
11053                         btrfs_header_bytenr(node), btrfs_header_level(node));
11054
11055         if (!level) {
11056                 btree_space_waste += btrfs_leaf_free_space(root, node);
11057                 ret = check_leaf_items(root, node);
11058                 err |= ret;
11059                 return err;
11060         }
11061
11062         nr = btrfs_header_nritems(node);
11063         btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11064         btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11065                 sizeof(struct btrfs_key_ptr);
11066
11067         /* Then check all its children */
11068         for (i = 0; i < nr; i++) {
11069                 u64 blocknr = btrfs_node_blockptr(node, i);
11070
11071                 btrfs_node_key_to_cpu(node, &key, i);
11072                 if (level == root->root_item.drop_level &&
11073                     is_dropped_key(&key, &drop_key))
11074                         continue;
11075
11076                 /*
11077                  * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11078                  * to call the function itself.
11079                  */
11080                 eb = read_tree_block(root, blocknr, root->nodesize, 0);
11081                 if (extent_buffer_uptodate(eb)) {
11082                         ret = traverse_tree_block(root, eb);
11083                         err |= ret;
11084                 }
11085                 free_extent_buffer(eb);
11086         }
11087
11088         return err;
11089 }
11090
11091 /*
11092  * Low memory usage version check_chunks_and_extents.
11093  */
11094 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11095 {
11096         struct btrfs_path path;
11097         struct btrfs_key key;
11098         struct btrfs_root *root1;
11099         struct btrfs_root *cur_root;
11100         int err = 0;
11101         int ret;
11102
11103         root1 = root->fs_info->chunk_root;
11104         ret = traverse_tree_block(root1, root1->node);
11105         err |= ret;
11106
11107         root1 = root->fs_info->tree_root;
11108         ret = traverse_tree_block(root1, root1->node);
11109         err |= ret;
11110
11111         btrfs_init_path(&path);
11112         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11113         key.offset = 0;
11114         key.type = BTRFS_ROOT_ITEM_KEY;
11115
11116         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11117         if (ret) {
11118                 error("cannot find extent treet in tree_root");
11119                 goto out;
11120         }
11121
11122         while (1) {
11123                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11124                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11125                         goto next;
11126                 key.offset = (u64)-1;
11127
11128                 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11129                 if (IS_ERR(cur_root) || !cur_root) {
11130                         error("failed to read tree: %lld", key.objectid);
11131                         goto next;
11132                 }
11133
11134                 ret = traverse_tree_block(cur_root, cur_root->node);
11135                 err |= ret;
11136
11137 next:
11138                 ret = btrfs_next_item(root1, &path);
11139                 if (ret)
11140                         goto out;
11141         }
11142
11143 out:
11144         btrfs_release_path(&path);
11145         return err;
11146 }
11147
11148 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11149                            struct btrfs_root *root, int overwrite)
11150 {
11151         struct extent_buffer *c;
11152         struct extent_buffer *old = root->node;
11153         int level;
11154         int ret;
11155         struct btrfs_disk_key disk_key = {0,0,0};
11156
11157         level = 0;
11158
11159         if (overwrite) {
11160                 c = old;
11161                 extent_buffer_get(c);
11162                 goto init;
11163         }
11164         c = btrfs_alloc_free_block(trans, root,
11165                                    root->nodesize,
11166                                    root->root_key.objectid,
11167                                    &disk_key, level, 0, 0);
11168         if (IS_ERR(c)) {
11169                 c = old;
11170                 extent_buffer_get(c);
11171                 overwrite = 1;
11172         }
11173 init:
11174         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11175         btrfs_set_header_level(c, level);
11176         btrfs_set_header_bytenr(c, c->start);
11177         btrfs_set_header_generation(c, trans->transid);
11178         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11179         btrfs_set_header_owner(c, root->root_key.objectid);
11180
11181         write_extent_buffer(c, root->fs_info->fsid,
11182                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
11183
11184         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11185                             btrfs_header_chunk_tree_uuid(c),
11186                             BTRFS_UUID_SIZE);
11187
11188         btrfs_mark_buffer_dirty(c);
11189         /*
11190          * this case can happen in the following case:
11191          *
11192          * 1.overwrite previous root.
11193          *
11194          * 2.reinit reloc data root, this is because we skip pin
11195          * down reloc data tree before which means we can allocate
11196          * same block bytenr here.
11197          */
11198         if (old->start == c->start) {
11199                 btrfs_set_root_generation(&root->root_item,
11200                                           trans->transid);
11201                 root->root_item.level = btrfs_header_level(root->node);
11202                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11203                                         &root->root_key, &root->root_item);
11204                 if (ret) {
11205                         free_extent_buffer(c);
11206                         return ret;
11207                 }
11208         }
11209         free_extent_buffer(old);
11210         root->node = c;
11211         add_root_to_dirty_list(root);
11212         return 0;
11213 }
11214
11215 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11216                                 struct extent_buffer *eb, int tree_root)
11217 {
11218         struct extent_buffer *tmp;
11219         struct btrfs_root_item *ri;
11220         struct btrfs_key key;
11221         u64 bytenr;
11222         u32 nodesize;
11223         int level = btrfs_header_level(eb);
11224         int nritems;
11225         int ret;
11226         int i;
11227
11228         /*
11229          * If we have pinned this block before, don't pin it again.
11230          * This can not only avoid forever loop with broken filesystem
11231          * but also give us some speedups.
11232          */
11233         if (test_range_bit(&fs_info->pinned_extents, eb->start,
11234                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11235                 return 0;
11236
11237         btrfs_pin_extent(fs_info, eb->start, eb->len);
11238
11239         nodesize = btrfs_super_nodesize(fs_info->super_copy);
11240         nritems = btrfs_header_nritems(eb);
11241         for (i = 0; i < nritems; i++) {
11242                 if (level == 0) {
11243                         btrfs_item_key_to_cpu(eb, &key, i);
11244                         if (key.type != BTRFS_ROOT_ITEM_KEY)
11245                                 continue;
11246                         /* Skip the extent root and reloc roots */
11247                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11248                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11249                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11250                                 continue;
11251                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11252                         bytenr = btrfs_disk_root_bytenr(eb, ri);
11253
11254                         /*
11255                          * If at any point we start needing the real root we
11256                          * will have to build a stump root for the root we are
11257                          * in, but for now this doesn't actually use the root so
11258                          * just pass in extent_root.
11259                          */
11260                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11261                                               nodesize, 0);
11262                         if (!extent_buffer_uptodate(tmp)) {
11263                                 fprintf(stderr, "Error reading root block\n");
11264                                 return -EIO;
11265                         }
11266                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
11267                         free_extent_buffer(tmp);
11268                         if (ret)
11269                                 return ret;
11270                 } else {
11271                         bytenr = btrfs_node_blockptr(eb, i);
11272
11273                         /* If we aren't the tree root don't read the block */
11274                         if (level == 1 && !tree_root) {
11275                                 btrfs_pin_extent(fs_info, bytenr, nodesize);
11276                                 continue;
11277                         }
11278
11279                         tmp = read_tree_block(fs_info->extent_root, bytenr,
11280                                               nodesize, 0);
11281                         if (!extent_buffer_uptodate(tmp)) {
11282                                 fprintf(stderr, "Error reading tree block\n");
11283                                 return -EIO;
11284                         }
11285                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11286                         free_extent_buffer(tmp);
11287                         if (ret)
11288                                 return ret;
11289                 }
11290         }
11291
11292         return 0;
11293 }
11294
11295 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11296 {
11297         int ret;
11298
11299         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11300         if (ret)
11301                 return ret;
11302
11303         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
11304 }
11305
11306 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11307 {
11308         struct btrfs_block_group_cache *cache;
11309         struct btrfs_path path;
11310         struct extent_buffer *leaf;
11311         struct btrfs_chunk *chunk;
11312         struct btrfs_key key;
11313         int ret;
11314         u64 start;
11315
11316         btrfs_init_path(&path);
11317         key.objectid = 0;
11318         key.type = BTRFS_CHUNK_ITEM_KEY;
11319         key.offset = 0;
11320         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11321         if (ret < 0) {
11322                 btrfs_release_path(&path);
11323                 return ret;
11324         }
11325
11326         /*
11327          * We do this in case the block groups were screwed up and had alloc
11328          * bits that aren't actually set on the chunks.  This happens with
11329          * restored images every time and could happen in real life I guess.
11330          */
11331         fs_info->avail_data_alloc_bits = 0;
11332         fs_info->avail_metadata_alloc_bits = 0;
11333         fs_info->avail_system_alloc_bits = 0;
11334
11335         /* First we need to create the in-memory block groups */
11336         while (1) {
11337                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11338                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11339                         if (ret < 0) {
11340                                 btrfs_release_path(&path);
11341                                 return ret;
11342                         }
11343                         if (ret) {
11344                                 ret = 0;
11345                                 break;
11346                         }
11347                 }
11348                 leaf = path.nodes[0];
11349                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11350                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11351                         path.slots[0]++;
11352                         continue;
11353                 }
11354
11355                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11356                 btrfs_add_block_group(fs_info, 0,
11357                                       btrfs_chunk_type(leaf, chunk),
11358                                       key.objectid, key.offset,
11359                                       btrfs_chunk_length(leaf, chunk));
11360                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11361                                  key.offset + btrfs_chunk_length(leaf, chunk),
11362                                  GFP_NOFS);
11363                 path.slots[0]++;
11364         }
11365         start = 0;
11366         while (1) {
11367                 cache = btrfs_lookup_first_block_group(fs_info, start);
11368                 if (!cache)
11369                         break;
11370                 cache->cached = 1;
11371                 start = cache->key.objectid + cache->key.offset;
11372         }
11373
11374         btrfs_release_path(&path);
11375         return 0;
11376 }
11377
11378 static int reset_balance(struct btrfs_trans_handle *trans,
11379                          struct btrfs_fs_info *fs_info)
11380 {
11381         struct btrfs_root *root = fs_info->tree_root;
11382         struct btrfs_path path;
11383         struct extent_buffer *leaf;
11384         struct btrfs_key key;
11385         int del_slot, del_nr = 0;
11386         int ret;
11387         int found = 0;
11388
11389         btrfs_init_path(&path);
11390         key.objectid = BTRFS_BALANCE_OBJECTID;
11391         key.type = BTRFS_BALANCE_ITEM_KEY;
11392         key.offset = 0;
11393         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11394         if (ret) {
11395                 if (ret > 0)
11396                         ret = 0;
11397                 if (!ret)
11398                         goto reinit_data_reloc;
11399                 else
11400                         goto out;
11401         }
11402
11403         ret = btrfs_del_item(trans, root, &path);
11404         if (ret)
11405                 goto out;
11406         btrfs_release_path(&path);
11407
11408         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11409         key.type = BTRFS_ROOT_ITEM_KEY;
11410         key.offset = 0;
11411         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11412         if (ret < 0)
11413                 goto out;
11414         while (1) {
11415                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11416                         if (!found)
11417                                 break;
11418
11419                         if (del_nr) {
11420                                 ret = btrfs_del_items(trans, root, &path,
11421                                                       del_slot, del_nr);
11422                                 del_nr = 0;
11423                                 if (ret)
11424                                         goto out;
11425                         }
11426                         key.offset++;
11427                         btrfs_release_path(&path);
11428
11429                         found = 0;
11430                         ret = btrfs_search_slot(trans, root, &key, &path,
11431                                                 -1, 1);
11432                         if (ret < 0)
11433                                 goto out;
11434                         continue;
11435                 }
11436                 found = 1;
11437                 leaf = path.nodes[0];
11438                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11439                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11440                         break;
11441                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11442                         path.slots[0]++;
11443                         continue;
11444                 }
11445                 if (!del_nr) {
11446                         del_slot = path.slots[0];
11447                         del_nr = 1;
11448                 } else {
11449                         del_nr++;
11450                 }
11451                 path.slots[0]++;
11452         }
11453
11454         if (del_nr) {
11455                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11456                 if (ret)
11457                         goto out;
11458         }
11459         btrfs_release_path(&path);
11460
11461 reinit_data_reloc:
11462         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11463         key.type = BTRFS_ROOT_ITEM_KEY;
11464         key.offset = (u64)-1;
11465         root = btrfs_read_fs_root(fs_info, &key);
11466         if (IS_ERR(root)) {
11467                 fprintf(stderr, "Error reading data reloc tree\n");
11468                 ret = PTR_ERR(root);
11469                 goto out;
11470         }
11471         record_root_in_trans(trans, root);
11472         ret = btrfs_fsck_reinit_root(trans, root, 0);
11473         if (ret)
11474                 goto out;
11475         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11476 out:
11477         btrfs_release_path(&path);
11478         return ret;
11479 }
11480
11481 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11482                               struct btrfs_fs_info *fs_info)
11483 {
11484         u64 start = 0;
11485         int ret;
11486
11487         /*
11488          * The only reason we don't do this is because right now we're just
11489          * walking the trees we find and pinning down their bytes, we don't look
11490          * at any of the leaves.  In order to do mixed groups we'd have to check
11491          * the leaves of any fs roots and pin down the bytes for any file
11492          * extents we find.  Not hard but why do it if we don't have to?
11493          */
11494         if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
11495                 fprintf(stderr, "We don't support re-initing the extent tree "
11496                         "for mixed block groups yet, please notify a btrfs "
11497                         "developer you want to do this so they can add this "
11498                         "functionality.\n");
11499                 return -EINVAL;
11500         }
11501
11502         /*
11503          * first we need to walk all of the trees except the extent tree and pin
11504          * down the bytes that are in use so we don't overwrite any existing
11505          * metadata.
11506          */
11507         ret = pin_metadata_blocks(fs_info);
11508         if (ret) {
11509                 fprintf(stderr, "error pinning down used bytes\n");
11510                 return ret;
11511         }
11512
11513         /*
11514          * Need to drop all the block groups since we're going to recreate all
11515          * of them again.
11516          */
11517         btrfs_free_block_groups(fs_info);
11518         ret = reset_block_groups(fs_info);
11519         if (ret) {
11520                 fprintf(stderr, "error resetting the block groups\n");
11521                 return ret;
11522         }
11523
11524         /* Ok we can allocate now, reinit the extent root */
11525         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11526         if (ret) {
11527                 fprintf(stderr, "extent root initialization failed\n");
11528                 /*
11529                  * When the transaction code is updated we should end the
11530                  * transaction, but for now progs only knows about commit so
11531                  * just return an error.
11532                  */
11533                 return ret;
11534         }
11535
11536         /*
11537          * Now we have all the in-memory block groups setup so we can make
11538          * allocations properly, and the metadata we care about is safe since we
11539          * pinned all of it above.
11540          */
11541         while (1) {
11542                 struct btrfs_block_group_cache *cache;
11543
11544                 cache = btrfs_lookup_first_block_group(fs_info, start);
11545                 if (!cache)
11546                         break;
11547                 start = cache->key.objectid + cache->key.offset;
11548                 ret = btrfs_insert_item(trans, fs_info->extent_root,
11549                                         &cache->key, &cache->item,
11550                                         sizeof(cache->item));
11551                 if (ret) {
11552                         fprintf(stderr, "Error adding block group\n");
11553                         return ret;
11554                 }
11555                 btrfs_extent_post_op(trans, fs_info->extent_root);
11556         }
11557
11558         ret = reset_balance(trans, fs_info);
11559         if (ret)
11560                 fprintf(stderr, "error resetting the pending balance\n");
11561
11562         return ret;
11563 }
11564
11565 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11566 {
11567         struct btrfs_path path;
11568         struct btrfs_trans_handle *trans;
11569         struct btrfs_key key;
11570         int ret;
11571
11572         printf("Recowing metadata block %llu\n", eb->start);
11573         key.objectid = btrfs_header_owner(eb);
11574         key.type = BTRFS_ROOT_ITEM_KEY;
11575         key.offset = (u64)-1;
11576
11577         root = btrfs_read_fs_root(root->fs_info, &key);
11578         if (IS_ERR(root)) {
11579                 fprintf(stderr, "Couldn't find owner root %llu\n",
11580                         key.objectid);
11581                 return PTR_ERR(root);
11582         }
11583
11584         trans = btrfs_start_transaction(root, 1);
11585         if (IS_ERR(trans))
11586                 return PTR_ERR(trans);
11587
11588         btrfs_init_path(&path);
11589         path.lowest_level = btrfs_header_level(eb);
11590         if (path.lowest_level)
11591                 btrfs_node_key_to_cpu(eb, &key, 0);
11592         else
11593                 btrfs_item_key_to_cpu(eb, &key, 0);
11594
11595         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11596         btrfs_commit_transaction(trans, root);
11597         btrfs_release_path(&path);
11598         return ret;
11599 }
11600
11601 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11602 {
11603         struct btrfs_path path;
11604         struct btrfs_trans_handle *trans;
11605         struct btrfs_key key;
11606         int ret;
11607
11608         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11609                bad->key.type, bad->key.offset);
11610         key.objectid = bad->root_id;
11611         key.type = BTRFS_ROOT_ITEM_KEY;
11612         key.offset = (u64)-1;
11613
11614         root = btrfs_read_fs_root(root->fs_info, &key);
11615         if (IS_ERR(root)) {
11616                 fprintf(stderr, "Couldn't find owner root %llu\n",
11617                         key.objectid);
11618                 return PTR_ERR(root);
11619         }
11620
11621         trans = btrfs_start_transaction(root, 1);
11622         if (IS_ERR(trans))
11623                 return PTR_ERR(trans);
11624
11625         btrfs_init_path(&path);
11626         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11627         if (ret) {
11628                 if (ret > 0)
11629                         ret = 0;
11630                 goto out;
11631         }
11632         ret = btrfs_del_item(trans, root, &path);
11633 out:
11634         btrfs_commit_transaction(trans, root);
11635         btrfs_release_path(&path);
11636         return ret;
11637 }
11638
11639 static int zero_log_tree(struct btrfs_root *root)
11640 {
11641         struct btrfs_trans_handle *trans;
11642         int ret;
11643
11644         trans = btrfs_start_transaction(root, 1);
11645         if (IS_ERR(trans)) {
11646                 ret = PTR_ERR(trans);
11647                 return ret;
11648         }
11649         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11650         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11651         ret = btrfs_commit_transaction(trans, root);
11652         return ret;
11653 }
11654
11655 static int populate_csum(struct btrfs_trans_handle *trans,
11656                          struct btrfs_root *csum_root, char *buf, u64 start,
11657                          u64 len)
11658 {
11659         u64 offset = 0;
11660         u64 sectorsize;
11661         int ret = 0;
11662
11663         while (offset < len) {
11664                 sectorsize = csum_root->sectorsize;
11665                 ret = read_extent_data(csum_root, buf, start + offset,
11666                                        &sectorsize, 0);
11667                 if (ret)
11668                         break;
11669                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11670                                             start + offset, buf, sectorsize);
11671                 if (ret)
11672                         break;
11673                 offset += sectorsize;
11674         }
11675         return ret;
11676 }
11677
11678 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11679                                       struct btrfs_root *csum_root,
11680                                       struct btrfs_root *cur_root)
11681 {
11682         struct btrfs_path path;
11683         struct btrfs_key key;
11684         struct extent_buffer *node;
11685         struct btrfs_file_extent_item *fi;
11686         char *buf = NULL;
11687         u64 start = 0;
11688         u64 len = 0;
11689         int slot = 0;
11690         int ret = 0;
11691
11692         buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11693         if (!buf)
11694                 return -ENOMEM;
11695
11696         btrfs_init_path(&path);
11697         key.objectid = 0;
11698         key.offset = 0;
11699         key.type = 0;
11700         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11701         if (ret < 0)
11702                 goto out;
11703         /* Iterate all regular file extents and fill its csum */
11704         while (1) {
11705                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11706
11707                 if (key.type != BTRFS_EXTENT_DATA_KEY)
11708                         goto next;
11709                 node = path.nodes[0];
11710                 slot = path.slots[0];
11711                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11712                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
11713                         goto next;
11714                 start = btrfs_file_extent_disk_bytenr(node, fi);
11715                 len = btrfs_file_extent_disk_num_bytes(node, fi);
11716
11717                 ret = populate_csum(trans, csum_root, buf, start, len);
11718                 if (ret == -EEXIST)
11719                         ret = 0;
11720                 if (ret < 0)
11721                         goto out;
11722 next:
11723                 /*
11724                  * TODO: if next leaf is corrupted, jump to nearest next valid
11725                  * leaf.
11726                  */
11727                 ret = btrfs_next_item(cur_root, &path);
11728                 if (ret < 0)
11729                         goto out;
11730                 if (ret > 0) {
11731                         ret = 0;
11732                         goto out;
11733                 }
11734         }
11735
11736 out:
11737         btrfs_release_path(&path);
11738         free(buf);
11739         return ret;
11740 }
11741
11742 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11743                                   struct btrfs_root *csum_root)
11744 {
11745         struct btrfs_fs_info *fs_info = csum_root->fs_info;
11746         struct btrfs_path path;
11747         struct btrfs_root *tree_root = fs_info->tree_root;
11748         struct btrfs_root *cur_root;
11749         struct extent_buffer *node;
11750         struct btrfs_key key;
11751         int slot = 0;
11752         int ret = 0;
11753
11754         btrfs_init_path(&path);
11755         key.objectid = BTRFS_FS_TREE_OBJECTID;
11756         key.offset = 0;
11757         key.type = BTRFS_ROOT_ITEM_KEY;
11758         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11759         if (ret < 0)
11760                 goto out;
11761         if (ret > 0) {
11762                 ret = -ENOENT;
11763                 goto out;
11764         }
11765
11766         while (1) {
11767                 node = path.nodes[0];
11768                 slot = path.slots[0];
11769                 btrfs_item_key_to_cpu(node, &key, slot);
11770                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11771                         goto out;
11772                 if (key.type != BTRFS_ROOT_ITEM_KEY)
11773                         goto next;
11774                 if (!is_fstree(key.objectid))
11775                         goto next;
11776                 key.offset = (u64)-1;
11777
11778                 cur_root = btrfs_read_fs_root(fs_info, &key);
11779                 if (IS_ERR(cur_root) || !cur_root) {
11780                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11781                                 key.objectid);
11782                         goto out;
11783                 }
11784                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11785                                 cur_root);
11786                 if (ret < 0)
11787                         goto out;
11788 next:
11789                 ret = btrfs_next_item(tree_root, &path);
11790                 if (ret > 0) {
11791                         ret = 0;
11792                         goto out;
11793                 }
11794                 if (ret < 0)
11795                         goto out;
11796         }
11797
11798 out:
11799         btrfs_release_path(&path);
11800         return ret;
11801 }
11802
11803 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11804                                       struct btrfs_root *csum_root)
11805 {
11806         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11807         struct btrfs_path path;
11808         struct btrfs_extent_item *ei;
11809         struct extent_buffer *leaf;
11810         char *buf;
11811         struct btrfs_key key;
11812         int ret;
11813
11814         btrfs_init_path(&path);
11815         key.objectid = 0;
11816         key.type = BTRFS_EXTENT_ITEM_KEY;
11817         key.offset = 0;
11818         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11819         if (ret < 0) {
11820                 btrfs_release_path(&path);
11821                 return ret;
11822         }
11823
11824         buf = malloc(csum_root->sectorsize);
11825         if (!buf) {
11826                 btrfs_release_path(&path);
11827                 return -ENOMEM;
11828         }
11829
11830         while (1) {
11831                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11832                         ret = btrfs_next_leaf(extent_root, &path);
11833                         if (ret < 0)
11834                                 break;
11835                         if (ret) {
11836                                 ret = 0;
11837                                 break;
11838                         }
11839                 }
11840                 leaf = path.nodes[0];
11841
11842                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11843                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11844                         path.slots[0]++;
11845                         continue;
11846                 }
11847
11848                 ei = btrfs_item_ptr(leaf, path.slots[0],
11849                                     struct btrfs_extent_item);
11850                 if (!(btrfs_extent_flags(leaf, ei) &
11851                       BTRFS_EXTENT_FLAG_DATA)) {
11852                         path.slots[0]++;
11853                         continue;
11854                 }
11855
11856                 ret = populate_csum(trans, csum_root, buf, key.objectid,
11857                                     key.offset);
11858                 if (ret)
11859                         break;
11860                 path.slots[0]++;
11861         }
11862
11863         btrfs_release_path(&path);
11864         free(buf);
11865         return ret;
11866 }
11867
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * An extent tree init wipes all extent info, so in that case the extent
 * tree cannot be used as the source.  When @search_fs_tree is non-zero the
 * fs/subvolume trees are walked instead of the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root) :
				fill_csum_tree_from_extent(trans, csum_root);
}
11884
11885 static void free_roots_info_cache(void)
11886 {
11887         if (!roots_info_cache)
11888                 return;
11889
11890         while (!cache_tree_empty(roots_info_cache)) {
11891                 struct cache_extent *entry;
11892                 struct root_item_info *rii;
11893
11894                 entry = first_cache_extent(roots_info_cache);
11895                 if (!entry)
11896                         break;
11897                 remove_cache_extent(roots_info_cache, entry);
11898                 rii = container_of(entry, struct root_item_info, cache_extent);
11899                 free(rii);
11900         }
11901
11902         free(roots_info_cache);
11903         roots_info_cache = NULL;
11904 }
11905
11906 static int build_roots_info_cache(struct btrfs_fs_info *info)
11907 {
11908         int ret = 0;
11909         struct btrfs_key key;
11910         struct extent_buffer *leaf;
11911         struct btrfs_path path;
11912
11913         if (!roots_info_cache) {
11914                 roots_info_cache = malloc(sizeof(*roots_info_cache));
11915                 if (!roots_info_cache)
11916                         return -ENOMEM;
11917                 cache_tree_init(roots_info_cache);
11918         }
11919
11920         btrfs_init_path(&path);
11921         key.objectid = 0;
11922         key.type = BTRFS_EXTENT_ITEM_KEY;
11923         key.offset = 0;
11924         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11925         if (ret < 0)
11926                 goto out;
11927         leaf = path.nodes[0];
11928
11929         while (1) {
11930                 struct btrfs_key found_key;
11931                 struct btrfs_extent_item *ei;
11932                 struct btrfs_extent_inline_ref *iref;
11933                 int slot = path.slots[0];
11934                 int type;
11935                 u64 flags;
11936                 u64 root_id;
11937                 u8 level;
11938                 struct cache_extent *entry;
11939                 struct root_item_info *rii;
11940
11941                 if (slot >= btrfs_header_nritems(leaf)) {
11942                         ret = btrfs_next_leaf(info->extent_root, &path);
11943                         if (ret < 0) {
11944                                 break;
11945                         } else if (ret) {
11946                                 ret = 0;
11947                                 break;
11948                         }
11949                         leaf = path.nodes[0];
11950                         slot = path.slots[0];
11951                 }
11952
11953                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11954
11955                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11956                     found_key.type != BTRFS_METADATA_ITEM_KEY)
11957                         goto next;
11958
11959                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11960                 flags = btrfs_extent_flags(leaf, ei);
11961
11962                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11963                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11964                         goto next;
11965
11966                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11967                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11968                         level = found_key.offset;
11969                 } else {
11970                         struct btrfs_tree_block_info *binfo;
11971
11972                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
11973                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11974                         level = btrfs_tree_block_level(leaf, binfo);
11975                 }
11976
11977                 /*
11978                  * For a root extent, it must be of the following type and the
11979                  * first (and only one) iref in the item.
11980                  */
11981                 type = btrfs_extent_inline_ref_type(leaf, iref);
11982                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11983                         goto next;
11984
11985                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11986                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11987                 if (!entry) {
11988                         rii = malloc(sizeof(struct root_item_info));
11989                         if (!rii) {
11990                                 ret = -ENOMEM;
11991                                 goto out;
11992                         }
11993                         rii->cache_extent.start = root_id;
11994                         rii->cache_extent.size = 1;
11995                         rii->level = (u8)-1;
11996                         entry = &rii->cache_extent;
11997                         ret = insert_cache_extent(roots_info_cache, entry);
11998                         ASSERT(ret == 0);
11999                 } else {
12000                         rii = container_of(entry, struct root_item_info,
12001                                            cache_extent);
12002                 }
12003
12004                 ASSERT(rii->cache_extent.start == root_id);
12005                 ASSERT(rii->cache_extent.size == 1);
12006
12007                 if (level > rii->level || rii->level == (u8)-1) {
12008                         rii->level = level;
12009                         rii->bytenr = found_key.objectid;
12010                         rii->gen = btrfs_extent_generation(leaf, ei);
12011                         rii->node_count = 1;
12012                 } else if (level == rii->level) {
12013                         rii->node_count++;
12014                 }
12015 next:
12016                 path.slots[0]++;
12017         }
12018
12019 out:
12020         btrfs_release_path(&path);
12021
12022         return ret;
12023 }
12024
12025 static int maybe_repair_root_item(struct btrfs_fs_info *info,
12026                                   struct btrfs_path *path,
12027                                   const struct btrfs_key *root_key,
12028                                   const int read_only_mode)
12029 {
12030         const u64 root_id = root_key->objectid;
12031         struct cache_extent *entry;
12032         struct root_item_info *rii;
12033         struct btrfs_root_item ri;
12034         unsigned long offset;
12035
12036         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12037         if (!entry) {
12038                 fprintf(stderr,
12039                         "Error: could not find extent items for root %llu\n",
12040                         root_key->objectid);
12041                 return -ENOENT;
12042         }
12043
12044         rii = container_of(entry, struct root_item_info, cache_extent);
12045         ASSERT(rii->cache_extent.start == root_id);
12046         ASSERT(rii->cache_extent.size == 1);
12047
12048         if (rii->node_count != 1) {
12049                 fprintf(stderr,
12050                         "Error: could not find btree root extent for root %llu\n",
12051                         root_id);
12052                 return -ENOENT;
12053         }
12054
12055         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12056         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12057
12058         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12059             btrfs_root_level(&ri) != rii->level ||
12060             btrfs_root_generation(&ri) != rii->gen) {
12061
12062                 /*
12063                  * If we're in repair mode but our caller told us to not update
12064                  * the root item, i.e. just check if it needs to be updated, don't
12065                  * print this message, since the caller will call us again shortly
12066                  * for the same root item without read only mode (the caller will
12067                  * open a transaction first).
12068                  */
12069                 if (!(read_only_mode && repair))
12070                         fprintf(stderr,
12071                                 "%sroot item for root %llu,"
12072                                 " current bytenr %llu, current gen %llu, current level %u,"
12073                                 " new bytenr %llu, new gen %llu, new level %u\n",
12074                                 (read_only_mode ? "" : "fixing "),
12075                                 root_id,
12076                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12077                                 btrfs_root_level(&ri),
12078                                 rii->bytenr, rii->gen, rii->level);
12079
12080                 if (btrfs_root_generation(&ri) > rii->gen) {
12081                         fprintf(stderr,
12082                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12083                                 root_id, btrfs_root_generation(&ri), rii->gen);
12084                         return -EINVAL;
12085                 }
12086
12087                 if (!read_only_mode) {
12088                         btrfs_set_root_bytenr(&ri, rii->bytenr);
12089                         btrfs_set_root_level(&ri, rii->level);
12090                         btrfs_set_root_generation(&ri, rii->gen);
12091                         write_extent_buffer(path->nodes[0], &ri,
12092                                             offset, sizeof(ri));
12093                 }
12094
12095                 return 1;
12096         }
12097
12098         return 0;
12099 }
12100
12101 /*
12102  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12103  * caused read-only snapshots to be corrupted if they were created at a moment
12104  * when the source subvolume/snapshot had orphan items. The issue was that the
12105  * on-disk root items became incorrect, referring to the pre orphan cleanup root
12106  * node instead of the post orphan cleanup root node.
12107  * So this function, and its callees, just detects and fixes those cases. Even
12108  * though the regression was for read-only snapshots, this function applies to
12109  * any snapshot/subvolume root.
12110  * This must be run before any other repair code - not doing it so, makes other
12111  * repair code delete or modify backrefs in the extent tree for example, which
12112  * will result in an inconsistent fs after repairing the root items.
12113  */
12114 static int repair_root_items(struct btrfs_fs_info *info)
12115 {
12116         struct btrfs_path path;
12117         struct btrfs_key key;
12118         struct extent_buffer *leaf;
12119         struct btrfs_trans_handle *trans = NULL;
12120         int ret = 0;
12121         int bad_roots = 0;
12122         int need_trans = 0;
12123
12124         btrfs_init_path(&path);
12125
12126         ret = build_roots_info_cache(info);
12127         if (ret)
12128                 goto out;
12129
12130         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12131         key.type = BTRFS_ROOT_ITEM_KEY;
12132         key.offset = 0;
12133
12134 again:
12135         /*
12136          * Avoid opening and committing transactions if a leaf doesn't have
12137          * any root items that need to be fixed, so that we avoid rotating
12138          * backup roots unnecessarily.
12139          */
12140         if (need_trans) {
12141                 trans = btrfs_start_transaction(info->tree_root, 1);
12142                 if (IS_ERR(trans)) {
12143                         ret = PTR_ERR(trans);
12144                         goto out;
12145                 }
12146         }
12147
12148         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12149                                 0, trans ? 1 : 0);
12150         if (ret < 0)
12151                 goto out;
12152         leaf = path.nodes[0];
12153
12154         while (1) {
12155                 struct btrfs_key found_key;
12156
12157                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12158                         int no_more_keys = find_next_key(&path, &key);
12159
12160                         btrfs_release_path(&path);
12161                         if (trans) {
12162                                 ret = btrfs_commit_transaction(trans,
12163                                                                info->tree_root);
12164                                 trans = NULL;
12165                                 if (ret < 0)
12166                                         goto out;
12167                         }
12168                         need_trans = 0;
12169                         if (no_more_keys)
12170                                 break;
12171                         goto again;
12172                 }
12173
12174                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12175
12176                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12177                         goto next;
12178                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12179                         goto next;
12180
12181                 ret = maybe_repair_root_item(info, &path, &found_key,
12182                                              trans ? 0 : 1);
12183                 if (ret < 0)
12184                         goto out;
12185                 if (ret) {
12186                         if (!trans && repair) {
12187                                 need_trans = 1;
12188                                 key = found_key;
12189                                 btrfs_release_path(&path);
12190                                 goto again;
12191                         }
12192                         bad_roots++;
12193                 }
12194 next:
12195                 path.slots[0]++;
12196         }
12197         ret = 0;
12198 out:
12199         free_roots_info_cache();
12200         btrfs_release_path(&path);
12201         if (trans)
12202                 btrfs_commit_transaction(trans, info->tree_root);
12203         if (ret < 0)
12204                 return ret;
12205
12206         return bad_roots;
12207 }
12208
12209 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12210 {
12211         struct btrfs_trans_handle *trans;
12212         struct btrfs_block_group_cache *bg_cache;
12213         u64 current = 0;
12214         int ret = 0;
12215
12216         /* Clear all free space cache inodes and its extent data */
12217         while (1) {
12218                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12219                 if (!bg_cache)
12220                         break;
12221                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12222                 if (ret < 0)
12223                         return ret;
12224                 current = bg_cache->key.objectid + bg_cache->key.offset;
12225         }
12226
12227         /* Don't forget to set cache_generation to -1 */
12228         trans = btrfs_start_transaction(fs_info->tree_root, 0);
12229         if (IS_ERR(trans)) {
12230                 error("failed to update super block cache generation");
12231                 return PTR_ERR(trans);
12232         }
12233         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12234         btrfs_commit_transaction(trans, fs_info->tree_root);
12235
12236         return ret;
12237 }
12238
/*
 * NULL-terminated table of help strings for "btrfs check"; cmd_check()
 * passes it to usage().
 */
const char *const cmd_check_usage[] = {
        /* Synopsis and description */
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",

        /* Options */
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",

        /* End of table */
        NULL
};
12269
12270 int cmd_check(int argc, char **argv)
12271 {
12272         struct cache_tree root_cache;
12273         struct btrfs_root *root;
12274         struct btrfs_fs_info *info;
12275         u64 bytenr = 0;
12276         u64 subvolid = 0;
12277         u64 tree_root_bytenr = 0;
12278         u64 chunk_root_bytenr = 0;
12279         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12280         int ret;
12281         int err = 0;
12282         u64 num;
12283         int init_csum_tree = 0;
12284         int readonly = 0;
12285         int clear_space_cache = 0;
12286         int qgroup_report = 0;
12287         int qgroups_repaired = 0;
12288         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12289
12290         while(1) {
12291                 int c;
12292                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12293                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12294                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12295                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12296                 static const struct option long_options[] = {
12297                         { "super", required_argument, NULL, 's' },
12298                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12299                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12300                         { "init-csum-tree", no_argument, NULL,
12301                                 GETOPT_VAL_INIT_CSUM },
12302                         { "init-extent-tree", no_argument, NULL,
12303                                 GETOPT_VAL_INIT_EXTENT },
12304                         { "check-data-csum", no_argument, NULL,
12305                                 GETOPT_VAL_CHECK_CSUM },
12306                         { "backup", no_argument, NULL, 'b' },
12307                         { "subvol-extents", required_argument, NULL, 'E' },
12308                         { "qgroup-report", no_argument, NULL, 'Q' },
12309                         { "tree-root", required_argument, NULL, 'r' },
12310                         { "chunk-root", required_argument, NULL,
12311                                 GETOPT_VAL_CHUNK_TREE },
12312                         { "progress", no_argument, NULL, 'p' },
12313                         { "mode", required_argument, NULL,
12314                                 GETOPT_VAL_MODE },
12315                         { "clear-space-cache", required_argument, NULL,
12316                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
12317                         { NULL, 0, NULL, 0}
12318                 };
12319
12320                 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12321                 if (c < 0)
12322                         break;
12323                 switch(c) {
12324                         case 'a': /* ignored */ break;
12325                         case 'b':
12326                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12327                                 break;
12328                         case 's':
12329                                 num = arg_strtou64(optarg);
12330                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12331                                         error(
12332                                         "super mirror should be less than %d",
12333                                                 BTRFS_SUPER_MIRROR_MAX);
12334                                         exit(1);
12335                                 }
12336                                 bytenr = btrfs_sb_offset(((int)num));
12337                                 printf("using SB copy %llu, bytenr %llu\n", num,
12338                                        (unsigned long long)bytenr);
12339                                 break;
12340                         case 'Q':
12341                                 qgroup_report = 1;
12342                                 break;
12343                         case 'E':
12344                                 subvolid = arg_strtou64(optarg);
12345                                 break;
12346                         case 'r':
12347                                 tree_root_bytenr = arg_strtou64(optarg);
12348                                 break;
12349                         case GETOPT_VAL_CHUNK_TREE:
12350                                 chunk_root_bytenr = arg_strtou64(optarg);
12351                                 break;
12352                         case 'p':
12353                                 ctx.progress_enabled = true;
12354                                 break;
12355                         case '?':
12356                         case 'h':
12357                                 usage(cmd_check_usage);
12358                         case GETOPT_VAL_REPAIR:
12359                                 printf("enabling repair mode\n");
12360                                 repair = 1;
12361                                 ctree_flags |= OPEN_CTREE_WRITES;
12362                                 break;
12363                         case GETOPT_VAL_READONLY:
12364                                 readonly = 1;
12365                                 break;
12366                         case GETOPT_VAL_INIT_CSUM:
12367                                 printf("Creating a new CRC tree\n");
12368                                 init_csum_tree = 1;
12369                                 repair = 1;
12370                                 ctree_flags |= OPEN_CTREE_WRITES;
12371                                 break;
12372                         case GETOPT_VAL_INIT_EXTENT:
12373                                 init_extent_tree = 1;
12374                                 ctree_flags |= (OPEN_CTREE_WRITES |
12375                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
12376                                 repair = 1;
12377                                 break;
12378                         case GETOPT_VAL_CHECK_CSUM:
12379                                 check_data_csum = 1;
12380                                 break;
12381                         case GETOPT_VAL_MODE:
12382                                 check_mode = parse_check_mode(optarg);
12383                                 if (check_mode == CHECK_MODE_UNKNOWN) {
12384                                         error("unknown mode: %s", optarg);
12385                                         exit(1);
12386                                 }
12387                                 break;
12388                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
12389                                 if (strcmp(optarg, "v1") == 0) {
12390                                         clear_space_cache = 1;
12391                                 } else if (strcmp(optarg, "v2") == 0) {
12392                                         clear_space_cache = 2;
12393                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12394                                 } else {
12395                                         error(
12396                 "invalid argument to --clear-space-cache, must be v1 or v2");
12397                                         exit(1);
12398                                 }
12399                                 ctree_flags |= OPEN_CTREE_WRITES;
12400                                 break;
12401                 }
12402         }
12403
12404         if (check_argc_exact(argc - optind, 1))
12405                 usage(cmd_check_usage);
12406
12407         if (ctx.progress_enabled) {
12408                 ctx.tp = TASK_NOTHING;
12409                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12410         }
12411
12412         /* This check is the only reason for --readonly to exist */
12413         if (readonly && repair) {
12414                 error("repair options are not compatible with --readonly");
12415                 exit(1);
12416         }
12417
12418         /*
12419          * Not supported yet
12420          */
12421         if (repair && check_mode == CHECK_MODE_LOWMEM) {
12422                 error("low memory mode doesn't support repair yet");
12423                 exit(1);
12424         }
12425
12426         radix_tree_init();
12427         cache_tree_init(&root_cache);
12428
12429         if((ret = check_mounted(argv[optind])) < 0) {
12430                 error("could not check mount status: %s", strerror(-ret));
12431                 err |= !!ret;
12432                 goto err_out;
12433         } else if(ret) {
12434                 error("%s is currently mounted, aborting", argv[optind]);
12435                 ret = -EBUSY;
12436                 err |= !!ret;
12437                 goto err_out;
12438         }
12439
12440         /* only allow partial opening under repair mode */
12441         if (repair)
12442                 ctree_flags |= OPEN_CTREE_PARTIAL;
12443
12444         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12445                                   chunk_root_bytenr, ctree_flags);
12446         if (!info) {
12447                 error("cannot open file system");
12448                 ret = -EIO;
12449                 err |= !!ret;
12450                 goto err_out;
12451         }
12452
12453         global_info = info;
12454         root = info->fs_root;
12455         if (clear_space_cache == 1) {
12456                 if (btrfs_fs_compat_ro(info,
12457                                 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12458                         error(
12459                 "free space cache v2 detected, use --clear-space-cache v2");
12460                         ret = 1;
12461                         goto close_out;
12462                 }
12463                 printf("Clearing free space cache\n");
12464                 ret = clear_free_space_cache(info);
12465                 if (ret) {
12466                         error("failed to clear free space cache");
12467                         ret = 1;
12468                 } else {
12469                         printf("Free space cache cleared\n");
12470                 }
12471                 goto close_out;
12472         } else if (clear_space_cache == 2) {
12473                 if (!btrfs_fs_compat_ro(info,
12474                                         BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12475                         printf("no free space cache v2 to clear\n");
12476                         ret = 0;
12477                         goto close_out;
12478                 }
12479                 printf("Clear free space cache v2\n");
12480                 ret = btrfs_clear_free_space_tree(info);
12481                 if (ret) {
12482                         error("failed to clear free space cache v2: %d", ret);
12483                         ret = 1;
12484                 } else {
12485                         printf("free space cache v2 cleared\n");
12486                 }
12487                 goto close_out;
12488         }
12489
12490         /*
12491          * repair mode will force us to commit transaction which
12492          * will make us fail to load log tree when mounting.
12493          */
12494         if (repair && btrfs_super_log_root(info->super_copy)) {
12495                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12496                 if (!ret) {
12497                         ret = 1;
12498                         err |= !!ret;
12499                         goto close_out;
12500                 }
12501                 ret = zero_log_tree(root);
12502                 err |= !!ret;
12503                 if (ret) {
12504                         error("failed to zero log tree: %d", ret);
12505                         goto close_out;
12506                 }
12507         }
12508
12509         uuid_unparse(info->super_copy->fsid, uuidbuf);
12510         if (qgroup_report) {
12511                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12512                        uuidbuf);
12513                 ret = qgroup_verify_all(info);
12514                 err |= !!ret;
12515                 if (ret == 0)
12516                         report_qgroups(1);
12517                 goto close_out;
12518         }
12519         if (subvolid) {
12520                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12521                        subvolid, argv[optind], uuidbuf);
12522                 ret = print_extent_state(info, subvolid);
12523                 err |= !!ret;
12524                 goto close_out;
12525         }
12526         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12527
12528         if (!extent_buffer_uptodate(info->tree_root->node) ||
12529             !extent_buffer_uptodate(info->dev_root->node) ||
12530             !extent_buffer_uptodate(info->chunk_root->node)) {
12531                 error("critical roots corrupted, unable to check the filesystem");
12532                 err |= !!ret;
12533                 ret = -EIO;
12534                 goto close_out;
12535         }
12536
12537         if (init_extent_tree || init_csum_tree) {
12538                 struct btrfs_trans_handle *trans;
12539
12540                 trans = btrfs_start_transaction(info->extent_root, 0);
12541                 if (IS_ERR(trans)) {
12542                         error("error starting transaction");
12543                         ret = PTR_ERR(trans);
12544                         err |= !!ret;
12545                         goto close_out;
12546                 }
12547
12548                 if (init_extent_tree) {
12549                         printf("Creating a new extent tree\n");
12550                         ret = reinit_extent_tree(trans, info);
12551                         err |= !!ret;
12552                         if (ret)
12553                                 goto close_out;
12554                 }
12555
12556                 if (init_csum_tree) {
12557                         printf("Reinitialize checksum tree\n");
12558                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12559                         if (ret) {
12560                                 error("checksum tree initialization failed: %d",
12561                                                 ret);
12562                                 ret = -EIO;
12563                                 err |= !!ret;
12564                                 goto close_out;
12565                         }
12566
12567                         ret = fill_csum_tree(trans, info->csum_root,
12568                                              init_extent_tree);
12569                         err |= !!ret;
12570                         if (ret) {
12571                                 error("checksum tree refilling failed: %d", ret);
12572                                 return -EIO;
12573                         }
12574                 }
12575                 /*
12576                  * Ok now we commit and run the normal fsck, which will add
12577                  * extent entries for all of the items it finds.
12578                  */
12579                 ret = btrfs_commit_transaction(trans, info->extent_root);
12580                 err |= !!ret;
12581                 if (ret)
12582                         goto close_out;
12583         }
12584         if (!extent_buffer_uptodate(info->extent_root->node)) {
12585                 error("critical: extent_root, unable to check the filesystem");
12586                 ret = -EIO;
12587                 err |= !!ret;
12588                 goto close_out;
12589         }
12590         if (!extent_buffer_uptodate(info->csum_root->node)) {
12591                 error("critical: csum_root, unable to check the filesystem");
12592                 ret = -EIO;
12593                 err |= !!ret;
12594                 goto close_out;
12595         }
12596
12597         if (!ctx.progress_enabled)
12598                 fprintf(stderr, "checking extents\n");
12599         if (check_mode == CHECK_MODE_LOWMEM)
12600                 ret = check_chunks_and_extents_v2(root);
12601         else
12602                 ret = check_chunks_and_extents(root);
12603         err |= !!ret;
12604         if (ret)
12605                 error(
12606                 "errors found in extent allocation tree or chunk allocation");
12607
12608         ret = repair_root_items(info);
12609         err |= !!ret;
12610         if (ret < 0)
12611                 goto close_out;
12612         if (repair) {
12613                 fprintf(stderr, "Fixed %d roots.\n", ret);
12614                 ret = 0;
12615         } else if (ret > 0) {
12616                 fprintf(stderr,
12617                        "Found %d roots with an outdated root item.\n",
12618                        ret);
12619                 fprintf(stderr,
12620                         "Please run a filesystem check with the option --repair to fix them.\n");
12621                 ret = 1;
12622                 err |= !!ret;
12623                 goto close_out;
12624         }
12625
12626         if (!ctx.progress_enabled) {
12627                 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12628                         fprintf(stderr, "checking free space tree\n");
12629                 else
12630                         fprintf(stderr, "checking free space cache\n");
12631         }
12632         ret = check_space_cache(root);
12633         err |= !!ret;
12634         if (ret)
12635                 goto out;
12636
12637         /*
12638          * We used to have to have these hole extents in between our real
12639          * extents so if we don't have this flag set we need to make sure there
12640          * are no gaps in the file extents for inodes, otherwise we can just
12641          * ignore it when this happens.
12642          */
12643         no_holes = btrfs_fs_incompat(root->fs_info,
12644                                      BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12645         if (!ctx.progress_enabled)
12646                 fprintf(stderr, "checking fs roots\n");
12647         if (check_mode == CHECK_MODE_LOWMEM)
12648                 ret = check_fs_roots_v2(root->fs_info);
12649         else
12650                 ret = check_fs_roots(root, &root_cache);
12651         err |= !!ret;
12652         if (ret)
12653                 goto out;
12654
12655         fprintf(stderr, "checking csums\n");
12656         ret = check_csums(root);
12657         err |= !!ret;
12658         if (ret)
12659                 goto out;
12660
12661         fprintf(stderr, "checking root refs\n");
12662         /* For low memory mode, check_fs_roots_v2 handles root refs */
12663         if (check_mode != CHECK_MODE_LOWMEM) {
12664                 ret = check_root_refs(root, &root_cache);
12665                 err |= !!ret;
12666                 if (ret)
12667                         goto out;
12668         }
12669
12670         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12671                 struct extent_buffer *eb;
12672
12673                 eb = list_first_entry(&root->fs_info->recow_ebs,
12674                                       struct extent_buffer, recow);
12675                 list_del_init(&eb->recow);
12676                 ret = recow_extent_buffer(root, eb);
12677                 err |= !!ret;
12678                 if (ret)
12679                         break;
12680         }
12681
12682         while (!list_empty(&delete_items)) {
12683                 struct bad_item *bad;
12684
12685                 bad = list_first_entry(&delete_items, struct bad_item, list);
12686                 list_del_init(&bad->list);
12687                 if (repair) {
12688                         ret = delete_bad_item(root, bad);
12689                         err |= !!ret;
12690                 }
12691                 free(bad);
12692         }
12693
12694         if (info->quota_enabled) {
12695                 fprintf(stderr, "checking quota groups\n");
12696                 ret = qgroup_verify_all(info);
12697                 err |= !!ret;
12698                 if (ret)
12699                         goto out;
12700                 report_qgroups(0);
12701                 ret = repair_qgroups(info, &qgroups_repaired);
12702                 err |= !!ret;
12703                 if (err)
12704                         goto out;
12705                 ret = 0;
12706         }
12707
12708         if (!list_empty(&root->fs_info->recow_ebs)) {
12709                 error("transid errors in file system");
12710                 ret = 1;
12711                 err |= !!ret;
12712         }
12713 out:
12714         if (found_old_backref) { /*
12715                  * there was a disk format change when mixed
12716                  * backref was in testing tree. The old format
12717                  * existed about one week.
12718                  */
12719                 printf("\n * Found old mixed backref format. "
12720                        "The old format is not supported! *"
12721                        "\n * Please mount the FS in readonly mode, "
12722                        "backup data and re-format the FS. *\n\n");
12723                 err |= 1;
12724         }
12725         printf("found %llu bytes used err is %d\n",
12726                (unsigned long long)bytes_used, ret);
12727         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12728         printf("total tree bytes: %llu\n",
12729                (unsigned long long)total_btree_bytes);
12730         printf("total fs tree bytes: %llu\n",
12731                (unsigned long long)total_fs_tree_bytes);
12732         printf("total extent tree bytes: %llu\n",
12733                (unsigned long long)total_extent_tree_bytes);
12734         printf("btree space waste bytes: %llu\n",
12735                (unsigned long long)btree_space_waste);
12736         printf("file data blocks allocated: %llu\n referenced %llu\n",
12737                 (unsigned long long)data_bytes_allocated,
12738                 (unsigned long long)data_bytes_referenced);
12739
12740         free_qgroup_counts();
12741         free_root_recs_tree(&root_cache);
12742 close_out:
12743         close_ctree(root);
12744 err_out:
12745         if (ctx.progress_enabled)
12746                 task_deinit(ctx.info);
12747
12748         return err;
12749 }