cb34f27a3cc1c3031b039fe984a7ca0ae7d1d406
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases reported by the progress indicator.  Every value except
 * TASK_NOTHING is used as an index into the label table of
 * print_status_check().
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* must stay the last element */
};
53
/* State handed to the progress-indicator callbacks through their void *. */
struct task_ctx {
        int progress_enabled;
        /* Phase whose label print_status_check() currently displays */
        enum task_position tp;

        /* Handle passed to task_period_start() / task_period_wait() */
        struct task_info *info;
};
60
/*
 * File-scope checker state.
 *
 * NOTE(review): the u64 counters are presumably accumulated while scanning
 * and reported at the end; their users are outside this part of the file,
 * so confirm before relying on that.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Context for the progress-indicator callbacks, zero-initialized */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementations selectable by name, see parse_check_mode().
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a name it does
 * not recognise; CHECK_MODE_DEFAULT is an alias of CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header of a back reference.  It is embedded as the member "node"
 * of both struct data_backref and struct tree_backref and is linked into a
 * red-black tree through its rb_node.
 */
struct extent_backref {
        struct rb_node node;
        /* Set for a data_backref, clear for a tree_backref */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        /* Full backrefs are compared by parent only (compare_data_backref) */
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Back reference of a data extent.  The embedded extent_backref is named
 * "node"; to_data_backref() recovers this structure from it.
 */
struct data_backref {
        struct extent_backref node;
        /* Both names share storage, so comparing one compares the other */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /*
         * Separate from the one-bit node.found_ref.  disk_bytenr and bytes
         * take part in compare_data_backref() only when this is non-zero
         * on both sides.
         */
        u32 found_ref;
};
115
/* Error bits, one bit per condition; bit 0 is not used by this set. */
#define ROOT_DIR_ERROR          (1 << 1)  /* ROOT_DIR is bad */
#define DIR_ITEM_MISSING        (1 << 2)  /* no DIR_ITEM found */
#define DIR_ITEM_MISMATCH       (1 << 3)  /* DIR_ITEM found, content differs */
#define INODE_REF_MISSING       (1 << 4)  /* no INODE_REF/INODE_EXTREF found */
#define INODE_ITEM_MISSING      (1 << 5)  /* no INODE_ITEM found */
#define INODE_ITEM_MISMATCH     (1 << 6)  /* INODE_ITEM found, content differs */
#define FILE_EXTENT_ERROR       (1 << 7)  /* FILE_EXTENT is bad */
#define ODD_CSUM_ITEM           (1 << 8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1 << 9)  /* no CSUM_ITEM found */
#define LINK_COUNT_ERROR        (1 << 10) /* INODE_ITEM nlink count is wrong */
#define NBYTES_ERROR            (1 << 11) /* INODE_ITEM nbytes count is wrong */
#define ISIZE_ERROR             (1 << 12) /* INODE_ITEM size count is wrong */
#define ORPHAN_ITEM             (1 << 13) /* INODE_ITEM without reference */
#define NO_INODE_ITEM           (1 << 14) /* there is no inode_item */
#define LAST_ITEM               (1 << 15) /* this tree traversal is complete */
#define ROOT_REF_MISSING        (1 << 16) /* no ROOT_REF found */
#define ROOT_REF_MISMATCH       (1 << 17) /* ROOT_REF found, content differs */
#define DIR_INDEX_MISSING       (1 << 18) /* no DIR_INDEX found */
#define DIR_INDEX_MISMATCH      (1 << 19) /* DIR_INDEX found, content differs */
#define DIR_COUNT_AGAIN         (1 << 20) /* directory isize must be recalculated */
#define BG_ACCOUNTING_ERROR     (1 << 21) /* block group accounting error */
137
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
139 {
140         return container_of(back, struct data_backref, node);
141 }
142
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
144 {
145         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147         struct data_backref *back1 = to_data_backref(ext1);
148         struct data_backref *back2 = to_data_backref(ext2);
149
150         WARN_ON(!ext1->is_data);
151         WARN_ON(!ext2->is_data);
152
153         /* parent and root are a union, so this covers both */
154         if (back1->parent > back2->parent)
155                 return 1;
156         if (back1->parent < back2->parent)
157                 return -1;
158
159         /* This is a full backref and the parents match. */
160         if (back1->node.full_backref)
161                 return 0;
162
163         if (back1->owner > back2->owner)
164                 return 1;
165         if (back1->owner < back2->owner)
166                 return -1;
167
168         if (back1->offset > back2->offset)
169                 return 1;
170         if (back1->offset < back2->offset)
171                 return -1;
172
173         if (back1->found_ref && back2->found_ref) {
174                 if (back1->disk_bytenr > back2->disk_bytenr)
175                         return 1;
176                 if (back1->disk_bytenr < back2->disk_bytenr)
177                         return -1;
178
179                 if (back1->bytes > back2->bytes)
180                         return 1;
181                 if (back1->bytes < back2->bytes)
182                         return -1;
183         }
184
185         return 0;
186 }
187
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead of an rb_node.
 * During the extent scan it is stored in a per-root list
 * (NOTE(review): the original comment names root->orphan_data_extent;
 * that member is declared outside this file section).
 * During the fs tree scan it is then moved to inode_record::orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
202
/*
 * Back reference of a tree block.  The embedded extent_backref is named
 * "node"; to_tree_backref() recovers this structure from it.
 */
struct tree_backref {
        struct extent_backref node;
        /* Both names share storage; compare_tree_backref() reads "parent" */
        union {
                u64 parent;
                u64 root;
        };
};
210
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
212 {
213         return container_of(back, struct tree_backref, node);
214 }
215
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
217 {
218         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220         struct tree_backref *back1 = to_tree_backref(ext1);
221         struct tree_backref *back2 = to_tree_backref(ext2);
222
223         WARN_ON(ext1->is_data);
224         WARN_ON(ext2->is_data);
225
226         /* parent and root are a union, so this covers both */
227         if (back1->parent > back2->parent)
228                 return 1;
229         if (back1->parent < back2->parent)
230                 return -1;
231
232         return 0;
233 }
234
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
239
240         if (ext1->is_data > ext2->is_data)
241                 return 1;
242
243         if (ext1->is_data < ext2->is_data)
244                 return -1;
245
246         if (ext1->full_backref > ext2->full_backref)
247                 return 1;
248         if (ext1->full_backref < ext2->full_backref)
249                 return -1;
250
251         if (ext1->is_data)
252                 return compare_data_backref(node1, node2);
253         else
254                 return compare_tree_backref(node1, node2);
255 }
256
/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };

/*
 * Record of one extent.  to_extent_record() maps the "list" member back to
 * the record.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct rb_root backref_tree;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
287
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
289 {
290         return container_of(entry, struct extent_record, list);
291 }
292
293 struct inode_backref {
294         struct list_head list;
295         unsigned int found_dir_item:1;
296         unsigned int found_dir_index:1;
297         unsigned int found_inode_ref:1;
298         u8 filetype;
299         u8 ref_type;
300         int errors;
301         u64 dir;
302         u64 index;
303         u16 namelen;
304         char name[0];
305 };
306
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
308 {
309         return list_entry(entry, struct inode_backref, list);
310 }
311
/*
 * List-linked record of a root item.
 * NOTE(review): the members look like a snapshot of the fields of an
 * on-disk root item (bytenr, level, drop progress) - confirm at the code
 * that fills them in.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        struct btrfs_key drop_key;
};
321
/* Back reference error bits, decoded into text by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)        /* 0x0001 */
#define REF_ERR_NO_DIR_INDEX            (1 << 1)        /* 0x0002 */
#define REF_ERR_NO_INODE_REF            (1 << 2)        /* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)        /* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)        /* 0x0010 */
#define REF_ERR_DUP_INODE_REF           (1 << 5)        /* 0x0020 */
#define REF_ERR_INDEX_UNMATCH           (1 << 6)        /* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)        /* 0x0080 */
#define REF_ERR_NAME_TOO_LONG           (1 << 8)        /* 0x0100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)        /* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)       /* 0x0400 */
#define REF_ERR_DUP_ROOT_REF            (1 << 11)       /* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)       /* 0x1000 */
335
/*
 * A hole in a file, kept in a red-black tree (inode_record::holes).
 * compare_hole_range() treats it as covering [start, start + len).
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
341
/* Everything collected about one inode while checking a tree. */
struct inode_record {
        /* List of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* Mask of I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;
        u64 extent_end;
        /* Tree of struct file_extent_hole */
        struct rb_root holes;
        /* List of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* clone_inode_rec() hands out a copy with refs set to 1 */
        u32 refs;
};
369
/* Inode error bits stored in inode_record::errors; see print_inode_error(). */
#define I_ERR_NO_INODE_ITEM             (1 << 0)        /* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)        /* 0x0002 */
#define I_ERR_DUP_INODE_ITEM            (1 << 2)        /* 0x0004 */
#define I_ERR_DUP_DIR_INDEX             (1 << 3)        /* 0x0008 */
#define I_ERR_ODD_DIR_ITEM              (1 << 4)        /* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)        /* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)        /* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)        /* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8)        /* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)        /* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10)       /* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)       /* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)       /* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)       /* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)       /* 0x4000 */
385
386 struct root_backref {
387         struct list_head list;
388         unsigned int found_dir_item:1;
389         unsigned int found_dir_index:1;
390         unsigned int found_back_ref:1;
391         unsigned int found_forward_ref:1;
392         unsigned int reachable:1;
393         int errors;
394         u64 ref_root;
395         u64 dir;
396         u64 index;
397         u16 namelen;
398         char name[0];
399 };
400
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
402 {
403         return list_entry(entry, struct root_backref, list);
404 }
405
/* Per-root record, embedded in a cache tree through "cache". */
struct root_record {
        /* NOTE(review): presumably a list of struct root_backref - confirm */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
413
/* Cache tree entry that carries an untyped pointer as its payload. */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
418
/*
 * Cache tree entry that owns two cache trees of its own, plus a pointer to
 * an inode record and a reference count.
 * NOTE(review): walk_control::nodes holds pointers to these; the exact
 * sharing semantics are implemented outside this part of the file.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
426
/* Start and size of one block. */
struct block_info {
        u64 start;
        u32 size;
};
431
/* Bookkeeping for a tree walk: one shared_node slot per possible level. */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        /* NOTE(review): presumably an index into nodes[] - confirm */
        int active_node;
        int root_level;
};
438
/* List entry identifying an item by its key and the id of its root. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
444
/* List entry describing a byte range with a count and a "broken" marker. */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
452
/* Information about a root, stored in a cache tree through "cache_extent". */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
462
/*
 * Error bits for the low memory mode check.
 *
 * No caller interprets the individual bits yet; they are only used
 * internally for error classification.  Every flag owns a distinct bit so
 * that the conditions stay distinguishable once they are OR-ed together.
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH and
 * now has the first free bit; all other values are unchanged.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
479
480 static void *print_status_check(void *p)
481 {
482         struct task_ctx *priv = p;
483         const char work_indicator[] = { '.', 'o', 'O', 'o' };
484         uint32_t count = 0;
485         static char *task_position_string[] = {
486                 "checking extents",
487                 "checking free space cache",
488                 "checking fs roots",
489         };
490
491         task_period_start(priv->info, 1000 /* 1s */);
492
493         if (priv->tp == TASK_NOTHING)
494                 return NULL;
495
496         while (1) {
497                 printf("%s [%c]\r", task_position_string[priv->tp],
498                                 work_indicator[count % 4]);
499                 count++;
500                 fflush(stdout);
501                 task_period_wait(priv->info);
502         }
503         return NULL;
504 }
505
/*
 * Completion callback of the progress indicator: end the status line with
 * a newline and flush stdout.
 *
 * @p:  unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
        putchar('\n');
        fflush(stdout);

        return 0;
}
513
514 static enum btrfs_check_mode parse_check_mode(const char *str)
515 {
516         if (strcmp(str, "lowmem") == 0)
517                 return CHECK_MODE_LOWMEM;
518         if (strcmp(str, "orig") == 0)
519                 return CHECK_MODE_ORIGINAL;
520         if (strcmp(str, "original") == 0)
521                 return CHECK_MODE_ORIGINAL;
522
523         return CHECK_MODE_UNKNOWN;
524 }
525
526 /* Compatible function to allow reuse of old codes */
527 static u64 first_extent_gap(struct rb_root *holes)
528 {
529         struct file_extent_hole *hole;
530
531         if (RB_EMPTY_ROOT(holes))
532                 return (u64)-1;
533
534         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
535         return hole->start;
536 }
537
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
539 {
540         struct file_extent_hole *hole1;
541         struct file_extent_hole *hole2;
542
543         hole1 = rb_entry(node1, struct file_extent_hole, node);
544         hole2 = rb_entry(node2, struct file_extent_hole, node);
545
546         if (hole1->start > hole2->start)
547                 return -1;
548         if (hole1->start < hole2->start)
549                 return 1;
550         /* Now hole1->start == hole2->start */
551         if (hole1->len >= hole2->len)
552                 /*
553                  * Hole 1 will be merge center
554                  * Same hole will be merged later
555                  */
556                 return -1;
557         /* Hole 2 will be merge center */
558         return 1;
559 }
560
561 /*
562  * Add a hole to the record
563  *
564  * This will do hole merge for copy_file_extent_holes(),
565  * which will ensure there won't be continuous holes.
566  */
567 static int add_file_extent_hole(struct rb_root *holes,
568                                 u64 start, u64 len)
569 {
570         struct file_extent_hole *hole;
571         struct file_extent_hole *prev = NULL;
572         struct file_extent_hole *next = NULL;
573
574         hole = malloc(sizeof(*hole));
575         if (!hole)
576                 return -ENOMEM;
577         hole->start = start;
578         hole->len = len;
579         /* Since compare will not return 0, no -EEXIST will happen */
580         rb_insert(holes, &hole->node, compare_hole);
581
582         /* simple merge with previous hole */
583         if (rb_prev(&hole->node))
584                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
585                                 node);
586         if (prev && prev->start + prev->len >= hole->start) {
587                 hole->len = hole->start + hole->len - prev->start;
588                 hole->start = prev->start;
589                 rb_erase(&prev->node, holes);
590                 free(prev);
591                 prev = NULL;
592         }
593
594         /* iterate merge with next holes */
595         while (1) {
596                 if (!rb_next(&hole->node))
597                         break;
598                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
599                                         node);
600                 if (hole->start + hole->len >= next->start) {
601                         if (hole->start + hole->len <= next->start + next->len)
602                                 hole->len = next->start + next->len -
603                                             hole->start;
604                         rb_erase(&next->node, holes);
605                         free(next);
606                         next = NULL;
607                 } else
608                         break;
609         }
610         return 0;
611 }
612
613 static int compare_hole_range(struct rb_node *node, void *data)
614 {
615         struct file_extent_hole *hole;
616         u64 start;
617
618         hole = (struct file_extent_hole *)data;
619         start = hole->start;
620
621         hole = rb_entry(node, struct file_extent_hole, node);
622         if (start < hole->start)
623                 return -1;
624         if (start >= hole->start && start < hole->start + hole->len)
625                 return 0;
626         return 1;
627 }
628
629 /*
630  * Delete a hole in the record
631  *
632  * This will do the hole split and is much restrict than add.
633  */
634 static int del_file_extent_hole(struct rb_root *holes,
635                                 u64 start, u64 len)
636 {
637         struct file_extent_hole *hole;
638         struct file_extent_hole tmp;
639         u64 prev_start = 0;
640         u64 prev_len = 0;
641         u64 next_start = 0;
642         u64 next_len = 0;
643         struct rb_node *node;
644         int have_prev = 0;
645         int have_next = 0;
646         int ret = 0;
647
648         tmp.start = start;
649         tmp.len = len;
650         node = rb_search(holes, &tmp, compare_hole_range, NULL);
651         if (!node)
652                 return -EEXIST;
653         hole = rb_entry(node, struct file_extent_hole, node);
654         if (start + len > hole->start + hole->len)
655                 return -EEXIST;
656
657         /*
658          * Now there will be no overlap, delete the hole and re-add the
659          * split(s) if they exists.
660          */
661         if (start > hole->start) {
662                 prev_start = hole->start;
663                 prev_len = start - hole->start;
664                 have_prev = 1;
665         }
666         if (hole->start + hole->len > start + len) {
667                 next_start = start + len;
668                 next_len = hole->start + hole->len - start - len;
669                 have_next = 1;
670         }
671         rb_erase(node, holes);
672         free(hole);
673         if (have_prev) {
674                 ret = add_file_extent_hole(holes, prev_start, prev_len);
675                 if (ret < 0)
676                         return ret;
677         }
678         if (have_next) {
679                 ret = add_file_extent_hole(holes, next_start, next_len);
680                 if (ret < 0)
681                         return ret;
682         }
683         return 0;
684 }
685
686 static int copy_file_extent_holes(struct rb_root *dst,
687                                   struct rb_root *src)
688 {
689         struct file_extent_hole *hole;
690         struct rb_node *node;
691         int ret = 0;
692
693         node = rb_first(src);
694         while (node) {
695                 hole = rb_entry(node, struct file_extent_hole, node);
696                 ret = add_file_extent_hole(dst, hole->start, hole->len);
697                 if (ret)
698                         break;
699                 node = rb_next(node);
700         }
701         return ret;
702 }
703
704 static void free_file_extent_holes(struct rb_root *holes)
705 {
706         struct rb_node *node;
707         struct file_extent_hole *hole;
708
709         node = rb_first(holes);
710         while (node) {
711                 hole = rb_entry(node, struct file_extent_hole, node);
712                 rb_erase(node, holes);
713                 free(hole);
714                 node = rb_first(holes);
715         }
716 }
717
/* Forward declaration; the definition is not in this part of the file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
719
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721                                  struct btrfs_root *root)
722 {
723         if (root->last_trans != trans->transid) {
724                 root->track_dirty = 1;
725                 root->last_trans = trans->transid;
726                 root->commit_root = root->node;
727                 extent_buffer_get(root->node);
728         }
729 }
730
731 static u8 imode_to_type(u32 imode)
732 {
733 #define S_SHIFT 12
734         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
736                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
737                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
738                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
739                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
740                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
741                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
742         };
743
744         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
745 #undef S_SHIFT
746 }
747
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
749 {
750         struct device_record *rec1;
751         struct device_record *rec2;
752
753         rec1 = rb_entry(node1, struct device_record, node);
754         rec2 = rb_entry(node2, struct device_record, node);
755         if (rec1->devid > rec2->devid)
756                 return -1;
757         else if (rec1->devid < rec2->devid)
758                 return 1;
759         else
760                 return 0;
761 }
762
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
764 {
765         struct inode_record *rec;
766         struct inode_backref *backref;
767         struct inode_backref *orig;
768         struct inode_backref *tmp;
769         struct orphan_data_extent *src_orphan;
770         struct orphan_data_extent *dst_orphan;
771         struct rb_node *rb;
772         size_t size;
773         int ret;
774
775         rec = malloc(sizeof(*rec));
776         if (!rec)
777                 return ERR_PTR(-ENOMEM);
778         memcpy(rec, orig_rec, sizeof(*rec));
779         rec->refs = 1;
780         INIT_LIST_HEAD(&rec->backrefs);
781         INIT_LIST_HEAD(&rec->orphan_extents);
782         rec->holes = RB_ROOT;
783
784         list_for_each_entry(orig, &orig_rec->backrefs, list) {
785                 size = sizeof(*orig) + orig->namelen + 1;
786                 backref = malloc(size);
787                 if (!backref) {
788                         ret = -ENOMEM;
789                         goto cleanup;
790                 }
791                 memcpy(backref, orig, size);
792                 list_add_tail(&backref->list, &rec->backrefs);
793         }
794         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795                 dst_orphan = malloc(sizeof(*dst_orphan));
796                 if (!dst_orphan) {
797                         ret = -ENOMEM;
798                         goto cleanup;
799                 }
800                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
802         }
803         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
804         if (ret < 0)
805                 goto cleanup_rb;
806
807         return rec;
808
809 cleanup_rb:
810         rb = rb_first(&rec->holes);
811         while (rb) {
812                 struct file_extent_hole *hole;
813
814                 hole = rb_entry(rb, struct file_extent_hole, node);
815                 rb = rb_next(rb);
816                 free(hole);
817         }
818
819 cleanup:
820         if (!list_empty(&rec->backrefs))
821                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822                         list_del(&orig->list);
823                         free(orig);
824                 }
825
826         if (!list_empty(&rec->orphan_extents))
827                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828                         list_del(&orig->list);
829                         free(orig);
830                 }
831
832         free(rec);
833
834         return ERR_PTR(ret);
835 }
836
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
838                                       u64 objectid)
839 {
840         struct orphan_data_extent *orphan;
841
842         if (list_empty(orphan_extents))
843                 return;
844         printf("The following data extent is lost in tree %llu:\n",
845                objectid);
846         list_for_each_entry(orphan, orphan_extents, list) {
847                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
849                        orphan->disk_len);
850         }
851 }
852
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
854 {
855         u64 root_objectid = root->root_key.objectid;
856         int errors = rec->errors;
857
858         if (!errors)
859                 return;
860         /* reloc root errors, we print its corresponding fs root objectid*/
861         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862                 root_objectid = root->root_key.offset;
863                 fprintf(stderr, "reloc");
864         }
865         fprintf(stderr, "root %llu inode %llu errors %x",
866                 (unsigned long long) root_objectid,
867                 (unsigned long long) rec->ino, rec->errors);
868
869         if (errors & I_ERR_NO_INODE_ITEM)
870                 fprintf(stderr, ", no inode item");
871         if (errors & I_ERR_NO_ORPHAN_ITEM)
872                 fprintf(stderr, ", no orphan item");
873         if (errors & I_ERR_DUP_INODE_ITEM)
874                 fprintf(stderr, ", dup inode item");
875         if (errors & I_ERR_DUP_DIR_INDEX)
876                 fprintf(stderr, ", dup dir index");
877         if (errors & I_ERR_ODD_DIR_ITEM)
878                 fprintf(stderr, ", odd dir item");
879         if (errors & I_ERR_ODD_FILE_EXTENT)
880                 fprintf(stderr, ", odd file extent");
881         if (errors & I_ERR_BAD_FILE_EXTENT)
882                 fprintf(stderr, ", bad file extent");
883         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884                 fprintf(stderr, ", file extent overlap");
885         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886                 fprintf(stderr, ", file extent discount");
887         if (errors & I_ERR_DIR_ISIZE_WRONG)
888                 fprintf(stderr, ", dir isize wrong");
889         if (errors & I_ERR_FILE_NBYTES_WRONG)
890                 fprintf(stderr, ", nbytes wrong");
891         if (errors & I_ERR_ODD_CSUM_ITEM)
892                 fprintf(stderr, ", odd csum item");
893         if (errors & I_ERR_SOME_CSUM_MISSING)
894                 fprintf(stderr, ", some csum missing");
895         if (errors & I_ERR_LINK_COUNT_WRONG)
896                 fprintf(stderr, ", link count wrong");
897         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898                 fprintf(stderr, ", orphan file extent");
899         fprintf(stderr, "\n");
900         /* Print the orphan extents if needed */
901         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
903
904         /* Print the holes if needed */
905         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906                 struct file_extent_hole *hole;
907                 struct rb_node *node;
908                 int found = 0;
909
910                 node = rb_first(&rec->holes);
911                 fprintf(stderr, "Found file extent holes:\n");
912                 while (node) {
913                         found = 1;
914                         hole = rb_entry(node, struct file_extent_hole, node);
915                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
916                                 hole->start, hole->len);
917                         node = rb_next(node);
918                 }
919                 if (!found)
920                         fprintf(stderr, "\tstart: 0, len: %llu\n",
921                                 round_up(rec->isize,
922                                          root->fs_info->sectorsize));
923         }
924 }
925
926 static void print_ref_error(int errors)
927 {
928         if (errors & REF_ERR_NO_DIR_ITEM)
929                 fprintf(stderr, ", no dir item");
930         if (errors & REF_ERR_NO_DIR_INDEX)
931                 fprintf(stderr, ", no dir index");
932         if (errors & REF_ERR_NO_INODE_REF)
933                 fprintf(stderr, ", no inode ref");
934         if (errors & REF_ERR_DUP_DIR_ITEM)
935                 fprintf(stderr, ", dup dir item");
936         if (errors & REF_ERR_DUP_DIR_INDEX)
937                 fprintf(stderr, ", dup dir index");
938         if (errors & REF_ERR_DUP_INODE_REF)
939                 fprintf(stderr, ", dup inode ref");
940         if (errors & REF_ERR_INDEX_UNMATCH)
941                 fprintf(stderr, ", index mismatch");
942         if (errors & REF_ERR_FILETYPE_UNMATCH)
943                 fprintf(stderr, ", filetype mismatch");
944         if (errors & REF_ERR_NAME_TOO_LONG)
945                 fprintf(stderr, ", name too long");
946         if (errors & REF_ERR_NO_ROOT_REF)
947                 fprintf(stderr, ", no root ref");
948         if (errors & REF_ERR_NO_ROOT_BACKREF)
949                 fprintf(stderr, ", no root backref");
950         if (errors & REF_ERR_DUP_ROOT_REF)
951                 fprintf(stderr, ", dup root ref");
952         if (errors & REF_ERR_DUP_ROOT_BACKREF)
953                 fprintf(stderr, ", dup root backref");
954         fprintf(stderr, "\n");
955 }
956
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
958                                           u64 ino, int mod)
959 {
960         struct ptr_node *node;
961         struct cache_extent *cache;
962         struct inode_record *rec = NULL;
963         int ret;
964
965         cache = lookup_cache_extent(inode_cache, ino, 1);
966         if (cache) {
967                 node = container_of(cache, struct ptr_node, cache);
968                 rec = node->data;
969                 if (mod && rec->refs > 1) {
970                         node->data = clone_inode_rec(rec);
971                         if (IS_ERR(node->data))
972                                 return node->data;
973                         rec->refs--;
974                         rec = node->data;
975                 }
976         } else if (mod) {
977                 rec = calloc(1, sizeof(*rec));
978                 if (!rec)
979                         return ERR_PTR(-ENOMEM);
980                 rec->ino = ino;
981                 rec->extent_start = (u64)-1;
982                 rec->refs = 1;
983                 INIT_LIST_HEAD(&rec->backrefs);
984                 INIT_LIST_HEAD(&rec->orphan_extents);
985                 rec->holes = RB_ROOT;
986
987                 node = malloc(sizeof(*node));
988                 if (!node) {
989                         free(rec);
990                         return ERR_PTR(-ENOMEM);
991                 }
992                 node->cache.start = ino;
993                 node->cache.size = 1;
994                 node->data = rec;
995
996                 if (ino == BTRFS_FREE_INO_OBJECTID)
997                         rec->found_link = 1;
998
999                 ret = insert_cache_extent(inode_cache, &node->cache);
1000                 if (ret)
1001                         return ERR_PTR(-EEXIST);
1002         }
1003         return rec;
1004 }
1005
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1007 {
1008         struct orphan_data_extent *orphan;
1009
1010         while (!list_empty(orphan_extents)) {
1011                 orphan = list_entry(orphan_extents->next,
1012                                     struct orphan_data_extent, list);
1013                 list_del(&orphan->list);
1014                 free(orphan);
1015         }
1016 }
1017
1018 static void free_inode_rec(struct inode_record *rec)
1019 {
1020         struct inode_backref *backref;
1021
1022         if (--rec->refs > 0)
1023                 return;
1024
1025         while (!list_empty(&rec->backrefs)) {
1026                 backref = to_inode_backref(rec->backrefs.next);
1027                 list_del(&backref->list);
1028                 free(backref);
1029         }
1030         free_orphan_data_extents(&rec->orphan_extents);
1031         free_file_extent_holes(&rec->holes);
1032         free(rec);
1033 }
1034
1035 static int can_free_inode_rec(struct inode_record *rec)
1036 {
1037         if (!rec->errors && rec->checked && rec->found_inode_item &&
1038             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1039                 return 1;
1040         return 0;
1041 }
1042
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044                                  struct inode_record *rec)
1045 {
1046         struct cache_extent *cache;
1047         struct inode_backref *tmp, *backref;
1048         struct ptr_node *node;
1049         u8 filetype;
1050
1051         if (!rec->found_inode_item)
1052                 return;
1053
1054         filetype = imode_to_type(rec->imode);
1055         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056                 if (backref->found_dir_item && backref->found_dir_index) {
1057                         if (backref->filetype != filetype)
1058                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059                         if (!backref->errors && backref->found_inode_ref &&
1060                             rec->nlink == rec->found_link) {
1061                                 list_del(&backref->list);
1062                                 free(backref);
1063                         }
1064                 }
1065         }
1066
1067         if (!rec->checked || rec->merging)
1068                 return;
1069
1070         if (S_ISDIR(rec->imode)) {
1071                 if (rec->found_size != rec->isize)
1072                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073                 if (rec->found_file_extent)
1074                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076                 if (rec->found_dir_item)
1077                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1078                 if (rec->found_size != rec->nbytes)
1079                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1080                 if (rec->nlink > 0 && !no_holes &&
1081                     (rec->extent_end < rec->isize ||
1082                      first_extent_gap(&rec->holes) < rec->isize))
1083                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1084         }
1085
1086         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087                 if (rec->found_csum_item && rec->nodatasum)
1088                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089                 if (rec->some_csum_missing && !rec->nodatasum)
1090                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1091         }
1092
1093         BUG_ON(rec->refs != 1);
1094         if (can_free_inode_rec(rec)) {
1095                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096                 node = container_of(cache, struct ptr_node, cache);
1097                 BUG_ON(node->data != rec);
1098                 remove_cache_extent(inode_cache, &node->cache);
1099                 free(node);
1100                 free_inode_rec(rec);
1101         }
1102 }
1103
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1105 {
1106         struct btrfs_path path;
1107         struct btrfs_key key;
1108         int ret;
1109
1110         key.objectid = BTRFS_ORPHAN_OBJECTID;
1111         key.type = BTRFS_ORPHAN_ITEM_KEY;
1112         key.offset = ino;
1113
1114         btrfs_init_path(&path);
1115         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116         btrfs_release_path(&path);
1117         if (ret > 0)
1118                 ret = -ENOENT;
1119         return ret;
1120 }
1121
1122 static int process_inode_item(struct extent_buffer *eb,
1123                               int slot, struct btrfs_key *key,
1124                               struct shared_node *active_node)
1125 {
1126         struct inode_record *rec;
1127         struct btrfs_inode_item *item;
1128
1129         rec = active_node->current;
1130         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131         if (rec->found_inode_item) {
1132                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1133                 return 1;
1134         }
1135         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136         rec->nlink = btrfs_inode_nlink(eb, item);
1137         rec->isize = btrfs_inode_size(eb, item);
1138         rec->nbytes = btrfs_inode_nbytes(eb, item);
1139         rec->imode = btrfs_inode_mode(eb, item);
1140         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1141                 rec->nodatasum = 1;
1142         rec->found_inode_item = 1;
1143         if (rec->nlink == 0)
1144                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145         maybe_free_inode_rec(&active_node->inode_cache, rec);
1146         return 0;
1147 }
1148
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1150                                                 const char *name,
1151                                                 int namelen, u64 dir)
1152 {
1153         struct inode_backref *backref;
1154
1155         list_for_each_entry(backref, &rec->backrefs, list) {
1156                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1157                         break;
1158                 if (backref->dir != dir || backref->namelen != namelen)
1159                         continue;
1160                 if (memcmp(name, backref->name, namelen))
1161                         continue;
1162                 return backref;
1163         }
1164
1165         backref = malloc(sizeof(*backref) + namelen + 1);
1166         if (!backref)
1167                 return NULL;
1168         memset(backref, 0, sizeof(*backref));
1169         backref->dir = dir;
1170         backref->namelen = namelen;
1171         memcpy(backref->name, name, namelen);
1172         backref->name[namelen] = '\0';
1173         list_add_tail(&backref->list, &rec->backrefs);
1174         return backref;
1175 }
1176
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178                              u64 ino, u64 dir, u64 index,
1179                              const char *name, int namelen,
1180                              u8 filetype, u8 itemtype, int errors)
1181 {
1182         struct inode_record *rec;
1183         struct inode_backref *backref;
1184
1185         rec = get_inode_rec(inode_cache, ino, 1);
1186         BUG_ON(IS_ERR(rec));
1187         backref = get_inode_backref(rec, name, namelen, dir);
1188         BUG_ON(!backref);
1189         if (errors)
1190                 backref->errors |= errors;
1191         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192                 if (backref->found_dir_index)
1193                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194                 if (backref->found_inode_ref && backref->index != index)
1195                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1196                 if (backref->found_dir_item && backref->filetype != filetype)
1197                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1198
1199                 backref->index = index;
1200                 backref->filetype = filetype;
1201                 backref->found_dir_index = 1;
1202         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1203                 rec->found_link++;
1204                 if (backref->found_dir_item)
1205                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206                 if (backref->found_dir_index && backref->filetype != filetype)
1207                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1208
1209                 backref->filetype = filetype;
1210                 backref->found_dir_item = 1;
1211         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213                 if (backref->found_inode_ref)
1214                         backref->errors |= REF_ERR_DUP_INODE_REF;
1215                 if (backref->found_dir_index && backref->index != index)
1216                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1217                 else
1218                         backref->index = index;
1219
1220                 backref->ref_type = itemtype;
1221                 backref->found_inode_ref = 1;
1222         } else {
1223                 BUG_ON(1);
1224         }
1225
1226         maybe_free_inode_rec(inode_cache, rec);
1227         return 0;
1228 }
1229
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231                             struct cache_tree *dst_cache)
1232 {
1233         struct inode_backref *backref;
1234         u32 dir_count = 0;
1235         int ret = 0;
1236
1237         dst->merging = 1;
1238         list_for_each_entry(backref, &src->backrefs, list) {
1239                 if (backref->found_dir_index) {
1240                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1241                                         backref->index, backref->name,
1242                                         backref->namelen, backref->filetype,
1243                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1244                 }
1245                 if (backref->found_dir_item) {
1246                         dir_count++;
1247                         add_inode_backref(dst_cache, dst->ino,
1248                                         backref->dir, 0, backref->name,
1249                                         backref->namelen, backref->filetype,
1250                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1251                 }
1252                 if (backref->found_inode_ref) {
1253                         add_inode_backref(dst_cache, dst->ino,
1254                                         backref->dir, backref->index,
1255                                         backref->name, backref->namelen, 0,
1256                                         backref->ref_type, backref->errors);
1257                 }
1258         }
1259
1260         if (src->found_dir_item)
1261                 dst->found_dir_item = 1;
1262         if (src->found_file_extent)
1263                 dst->found_file_extent = 1;
1264         if (src->found_csum_item)
1265                 dst->found_csum_item = 1;
1266         if (src->some_csum_missing)
1267                 dst->some_csum_missing = 1;
1268         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1270                 if (ret < 0)
1271                         return ret;
1272         }
1273
1274         BUG_ON(src->found_link < dir_count);
1275         dst->found_link += src->found_link - dir_count;
1276         dst->found_size += src->found_size;
1277         if (src->extent_start != (u64)-1) {
1278                 if (dst->extent_start == (u64)-1) {
1279                         dst->extent_start = src->extent_start;
1280                         dst->extent_end = src->extent_end;
1281                 } else {
1282                         if (dst->extent_end > src->extent_start)
1283                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284                         else if (dst->extent_end < src->extent_start) {
1285                                 ret = add_file_extent_hole(&dst->holes,
1286                                         dst->extent_end,
1287                                         src->extent_start - dst->extent_end);
1288                         }
1289                         if (dst->extent_end < src->extent_end)
1290                                 dst->extent_end = src->extent_end;
1291                 }
1292         }
1293
1294         dst->errors |= src->errors;
1295         if (src->found_inode_item) {
1296                 if (!dst->found_inode_item) {
1297                         dst->nlink = src->nlink;
1298                         dst->isize = src->isize;
1299                         dst->nbytes = src->nbytes;
1300                         dst->imode = src->imode;
1301                         dst->nodatasum = src->nodatasum;
1302                         dst->found_inode_item = 1;
1303                 } else {
1304                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1305                 }
1306         }
1307         dst->merging = 0;
1308
1309         return 0;
1310 }
1311
1312 static int splice_shared_node(struct shared_node *src_node,
1313                               struct shared_node *dst_node)
1314 {
1315         struct cache_extent *cache;
1316         struct ptr_node *node, *ins;
1317         struct cache_tree *src, *dst;
1318         struct inode_record *rec, *conflict;
1319         u64 current_ino = 0;
1320         int splice = 0;
1321         int ret;
1322
1323         if (--src_node->refs == 0)
1324                 splice = 1;
1325         if (src_node->current)
1326                 current_ino = src_node->current->ino;
1327
1328         src = &src_node->root_cache;
1329         dst = &dst_node->root_cache;
1330 again:
1331         cache = search_cache_extent(src, 0);
1332         while (cache) {
1333                 node = container_of(cache, struct ptr_node, cache);
1334                 rec = node->data;
1335                 cache = next_cache_extent(cache);
1336
1337                 if (splice) {
1338                         remove_cache_extent(src, &node->cache);
1339                         ins = node;
1340                 } else {
1341                         ins = malloc(sizeof(*ins));
1342                         BUG_ON(!ins);
1343                         ins->cache.start = node->cache.start;
1344                         ins->cache.size = node->cache.size;
1345                         ins->data = rec;
1346                         rec->refs++;
1347                 }
1348                 ret = insert_cache_extent(dst, &ins->cache);
1349                 if (ret == -EEXIST) {
1350                         conflict = get_inode_rec(dst, rec->ino, 1);
1351                         BUG_ON(IS_ERR(conflict));
1352                         merge_inode_recs(rec, conflict, dst);
1353                         if (rec->checked) {
1354                                 conflict->checked = 1;
1355                                 if (dst_node->current == conflict)
1356                                         dst_node->current = NULL;
1357                         }
1358                         maybe_free_inode_rec(dst, conflict);
1359                         free_inode_rec(rec);
1360                         free(ins);
1361                 } else {
1362                         BUG_ON(ret);
1363                 }
1364         }
1365
1366         if (src == &src_node->root_cache) {
1367                 src = &src_node->inode_cache;
1368                 dst = &dst_node->inode_cache;
1369                 goto again;
1370         }
1371
1372         if (current_ino > 0 && (!dst_node->current ||
1373             current_ino > dst_node->current->ino)) {
1374                 if (dst_node->current) {
1375                         dst_node->current->checked = 1;
1376                         maybe_free_inode_rec(dst, dst_node->current);
1377                 }
1378                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379                 BUG_ON(IS_ERR(dst_node->current));
1380         }
1381         return 0;
1382 }
1383
1384 static void free_inode_ptr(struct cache_extent *cache)
1385 {
1386         struct ptr_node *node;
1387         struct inode_record *rec;
1388
1389         node = container_of(cache, struct ptr_node, cache);
1390         rec = node->data;
1391         free_inode_rec(rec);
1392         free(node);
1393 }
1394
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1396
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1398                                             u64 bytenr)
1399 {
1400         struct cache_extent *cache;
1401         struct shared_node *node;
1402
1403         cache = lookup_cache_extent(shared, bytenr, 1);
1404         if (cache) {
1405                 node = container_of(cache, struct shared_node, cache);
1406                 return node;
1407         }
1408         return NULL;
1409 }
1410
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1412 {
1413         int ret;
1414         struct shared_node *node;
1415
1416         node = calloc(1, sizeof(*node));
1417         if (!node)
1418                 return -ENOMEM;
1419         node->cache.start = bytenr;
1420         node->cache.size = 1;
1421         cache_tree_init(&node->root_cache);
1422         cache_tree_init(&node->inode_cache);
1423         node->refs = refs;
1424
1425         ret = insert_cache_extent(shared, &node->cache);
1426
1427         return ret;
1428 }
1429
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431                              struct walk_control *wc, int level)
1432 {
1433         struct shared_node *node;
1434         struct shared_node *dest;
1435         int ret;
1436
1437         if (level == wc->active_node)
1438                 return 0;
1439
1440         BUG_ON(wc->active_node <= level);
1441         node = find_shared_node(&wc->shared, bytenr);
1442         if (!node) {
1443                 ret = add_shared_node(&wc->shared, bytenr, refs);
1444                 BUG_ON(ret);
1445                 node = find_shared_node(&wc->shared, bytenr);
1446                 wc->nodes[level] = node;
1447                 wc->active_node = level;
1448                 return 0;
1449         }
1450
1451         if (wc->root_level == wc->active_node &&
1452             btrfs_root_refs(&root->root_item) == 0) {
1453                 if (--node->refs == 0) {
1454                         free_inode_recs_tree(&node->root_cache);
1455                         free_inode_recs_tree(&node->inode_cache);
1456                         remove_cache_extent(&wc->shared, &node->cache);
1457                         free(node);
1458                 }
1459                 return 1;
1460         }
1461
1462         dest = wc->nodes[wc->active_node];
1463         splice_shared_node(node, dest);
1464         if (node->refs == 0) {
1465                 remove_cache_extent(&wc->shared, &node->cache);
1466                 free(node);
1467         }
1468         return 1;
1469 }
1470
1471 static int leave_shared_node(struct btrfs_root *root,
1472                              struct walk_control *wc, int level)
1473 {
1474         struct shared_node *node;
1475         struct shared_node *dest;
1476         int i;
1477
1478         if (level == wc->root_level)
1479                 return 0;
1480
1481         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1482                 if (wc->nodes[i])
1483                         break;
1484         }
1485         BUG_ON(i >= BTRFS_MAX_LEVEL);
1486
1487         node = wc->nodes[wc->active_node];
1488         wc->nodes[wc->active_node] = NULL;
1489         wc->active_node = i;
1490
1491         dest = wc->nodes[wc->active_node];
1492         if (wc->active_node < wc->root_level ||
1493             btrfs_root_refs(&root->root_item) > 0) {
1494                 BUG_ON(node->refs <= 1);
1495                 splice_shared_node(node, dest);
1496         } else {
1497                 BUG_ON(node->refs < 2);
1498                 node->refs--;
1499         }
1500         return 0;
1501 }
1502
1503 /*
1504  * Returns:
1505  * < 0 - on error
1506  * 1   - if the root with id child_root_id is a child of root parent_root_id
1507  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1508  *       has other root(s) as parent(s)
1509  * 2   - if the root child_root_id doesn't have any parent roots
1510  */
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1512                          u64 child_root_id)
1513 {
1514         struct btrfs_path path;
1515         struct btrfs_key key;
1516         struct extent_buffer *leaf;
1517         int has_parent = 0;
1518         int ret;
1519
1520         btrfs_init_path(&path);
1521
1522         key.objectid = parent_root_id;
1523         key.type = BTRFS_ROOT_REF_KEY;
1524         key.offset = child_root_id;
1525         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1526                                 0, 0);
1527         if (ret < 0)
1528                 return ret;
1529         btrfs_release_path(&path);
1530         if (!ret)
1531                 return 1;
1532
1533         key.objectid = child_root_id;
1534         key.type = BTRFS_ROOT_BACKREF_KEY;
1535         key.offset = 0;
1536         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1537                                 0, 0);
1538         if (ret < 0)
1539                 goto out;
1540
1541         while (1) {
1542                 leaf = path.nodes[0];
1543                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1545                         if (ret)
1546                                 break;
1547                         leaf = path.nodes[0];
1548                 }
1549
1550                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1551                 if (key.objectid != child_root_id ||
1552                     key.type != BTRFS_ROOT_BACKREF_KEY)
1553                         break;
1554
1555                 has_parent = 1;
1556
1557                 if (key.offset == parent_root_id) {
1558                         btrfs_release_path(&path);
1559                         return 1;
1560                 }
1561
1562                 path.slots[0]++;
1563         }
1564 out:
1565         btrfs_release_path(&path);
1566         if (ret < 0)
1567                 return ret;
1568         return has_parent ? 0 : 2;
1569 }
1570
1571 static int process_dir_item(struct extent_buffer *eb,
1572                             int slot, struct btrfs_key *key,
1573                             struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u32 data_len;
1580         int error;
1581         int nritems = 0;
1582         u8 filetype;
1583         struct btrfs_dir_item *di;
1584         struct inode_record *rec;
1585         struct cache_tree *root_cache;
1586         struct cache_tree *inode_cache;
1587         struct btrfs_key location;
1588         char namebuf[BTRFS_NAME_LEN];
1589
1590         root_cache = &active_node->root_cache;
1591         inode_cache = &active_node->inode_cache;
1592         rec = active_node->current;
1593         rec->found_dir_item = 1;
1594
1595         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596         total = btrfs_item_size_nr(eb, slot);
1597         while (cur < total) {
1598                 nritems++;
1599                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600                 name_len = btrfs_dir_name_len(eb, di);
1601                 data_len = btrfs_dir_data_len(eb, di);
1602                 filetype = btrfs_dir_type(eb, di);
1603
1604                 rec->found_size += name_len;
1605                 if (cur + sizeof(*di) + name_len > total ||
1606                     name_len > BTRFS_NAME_LEN) {
1607                         error = REF_ERR_NAME_TOO_LONG;
1608
1609                         if (cur + sizeof(*di) > total)
1610                                 break;
1611                         len = min_t(u32, total - cur - sizeof(*di),
1612                                     BTRFS_NAME_LEN);
1613                 } else {
1614                         len = name_len;
1615                         error = 0;
1616                 }
1617
1618                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1619
1620                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621                     key->offset != btrfs_name_hash(namebuf, len)) {
1622                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1623                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624                         key->objectid, key->offset, namebuf, len, filetype,
1625                         key->offset, btrfs_name_hash(namebuf, len));
1626                 }
1627
1628                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629                         add_inode_backref(inode_cache, location.objectid,
1630                                           key->objectid, key->offset, namebuf,
1631                                           len, filetype, key->type, error);
1632                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633                         add_inode_backref(root_cache, location.objectid,
1634                                           key->objectid, key->offset,
1635                                           namebuf, len, filetype,
1636                                           key->type, error);
1637                 } else {
1638                         fprintf(stderr,
1639                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1640                                 location.type, key->objectid, key->offset);
1641                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1642                                           key->objectid, key->offset, namebuf,
1643                                           len, filetype, key->type, error);
1644                 }
1645
1646                 len = sizeof(*di) + name_len + data_len;
1647                 di = (struct btrfs_dir_item *)((char *)di + len);
1648                 cur += len;
1649         }
1650         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1651                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1652
1653         return 0;
1654 }
1655
1656 static int process_inode_ref(struct extent_buffer *eb,
1657                              int slot, struct btrfs_key *key,
1658                              struct shared_node *active_node)
1659 {
1660         u32 total;
1661         u32 cur = 0;
1662         u32 len;
1663         u32 name_len;
1664         u64 index;
1665         int error;
1666         struct cache_tree *inode_cache;
1667         struct btrfs_inode_ref *ref;
1668         char namebuf[BTRFS_NAME_LEN];
1669
1670         inode_cache = &active_node->inode_cache;
1671
1672         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1673         total = btrfs_item_size_nr(eb, slot);
1674         while (cur < total) {
1675                 name_len = btrfs_inode_ref_name_len(eb, ref);
1676                 index = btrfs_inode_ref_index(eb, ref);
1677
1678                 /* inode_ref + namelen should not cross item boundary */
1679                 if (cur + sizeof(*ref) + name_len > total ||
1680                     name_len > BTRFS_NAME_LEN) {
1681                         if (total < cur + sizeof(*ref))
1682                                 break;
1683
1684                         /* Still try to read out the remaining part */
1685                         len = min_t(u32, total - cur - sizeof(*ref),
1686                                     BTRFS_NAME_LEN);
1687                         error = REF_ERR_NAME_TOO_LONG;
1688                 } else {
1689                         len = name_len;
1690                         error = 0;
1691                 }
1692
1693                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1694                 add_inode_backref(inode_cache, key->objectid, key->offset,
1695                                   index, namebuf, len, 0, key->type, error);
1696
1697                 len = sizeof(*ref) + name_len;
1698                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1699                 cur += len;
1700         }
1701         return 0;
1702 }
1703
1704 static int process_inode_extref(struct extent_buffer *eb,
1705                                 int slot, struct btrfs_key *key,
1706                                 struct shared_node *active_node)
1707 {
1708         u32 total;
1709         u32 cur = 0;
1710         u32 len;
1711         u32 name_len;
1712         u64 index;
1713         u64 parent;
1714         int error;
1715         struct cache_tree *inode_cache;
1716         struct btrfs_inode_extref *extref;
1717         char namebuf[BTRFS_NAME_LEN];
1718
1719         inode_cache = &active_node->inode_cache;
1720
1721         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1722         total = btrfs_item_size_nr(eb, slot);
1723         while (cur < total) {
1724                 name_len = btrfs_inode_extref_name_len(eb, extref);
1725                 index = btrfs_inode_extref_index(eb, extref);
1726                 parent = btrfs_inode_extref_parent(eb, extref);
1727                 if (name_len <= BTRFS_NAME_LEN) {
1728                         len = name_len;
1729                         error = 0;
1730                 } else {
1731                         len = BTRFS_NAME_LEN;
1732                         error = REF_ERR_NAME_TOO_LONG;
1733                 }
1734                 read_extent_buffer(eb, namebuf,
1735                                    (unsigned long)(extref + 1), len);
1736                 add_inode_backref(inode_cache, key->objectid, parent,
1737                                   index, namebuf, len, 0, key->type, error);
1738
1739                 len = sizeof(*extref) + name_len;
1740                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1741                 cur += len;
1742         }
1743         return 0;
1744
1745 }
1746
1747 static int count_csum_range(struct btrfs_root *root, u64 start,
1748                             u64 len, u64 *found)
1749 {
1750         struct btrfs_key key;
1751         struct btrfs_path path;
1752         struct extent_buffer *leaf;
1753         int ret;
1754         size_t size;
1755         *found = 0;
1756         u64 csum_end;
1757         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1758
1759         btrfs_init_path(&path);
1760
1761         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1762         key.offset = start;
1763         key.type = BTRFS_EXTENT_CSUM_KEY;
1764
1765         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1766                                 &key, &path, 0, 0);
1767         if (ret < 0)
1768                 goto out;
1769         if (ret > 0 && path.slots[0] > 0) {
1770                 leaf = path.nodes[0];
1771                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1772                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1773                     key.type == BTRFS_EXTENT_CSUM_KEY)
1774                         path.slots[0]--;
1775         }
1776
1777         while (len > 0) {
1778                 leaf = path.nodes[0];
1779                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1780                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1781                         if (ret > 0)
1782                                 break;
1783                         else if (ret < 0)
1784                                 goto out;
1785                         leaf = path.nodes[0];
1786                 }
1787
1788                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1789                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1790                     key.type != BTRFS_EXTENT_CSUM_KEY)
1791                         break;
1792
1793                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1794                 if (key.offset >= start + len)
1795                         break;
1796
1797                 if (key.offset > start)
1798                         start = key.offset;
1799
1800                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1801                 csum_end = key.offset + (size / csum_size) *
1802                            root->fs_info->sectorsize;
1803                 if (csum_end > start) {
1804                         size = min(csum_end - start, len);
1805                         len -= size;
1806                         start += size;
1807                         *found += size;
1808                 }
1809
1810                 path.slots[0]++;
1811         }
1812 out:
1813         btrfs_release_path(&path);
1814         if (ret < 0)
1815                 return ret;
1816         return 0;
1817 }
1818
/*
 * Account the EXTENT_DATA item at @slot of @eb in the inode record that is
 * current for @active_node.
 *
 * The function tracks the covered file range (recording holes and flagging
 * overlaps), validates the file extent fields, accumulates the found size and
 * compares the extent against the checksums present in the csum tree.
 * Problems are reported through rec->errors and the csum related flags of the
 * record, not through the return value.
 *
 * Returns 0 on success, or the negative value returned by
 * add_file_extent_hole() or count_csum_range() if one of them fails.
 */
static int process_file_extent(struct btrfs_root *root,
                                struct extent_buffer *eb,
                                int slot, struct btrfs_key *key,
                                struct shared_node *active_node)
{
        struct inode_record *rec;
        struct btrfs_file_extent_item *fi;
        u64 num_bytes = 0;
        u64 disk_bytenr = 0;
        u64 extent_offset = 0;
        /* Used below to test for and round up to sectorsize alignment */
        u64 mask = root->fs_info->sectorsize - 1;
        int extent_type;
        int ret;

        rec = active_node->current;
        BUG_ON(rec->ino != key->objectid || rec->refs > 1);
        rec->found_file_extent = 1;

        /* (u64)-1 marks that no file extent has been seen for this inode yet */
        if (rec->extent_start == (u64)-1) {
                rec->extent_start = key->offset;
                rec->extent_end = key->offset;
        }

        /*
         * Compare the end of the previous extent with the start of this one:
         * starting earlier is an overlap, starting later leaves a hole.
         */
        if (rec->extent_end > key->offset)
                rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
        else if (rec->extent_end < key->offset) {
                ret = add_file_extent_hole(&rec->holes, rec->extent_end,
                                           key->offset - rec->extent_end);
                if (ret < 0)
                        return ret;
        }

        fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
        extent_type = btrfs_file_extent_type(eb, fi);

        if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
                num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
                if (num_bytes == 0)
                        rec->errors |= I_ERR_BAD_FILE_EXTENT;
                /* found_size gets the exact length, extent_end the rounded one */
                rec->found_size += num_bytes;
                num_bytes = (num_bytes + mask) & ~mask;
        } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
                   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
                num_bytes = btrfs_file_extent_num_bytes(eb, fi);
                disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
                extent_offset = btrfs_file_extent_offset(eb, fi);
                /* Length must be non-zero and sector aligned */
                if (num_bytes == 0 || (num_bytes & mask))
                        rec->errors |= I_ERR_BAD_FILE_EXTENT;
                /* The referenced part must lie inside the ram_bytes of the extent */
                if (num_bytes + extent_offset >
                    btrfs_file_extent_ram_bytes(eb, fi))
                        rec->errors |= I_ERR_BAD_FILE_EXTENT;
                /* A preallocated extent must not carry any encoding */
                if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
                    (btrfs_file_extent_compression(eb, fi) ||
                     btrfs_file_extent_encryption(eb, fi) ||
                     btrfs_file_extent_other_encoding(eb, fi)))
                        rec->errors |= I_ERR_BAD_FILE_EXTENT;
                /* disk_bytenr == 0 is not counted into the found size */
                if (disk_bytenr > 0)
                        rec->found_size += num_bytes;
        } else {
                /* Unknown extent type, num_bytes stays 0 */
                rec->errors |= I_ERR_BAD_FILE_EXTENT;
        }
        rec->extent_end = key->offset + num_bytes;

        /*
         * The data reloc tree will copy full extents into its inode and then
         * copy the corresponding csums.  Because the extent it copied could be
         * a preallocated extent that hasn't been written to yet there may be no
         * csums to copy, ergo we won't have csums for our file extent.  This is
         * ok so just don't bother checking csums if the inode belongs to the
         * data reloc tree.
         */
        if (disk_bytenr > 0 &&
            btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
                u64 found;
                /*
                 * Compressed: look up csums for the whole on-disk extent.
                 * Otherwise: only for the part this item references.
                 */
                if (btrfs_file_extent_compression(eb, fi))
                        num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
                else
                        disk_bytenr += extent_offset;

                ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
                if (ret < 0)
                        return ret;
                if (extent_type == BTRFS_FILE_EXTENT_REG) {
                        if (found > 0)
                                rec->found_csum_item = 1;
                        if (found < num_bytes)
                                rec->some_csum_missing = 1;
                } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
                        /* Any csum found for a preallocated extent is flagged */
                        if (found > 0)
                                rec->errors |= I_ERR_ODD_CSUM_ITEM;
                }
        }
        return 0;
}

1913
1914 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1915                             struct walk_control *wc)
1916 {
1917         struct btrfs_key key;
1918         u32 nritems;
1919         int i;
1920         int ret = 0;
1921         struct cache_tree *inode_cache;
1922         struct shared_node *active_node;
1923
1924         if (wc->root_level == wc->active_node &&
1925             btrfs_root_refs(&root->root_item) == 0)
1926                 return 0;
1927
1928         active_node = wc->nodes[wc->active_node];
1929         inode_cache = &active_node->inode_cache;
1930         nritems = btrfs_header_nritems(eb);
1931         for (i = 0; i < nritems; i++) {
1932                 btrfs_item_key_to_cpu(eb, &key, i);
1933
1934                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1935                         continue;
1936                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1937                         continue;
1938
1939                 if (active_node->current == NULL ||
1940                     active_node->current->ino < key.objectid) {
1941                         if (active_node->current) {
1942                                 active_node->current->checked = 1;
1943                                 maybe_free_inode_rec(inode_cache,
1944                                                      active_node->current);
1945                         }
1946                         active_node->current = get_inode_rec(inode_cache,
1947                                                              key.objectid, 1);
1948                         BUG_ON(IS_ERR(active_node->current));
1949                 }
1950                 switch (key.type) {
1951                 case BTRFS_DIR_ITEM_KEY:
1952                 case BTRFS_DIR_INDEX_KEY:
1953                         ret = process_dir_item(eb, i, &key, active_node);
1954                         break;
1955                 case BTRFS_INODE_REF_KEY:
1956                         ret = process_inode_ref(eb, i, &key, active_node);
1957                         break;
1958                 case BTRFS_INODE_EXTREF_KEY:
1959                         ret = process_inode_extref(eb, i, &key, active_node);
1960                         break;
1961                 case BTRFS_INODE_ITEM_KEY:
1962                         ret = process_inode_item(eb, i, &key, active_node);
1963                         break;
1964                 case BTRFS_EXTENT_DATA_KEY:
1965                         ret = process_file_extent(root, eb, i, &key,
1966                                                   active_node);
1967                         break;
1968                 default:
1969                         break;
1970                 };
1971         }
1972         return ret;
1973 }
1974
/*
 * Per-level cache of extent information for the tree blocks on the path that
 * is currently being walked.  All arrays are indexed by tree level.
 */
struct node_refs {
        /* start bytenr of the tree block the cached values belong to */
        u64 bytenr[BTRFS_MAX_LEVEL];
        /* reference count looked up for that tree block */
        u64 refs[BTRFS_MAX_LEVEL];
        /* non-zero if the block has to be checked while walking this root */
        int need_check[BTRFS_MAX_LEVEL];
        /* field for checking all trees */
        int checked[BTRFS_MAX_LEVEL];
        /* the corresponding extent should be marked as full backref or not */
        int full_backref[BTRFS_MAX_LEVEL];
};

1984
1985 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1986                              struct extent_buffer *eb, struct node_refs *nrefs,
1987                              u64 level, int check_all);
1988 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1989                             unsigned int ext_ref);
1990
/*
 * Check the inodes found in the leaf at path->nodes[0], starting at the first
 * INODE_ITEM or at the first item whose objectid differs from the one of the
 * first item of the leaf.
 *
 * check_inode_item() is called repeatedly as long as the path stays in the
 * same leaf.  Once the path has moved to another leaf, the cached node
 * references in @nrefs are refreshed for the changed levels and *@level may
 * be raised if a block does not need checking from this root.
 *
 * Returns >0  Found error, not fatal, should continue
 * Returns <0  Fatal error, must exit the whole check
 * Returns 0   No errors found
 */
static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
                               struct node_refs *nrefs, int *level, int ext_ref)
{
        struct extent_buffer *cur = path->nodes[0];
        struct btrfs_key key;
        u64 cur_bytenr;
        u32 nritems;
        u64 first_ino = 0;
        int root_level = btrfs_header_level(root->node);
        int i;
        int ret = 0; /* Final return value */
        int err = 0; /* Positive error bitmap */

        /* Remember which leaf we started in to detect a leaf switch later */
        cur_bytenr = cur->start;

        /* skip to first inode item or the first inode number change */
        nritems = btrfs_header_nritems(cur);
        for (i = 0; i < nritems; i++) {
                btrfs_item_key_to_cpu(cur, &key, i);
                if (i == 0)
                        first_ino = key.objectid;
                if (key.type == BTRFS_INODE_ITEM_KEY ||
                    (first_ino && first_ino != key.objectid))
                        break;
        }
        /* Nothing to start from in this leaf, leave the slot past the end */
        if (i == nritems) {
                path->slots[0] = nritems;
                return 0;
        }
        path->slots[0] = i;

again:
        err |= check_inode_item(root, path, ext_ref);

        /* modify cur since check_inode_item may change path */
        cur = path->nodes[0];

        /* LAST_ITEM set in the result ends the walk */
        if (err & LAST_ITEM)
                goto out;

        /* still have inode items in this leaf */
        if (cur->start == cur_bytenr)
                goto again;

        /*
         * we have switched to another leaf, above nodes may
         * have changed, here walk down the path, if a node
         * or leaf is shared, check whether we can skip this
         * node or leaf.
         */
        for (i = root_level; i >= 0; i--) {
                /* This level still points to the block cached in nrefs */
                if (path->nodes[i]->start == nrefs->bytenr[i])
                        continue;

                ret = update_nodes_refs(root, path->nodes[i]->start,
                                path->nodes[i], nrefs, i, 0);
                if (ret)
                        goto out;

                if (!nrefs->need_check[i]) {
                        *level += 1;
                        break;
                }
        }

        /* Drop the path's buffers below the level the caller continues from */
        for (i = 0; i < *level; i++) {
                free_extent_buffer(path->nodes[i]);
                path->nodes[i] = NULL;
        }
out:
        /* LAST_ITEM is only an internal marker, do not report it as an error */
        err &= ~LAST_ITEM;
        if (err && !ret)
                ret = err;
        return ret;
}

2071
2072 static void reada_walk_down(struct btrfs_root *root,
2073                             struct extent_buffer *node, int slot)
2074 {
2075         struct btrfs_fs_info *fs_info = root->fs_info;
2076         u64 bytenr;
2077         u64 ptr_gen;
2078         u32 nritems;
2079         int i;
2080         int level;
2081
2082         level = btrfs_header_level(node);
2083         if (level != 1)
2084                 return;
2085
2086         nritems = btrfs_header_nritems(node);
2087         for (i = slot; i < nritems; i++) {
2088                 bytenr = btrfs_node_blockptr(node, i);
2089                 ptr_gen = btrfs_node_ptr_generation(node, i);
2090                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2091         }
2092 }
2093
2094 /*
2095  * Check the child node/leaf by the following condition:
2096  * 1. the first item key of the node/leaf should be the same with the one
2097  *    in parent.
2098  * 2. block in parent node should match the child node/leaf.
2099  * 3. generation of parent node and child's header should be consistent.
2100  *
2101  * Or the child node/leaf pointed by the key in parent is not valid.
2102  *
2103  * We hope to check leaf owner too, but since subvol may share leaves,
2104  * which makes leaf owner check not so strong, key check should be
2105  * sufficient enough for that case.
2106  */
2107 static int check_child_node(struct extent_buffer *parent, int slot,
2108                             struct extent_buffer *child)
2109 {
2110         struct btrfs_key parent_key;
2111         struct btrfs_key child_key;
2112         int ret = 0;
2113
2114         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2115         if (btrfs_header_level(child) == 0)
2116                 btrfs_item_key_to_cpu(child, &child_key, 0);
2117         else
2118                 btrfs_node_key_to_cpu(child, &child_key, 0);
2119
2120         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2121                 ret = -EINVAL;
2122                 fprintf(stderr,
2123                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2124                         parent_key.objectid, parent_key.type, parent_key.offset,
2125                         child_key.objectid, child_key.type, child_key.offset);
2126         }
2127         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2128                 ret = -EINVAL;
2129                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2130                         btrfs_node_blockptr(parent, slot),
2131                         btrfs_header_bytenr(child));
2132         }
2133         if (btrfs_node_ptr_generation(parent, slot) !=
2134             btrfs_header_generation(child)) {
2135                 ret = -EINVAL;
2136                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2137                         btrfs_header_generation(child),
2138                         btrfs_node_ptr_generation(parent, slot));
2139         }
2140         return ret;
2141 }
2142
2143 /*
2144  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2145  * in every fs or file tree check. Here we find its all root ids, and only check
2146  * it in the fs or file tree which has the smallest root id.
2147  */
2148 static int need_check(struct btrfs_root *root, struct ulist *roots)
2149 {
2150         struct rb_node *node;
2151         struct ulist_node *u;
2152
2153         /*
2154          * @roots can be empty if it belongs to tree reloc tree
2155          * In that case, we should always check the leaf, as we can't use
2156          * the tree owner to ensure some other root will check it.
2157          */
2158         if (roots->nnodes == 1 || roots->nnodes == 0)
2159                 return 1;
2160
2161         node = rb_first(&roots->root);
2162         u = rb_entry(node, struct ulist_node, rb_node);
2163         /*
2164          * current root id is not smallest, we skip it and let it be checked
2165          * in the fs or file tree who hash the smallest root id.
2166          */
2167         if (root->objectid != u->val)
2168                 return 0;
2169
2170         return 1;
2171 }
2172
2173 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2174                                u64 *flags_ret)
2175 {
2176         struct btrfs_root *extent_root = root->fs_info->extent_root;
2177         struct btrfs_root_item *ri = &root->root_item;
2178         struct btrfs_extent_inline_ref *iref;
2179         struct btrfs_extent_item *ei;
2180         struct btrfs_key key;
2181         struct btrfs_path *path = NULL;
2182         unsigned long ptr;
2183         unsigned long end;
2184         u64 flags;
2185         u64 owner = 0;
2186         u64 offset;
2187         int slot;
2188         int type;
2189         int ret = 0;
2190
2191         /*
2192          * Except file/reloc tree, we can not have FULL BACKREF MODE
2193          */
2194         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
2195                 goto normal;
2196
2197         /* root node */
2198         if (eb->start == btrfs_root_bytenr(ri))
2199                 goto normal;
2200
2201         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2202                 goto full_backref;
2203
2204         owner = btrfs_header_owner(eb);
2205         if (owner == root->objectid)
2206                 goto normal;
2207
2208         path = btrfs_alloc_path();
2209         if (!path)
2210                 return -ENOMEM;
2211
2212         key.objectid = btrfs_header_bytenr(eb);
2213         key.type = (u8)-1;
2214         key.offset = (u64)-1;
2215
2216         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2217         if (ret <= 0) {
2218                 ret = -EIO;
2219                 goto out;
2220         }
2221
2222         if (ret > 0) {
2223                 ret = btrfs_previous_extent_item(extent_root, path,
2224                                                  key.objectid);
2225                 if (ret)
2226                         goto full_backref;
2227
2228         }
2229         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2230
2231         eb = path->nodes[0];
2232         slot = path->slots[0];
2233         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2234
2235         flags = btrfs_extent_flags(eb, ei);
2236         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2237                 goto full_backref;
2238
2239         ptr = (unsigned long)(ei + 1);
2240         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
2241
2242         if (key.type == BTRFS_EXTENT_ITEM_KEY)
2243                 ptr += sizeof(struct btrfs_tree_block_info);
2244
2245 next:
2246         /* Reached extent item ends normally */
2247         if (ptr == end)
2248                 goto full_backref;
2249
2250         /* Beyond extent item end, wrong item size */
2251         if (ptr > end) {
2252                 error("extent item at bytenr %llu slot %d has wrong size",
2253                         eb->start, slot);
2254                 goto full_backref;
2255         }
2256
2257         iref = (struct btrfs_extent_inline_ref *)ptr;
2258         offset = btrfs_extent_inline_ref_offset(eb, iref);
2259         type = btrfs_extent_inline_ref_type(eb, iref);
2260
2261         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2262                 goto normal;
2263         ptr += btrfs_extent_inline_ref_size(type);
2264         goto next;
2265
2266 normal:
2267         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2268         goto out;
2269
2270 full_backref:
2271         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2272 out:
2273         btrfs_free_path(path);
2274         return ret;
2275 }
2276
/*
 * for a tree node or leaf, we record its reference count, so later if we still
 * process this node or leaf, don't need to compute its reference count again.
 *
 * @root       root the block is being walked from
 * @bytenr     if @bytenr == (u64)-1, only update nrefs->full_backref[level]
 * @eb         the tree block itself, may be NULL; only used if @check_all
 * @nrefs      per-level cache to update
 * @level      tree level of the block, index into the @nrefs arrays
 * @check_all  non-zero when called from the "check all trees" path
 *
 * Returns 0 on success or if @nrefs already caches @bytenr at @level.
 * Returns the negative value of btrfs_lookup_extent_info() if it fails and
 * @check_all is not set, and -EIO if btrfs_find_all_roots() fails.
 */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
                             struct extent_buffer *eb, struct node_refs *nrefs,
                             u64 level, int check_all)
{
        struct ulist *roots;
        u64 refs = 0;
        u64 flags = 0;
        int root_level = btrfs_header_level(root->node);
        int check;
        int ret;

        /* Already cached for this level, nothing to recompute */
        if (nrefs->bytenr[level] == bytenr)
                return 0;

        if (bytenr != (u64)-1) {
                /* the return value of this function seems a mistake */
                ret = btrfs_lookup_extent_info(NULL, root, bytenr,
                                       level, 1, &refs, &flags);
                /* temporary fix */
                /* with check_all set a failure is ignored and refs stays 0 */
                if (ret < 0 && !check_all)
                        return ret;

                nrefs->bytenr[level] = bytenr;
                nrefs->refs[level] = refs;
                nrefs->full_backref[level] = 0;
                nrefs->checked[level] = 0;

                if (refs > 1) {
                        /* Shared block: let need_check() pick the checking root */
                        ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
                                                   0, &roots);
                        if (ret)
                                return -EIO;

                        check = need_check(root, roots);
                        ulist_free(roots);
                        nrefs->need_check[level] = check;
                } else {
                        if (!check_all) {
                                nrefs->need_check[level] = 1;
                        } else {
                                if (level == root_level) {
                                        nrefs->need_check[level] = 1;
                                } else {
                                        /*
                                         * The node refs may have not been
                                         * updated if upper needs checking (the
                                         * lowest root_objectid) the node can
                                         * be checked.
                                         */
                                        nrefs->need_check[level] =
                                                nrefs->need_check[level + 1];
                                }
                        }
                }
        }

        /* The result of calc_extent_flag_v2() is not checked, only @flags is used */
        if (check_all && eb) {
                calc_extent_flag_v2(root, eb, &flags);
                if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
                        nrefs->full_backref[level] = 1;
        }

        return 0;
}

2347
2348 /*
2349  * @level           if @level == -1 means extent data item
2350  *                  else normal treeblocl.
2351  */
2352 static int should_check_extent_strictly(struct btrfs_root *root,
2353                                         struct node_refs *nrefs, int level)
2354 {
2355         int root_level = btrfs_header_level(root->node);
2356
2357         if (level > root_level || level < -1)
2358                 return 1;
2359         if (level == root_level)
2360                 return 1;
2361         /*
2362          * if the upper node is marked full backref, it should contain shared
2363          * backref of the parent (except owner == root->objectid).
2364          */
2365         while (++level <= root_level)
2366                 if (nrefs->refs[level] > 1)
2367                         return 0;
2368
2369         return 1;
2370 }
2371
2372 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2373                           struct walk_control *wc, int *level,
2374                           struct node_refs *nrefs)
2375 {
2376         enum btrfs_tree_block_status status;
2377         u64 bytenr;
2378         u64 ptr_gen;
2379         struct btrfs_fs_info *fs_info = root->fs_info;
2380         struct extent_buffer *next;
2381         struct extent_buffer *cur;
2382         int ret, err = 0;
2383         u64 refs;
2384
2385         WARN_ON(*level < 0);
2386         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2387
2388         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2389                 refs = nrefs->refs[*level];
2390                 ret = 0;
2391         } else {
2392                 ret = btrfs_lookup_extent_info(NULL, root,
2393                                        path->nodes[*level]->start,
2394                                        *level, 1, &refs, NULL);
2395                 if (ret < 0) {
2396                         err = ret;
2397                         goto out;
2398                 }
2399                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2400                 nrefs->refs[*level] = refs;
2401         }
2402
2403         if (refs > 1) {
2404                 ret = enter_shared_node(root, path->nodes[*level]->start,
2405                                         refs, wc, *level);
2406                 if (ret > 0) {
2407                         err = ret;
2408                         goto out;
2409                 }
2410         }
2411
2412         while (*level >= 0) {
2413                 WARN_ON(*level < 0);
2414                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2415                 cur = path->nodes[*level];
2416
2417                 if (btrfs_header_level(cur) != *level)
2418                         WARN_ON(1);
2419
2420                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2421                         break;
2422                 if (*level == 0) {
2423                         ret = process_one_leaf(root, cur, wc);
2424                         if (ret < 0)
2425                                 err = ret;
2426                         break;
2427                 }
2428                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2429                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2430
2431                 if (bytenr == nrefs->bytenr[*level - 1]) {
2432                         refs = nrefs->refs[*level - 1];
2433                 } else {
2434                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2435                                         *level - 1, 1, &refs, NULL);
2436                         if (ret < 0) {
2437                                 refs = 0;
2438                         } else {
2439                                 nrefs->bytenr[*level - 1] = bytenr;
2440                                 nrefs->refs[*level - 1] = refs;
2441                         }
2442                 }
2443
2444                 if (refs > 1) {
2445                         ret = enter_shared_node(root, bytenr, refs,
2446                                                 wc, *level - 1);
2447                         if (ret > 0) {
2448                                 path->slots[*level]++;
2449                                 continue;
2450                         }
2451                 }
2452
2453                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2454                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2455                         free_extent_buffer(next);
2456                         reada_walk_down(root, cur, path->slots[*level]);
2457                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2458                         if (!extent_buffer_uptodate(next)) {
2459                                 struct btrfs_key node_key;
2460
2461                                 btrfs_node_key_to_cpu(path->nodes[*level],
2462                                                       &node_key,
2463                                                       path->slots[*level]);
2464                                 btrfs_add_corrupt_extent_record(root->fs_info,
2465                                                 &node_key,
2466                                                 path->nodes[*level]->start,
2467                                                 root->fs_info->nodesize,
2468                                                 *level);
2469                                 err = -EIO;
2470                                 goto out;
2471                         }
2472                 }
2473
2474                 ret = check_child_node(cur, path->slots[*level], next);
2475                 if (ret) {
2476                         free_extent_buffer(next);
2477                         err = ret;
2478                         goto out;
2479                 }
2480
2481                 if (btrfs_is_leaf(next))
2482                         status = btrfs_check_leaf(root, NULL, next);
2483                 else
2484                         status = btrfs_check_node(root, NULL, next);
2485                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2486                         free_extent_buffer(next);
2487                         err = -EIO;
2488                         goto out;
2489                 }
2490
2491                 *level = *level - 1;
2492                 free_extent_buffer(path->nodes[*level]);
2493                 path->nodes[*level] = next;
2494                 path->slots[*level] = 0;
2495         }
2496 out:
2497         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2498         return err;
2499 }
2500
2501 static int fs_root_objectid(u64 objectid);
2502
2503 /*
2504  * Update global fs information.
2505  */
2506 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2507                          int level)
2508 {
2509         u32 free_nrs;
2510         struct extent_buffer *eb = path->nodes[level];
2511
2512         total_btree_bytes += eb->len;
2513         if (fs_root_objectid(root->objectid))
2514                 total_fs_tree_bytes += eb->len;
2515         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2516                 total_extent_tree_bytes += eb->len;
2517
2518         if (level == 0) {
2519                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2520         } else {
2521                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2522                             btrfs_header_nritems(eb));
2523                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2524         }
2525 }
2526
2527 /*
2528  * This function only handles BACKREF_MISSING,
2529  * If corresponding extent item exists, increase the ref, else insert an extent
2530  * item and backref.
2531  *
2532  * Returns error bits after repair.
2533  */
2534 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2535                                  struct btrfs_root *root,
2536                                  struct extent_buffer *node,
2537                                  struct node_refs *nrefs, int level, int err)
2538 {
2539         struct btrfs_fs_info *fs_info = root->fs_info;
2540         struct btrfs_root *extent_root = fs_info->extent_root;
2541         struct btrfs_path path;
2542         struct btrfs_extent_item *ei;
2543         struct btrfs_tree_block_info *bi;
2544         struct btrfs_key key;
2545         struct extent_buffer *eb;
2546         u32 size = sizeof(*ei);
2547         u32 node_size = root->fs_info->nodesize;
2548         int insert_extent = 0;
2549         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2550         int root_level = btrfs_header_level(root->node);
2551         int generation;
2552         int ret;
2553         u64 owner;
2554         u64 bytenr;
2555         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2556         u64 parent = 0;
2557
2558         if ((err & BACKREF_MISSING) == 0)
2559                 return err;
2560
2561         WARN_ON(level > BTRFS_MAX_LEVEL);
2562         WARN_ON(level < 0);
2563
2564         btrfs_init_path(&path);
2565         bytenr = btrfs_header_bytenr(node);
2566         owner = btrfs_header_owner(node);
2567         generation = btrfs_header_generation(node);
2568
2569         key.objectid = bytenr;
2570         key.type = (u8)-1;
2571         key.offset = (u64)-1;
2572
2573         /* Search for the extent item */
2574         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2575         if (ret <= 0) {
2576                 ret = -EIO;
2577                 goto out;
2578         }
2579
2580         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2581         if (ret)
2582                 insert_extent = 1;
2583
2584         /* calculate if the extent item flag is full backref or not */
2585         if (nrefs->full_backref[level] != 0)
2586                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2587
2588         /* insert an extent item */
2589         if (insert_extent) {
2590                 struct btrfs_disk_key copy_key;
2591
2592                 generation = btrfs_header_generation(node);
2593
2594                 if (level < root_level && nrefs->full_backref[level + 1] &&
2595                     owner != root->objectid) {
2596                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2597                 }
2598
2599                 key.objectid = bytenr;
2600                 if (!skinny_metadata) {
2601                         key.type = BTRFS_EXTENT_ITEM_KEY;
2602                         key.offset = node_size;
2603                         size += sizeof(*bi);
2604                 } else {
2605                         key.type = BTRFS_METADATA_ITEM_KEY;
2606                         key.offset = level;
2607                 }
2608
2609                 btrfs_release_path(&path);
2610                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2611                                               size);
2612                 if (ret)
2613                         goto out;
2614
2615                 eb = path.nodes[0];
2616                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2617
2618                 btrfs_set_extent_refs(eb, ei, 0);
2619                 btrfs_set_extent_generation(eb, ei, generation);
2620                 btrfs_set_extent_flags(eb, ei, flags);
2621
2622                 if (!skinny_metadata) {
2623                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2624                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2625                                              sizeof(*bi));
2626                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2627                         btrfs_set_disk_key_type(&copy_key, 0);
2628                         btrfs_set_disk_key_offset(&copy_key, 0);
2629
2630                         btrfs_set_tree_block_level(eb, bi, level);
2631                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2632                 }
2633                 btrfs_mark_buffer_dirty(eb);
2634                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2635                 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2636
2637                 nrefs->refs[level] = 0;
2638                 nrefs->full_backref[level] =
2639                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2640                 btrfs_release_path(&path);
2641         }
2642
2643         if (level < root_level && nrefs->full_backref[level + 1] &&
2644             owner != root->objectid)
2645                 parent = nrefs->bytenr[level + 1];
2646
2647         /* increase the ref */
2648         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2649                         parent, root->objectid, level, 0);
2650
2651         nrefs->refs[level]++;
2652 out:
2653         btrfs_release_path(&path);
2654         if (ret) {
2655                 error(
2656         "failed to repair tree block ref start %llu root %llu due to %s",
2657                       bytenr, root->objectid, strerror(-ret));
2658         } else {
2659                 printf("Added one tree block ref start %llu %s %llu\n",
2660                        bytenr, parent ? "parent" : "root",
2661                        parent ? parent : root->objectid);
2662                 err &= ~BACKREF_MISSING;
2663         }
2664
2665         return err;
2666 }
2667
2668 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2669                             unsigned int ext_ref);
2670 static int check_tree_block_ref(struct btrfs_root *root,
2671                                 struct extent_buffer *eb, u64 bytenr,
2672                                 int level, u64 owner, struct node_refs *nrefs);
2673 static int check_leaf_items(struct btrfs_trans_handle *trans,
2674                             struct btrfs_root *root, struct btrfs_path *path,
2675                             struct node_refs *nrefs, int account_bytes);
2676
2677 /*
2678  * @trans      just for lowmem repair mode
2679  * @check all  if not 0 then check all tree block backrefs and items
2680  *             0 then just check relationship of items in fs tree(s)
2681  *
2682  * Returns >0  Found error, should continue
2683  * Returns <0  Fatal error, must exit the whole check
2684  * Returns 0   No errors found
2685  */
2686 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2687                              struct btrfs_root *root, struct btrfs_path *path,
2688                              int *level, struct node_refs *nrefs, int ext_ref,
2689                              int check_all)
2690
2691 {
2692         enum btrfs_tree_block_status status;
2693         u64 bytenr;
2694         u64 ptr_gen;
2695         struct btrfs_fs_info *fs_info = root->fs_info;
2696         struct extent_buffer *next;
2697         struct extent_buffer *cur;
2698         int ret;
2699         int err = 0;
2700         int check;
2701         int account_file_data = 0;
2702
2703         WARN_ON(*level < 0);
2704         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2705
2706         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2707                                 path->nodes[*level], nrefs, *level, check_all);
2708         if (ret < 0)
2709                 return ret;
2710
2711         while (*level >= 0) {
2712                 WARN_ON(*level < 0);
2713                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2714                 cur = path->nodes[*level];
2715                 bytenr = btrfs_header_bytenr(cur);
2716                 check = nrefs->need_check[*level];
2717
2718                 if (btrfs_header_level(cur) != *level)
2719                         WARN_ON(1);
2720                /*
2721                 * Update bytes accounting and check tree block ref
2722                 * NOTE: Doing accounting and check before checking nritems
2723                 * is necessary because of empty node/leaf.
2724                 */
2725                 if ((check_all && !nrefs->checked[*level]) ||
2726                     (!check_all && nrefs->need_check[*level])) {
2727                         ret = check_tree_block_ref(root, cur,
2728                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2729                            btrfs_header_owner(cur), nrefs);
2730
2731                         if (repair && ret)
2732                                 ret = repair_tree_block_ref(trans, root,
2733                                     path->nodes[*level], nrefs, *level, ret);
2734                         err |= ret;
2735
2736                         if (check_all && nrefs->need_check[*level] &&
2737                                 nrefs->refs[*level]) {
2738                                 account_bytes(root, path, *level);
2739                                 account_file_data = 1;
2740                         }
2741                         nrefs->checked[*level] = 1;
2742                 }
2743
2744                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2745                         break;
2746
2747                 /* Don't forgot to check leaf/node validation */
2748                 if (*level == 0) {
2749                         /* skip duplicate check */
2750                         if (check || !check_all) {
2751                                 ret = btrfs_check_leaf(root, NULL, cur);
2752                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2753                                         err |= -EIO;
2754                                         break;
2755                                 }
2756                         }
2757
2758                         ret = 0;
2759                         if (!check_all)
2760                                 ret = process_one_leaf_v2(root, path, nrefs,
2761                                                           level, ext_ref);
2762                         else
2763                                 ret = check_leaf_items(trans, root, path,
2764                                                nrefs, account_file_data);
2765                         err |= ret;
2766                         break;
2767                 } else {
2768                         if (check || !check_all) {
2769                                 ret = btrfs_check_node(root, NULL, cur);
2770                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2771                                         err |= -EIO;
2772                                         break;
2773                                 }
2774                         }
2775                 }
2776
2777                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2778                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2779
2780                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2781                                         check_all);
2782                 if (ret < 0)
2783                         break;
2784                 /*
2785                  * check all trees in check_chunks_and_extent_v2
2786                  * check shared node once in check_fs_roots
2787                  */
2788                 if (!check_all && !nrefs->need_check[*level - 1]) {
2789                         path->slots[*level]++;
2790                         continue;
2791                 }
2792
2793                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2794                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2795                         free_extent_buffer(next);
2796                         reada_walk_down(root, cur, path->slots[*level]);
2797                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2798                         if (!extent_buffer_uptodate(next)) {
2799                                 struct btrfs_key node_key;
2800
2801                                 btrfs_node_key_to_cpu(path->nodes[*level],
2802                                                       &node_key,
2803                                                       path->slots[*level]);
2804                                 btrfs_add_corrupt_extent_record(fs_info,
2805                                         &node_key, path->nodes[*level]->start,
2806                                         fs_info->nodesize, *level);
2807                                 err |= -EIO;
2808                                 break;
2809                         }
2810                 }
2811
2812                 ret = check_child_node(cur, path->slots[*level], next);
2813                 err |= ret;
2814                 if (ret < 0) 
2815                         break;
2816
2817                 if (btrfs_is_leaf(next))
2818                         status = btrfs_check_leaf(root, NULL, next);
2819                 else
2820                         status = btrfs_check_node(root, NULL, next);
2821                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2822                         free_extent_buffer(next);
2823                         err |= -EIO;
2824                         break;
2825                 }
2826
2827                 *level = *level - 1;
2828                 free_extent_buffer(path->nodes[*level]);
2829                 path->nodes[*level] = next;
2830                 path->slots[*level] = 0;
2831                 account_file_data = 0;
2832
2833                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2834         }
2835         return err;
2836 }
2837
2838 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2839                         struct walk_control *wc, int *level)
2840 {
2841         int i;
2842         struct extent_buffer *leaf;
2843
2844         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2845                 leaf = path->nodes[i];
2846                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2847                         path->slots[i]++;
2848                         *level = i;
2849                         return 0;
2850                 } else {
2851                         free_extent_buffer(path->nodes[*level]);
2852                         path->nodes[*level] = NULL;
2853                         BUG_ON(*level > wc->active_node);
2854                         if (*level == wc->active_node)
2855                                 leave_shared_node(root, wc, *level);
2856                         *level = i + 1;
2857                 }
2858         }
2859         return 1;
2860 }
2861
2862 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2863                            int *level)
2864 {
2865         int i;
2866         struct extent_buffer *leaf;
2867
2868         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2869                 leaf = path->nodes[i];
2870                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2871                         path->slots[i]++;
2872                         *level = i;
2873                         return 0;
2874                 } else {
2875                         free_extent_buffer(path->nodes[*level]);
2876                         path->nodes[*level] = NULL;
2877                         *level = i + 1;
2878                 }
2879         }
2880         return 1;
2881 }
2882
2883 static int check_root_dir(struct inode_record *rec)
2884 {
2885         struct inode_backref *backref;
2886         int ret = -1;
2887
2888         if (!rec->found_inode_item || rec->errors)
2889                 goto out;
2890         if (rec->nlink != 1 || rec->found_link != 0)
2891                 goto out;
2892         if (list_empty(&rec->backrefs))
2893                 goto out;
2894         backref = to_inode_backref(rec->backrefs.next);
2895         if (!backref->found_inode_ref)
2896                 goto out;
2897         if (backref->index != 0 || backref->namelen != 2 ||
2898             memcmp(backref->name, "..", 2))
2899                 goto out;
2900         if (backref->found_dir_index || backref->found_dir_item)
2901                 goto out;
2902         ret = 0;
2903 out:
2904         return ret;
2905 }
2906
2907 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2908                               struct btrfs_root *root, struct btrfs_path *path,
2909                               struct inode_record *rec)
2910 {
2911         struct btrfs_inode_item *ei;
2912         struct btrfs_key key;
2913         int ret;
2914
2915         key.objectid = rec->ino;
2916         key.type = BTRFS_INODE_ITEM_KEY;
2917         key.offset = (u64)-1;
2918
2919         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2920         if (ret < 0)
2921                 goto out;
2922         if (ret) {
2923                 if (!path->slots[0]) {
2924                         ret = -ENOENT;
2925                         goto out;
2926                 }
2927                 path->slots[0]--;
2928                 ret = 0;
2929         }
2930         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2931         if (key.objectid != rec->ino) {
2932                 ret = -ENOENT;
2933                 goto out;
2934         }
2935
2936         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2937                             struct btrfs_inode_item);
2938         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2939         btrfs_mark_buffer_dirty(path->nodes[0]);
2940         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2941         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2942                root->root_key.objectid);
2943 out:
2944         btrfs_release_path(path);
2945         return ret;
2946 }
2947
2948 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2949                                     struct btrfs_root *root,
2950                                     struct btrfs_path *path,
2951                                     struct inode_record *rec)
2952 {
2953         int ret;
2954
2955         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2956         btrfs_release_path(path);
2957         if (!ret)
2958                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2959         return ret;
2960 }
2961
2962 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2963                                struct btrfs_root *root,
2964                                struct btrfs_path *path,
2965                                struct inode_record *rec)
2966 {
2967         struct btrfs_inode_item *ei;
2968         struct btrfs_key key;
2969         int ret = 0;
2970
2971         key.objectid = rec->ino;
2972         key.type = BTRFS_INODE_ITEM_KEY;
2973         key.offset = 0;
2974
2975         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2976         if (ret) {
2977                 if (ret > 0)
2978                         ret = -ENOENT;
2979                 goto out;
2980         }
2981
2982         /* Since ret == 0, no need to check anything */
2983         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2984                             struct btrfs_inode_item);
2985         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2986         btrfs_mark_buffer_dirty(path->nodes[0]);
2987         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2988         printf("reset nbytes for ino %llu root %llu\n",
2989                rec->ino, root->root_key.objectid);
2990 out:
2991         btrfs_release_path(path);
2992         return ret;
2993 }
2994
2995 static int add_missing_dir_index(struct btrfs_root *root,
2996                                  struct cache_tree *inode_cache,
2997                                  struct inode_record *rec,
2998                                  struct inode_backref *backref)
2999 {
3000         struct btrfs_path path;
3001         struct btrfs_trans_handle *trans;
3002         struct btrfs_dir_item *dir_item;
3003         struct extent_buffer *leaf;
3004         struct btrfs_key key;
3005         struct btrfs_disk_key disk_key;
3006         struct inode_record *dir_rec;
3007         unsigned long name_ptr;
3008         u32 data_size = sizeof(*dir_item) + backref->namelen;
3009         int ret;
3010
3011         trans = btrfs_start_transaction(root, 1);
3012         if (IS_ERR(trans))
3013                 return PTR_ERR(trans);
3014
3015         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3016                 (unsigned long long)rec->ino);
3017
3018         btrfs_init_path(&path);
3019         key.objectid = backref->dir;
3020         key.type = BTRFS_DIR_INDEX_KEY;
3021         key.offset = backref->index;
3022         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3023         BUG_ON(ret);
3024
3025         leaf = path.nodes[0];
3026         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
3027
3028         disk_key.objectid = cpu_to_le64(rec->ino);
3029         disk_key.type = BTRFS_INODE_ITEM_KEY;
3030         disk_key.offset = 0;
3031
3032         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3033         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3034         btrfs_set_dir_data_len(leaf, dir_item, 0);
3035         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
3036         name_ptr = (unsigned long)(dir_item + 1);
3037         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3038         btrfs_mark_buffer_dirty(leaf);
3039         btrfs_release_path(&path);
3040         btrfs_commit_transaction(trans, root);
3041
3042         backref->found_dir_index = 1;
3043         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3044         BUG_ON(IS_ERR(dir_rec));
3045         if (!dir_rec)
3046                 return 0;
3047         dir_rec->found_size += backref->namelen;
3048         if (dir_rec->found_size == dir_rec->isize &&
3049             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3050                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3051         if (dir_rec->found_size != dir_rec->isize)
3052                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
3053
3054         return 0;
3055 }
3056
3057 static int delete_dir_index(struct btrfs_root *root,
3058                             struct inode_backref *backref)
3059 {
3060         struct btrfs_trans_handle *trans;
3061         struct btrfs_dir_item *di;
3062         struct btrfs_path path;
3063         int ret = 0;
3064
3065         trans = btrfs_start_transaction(root, 1);
3066         if (IS_ERR(trans))
3067                 return PTR_ERR(trans);
3068
3069         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3070                 (unsigned long long)backref->dir,
3071                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3072                 (unsigned long long)root->objectid);
3073
3074         btrfs_init_path(&path);
3075         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3076                                     backref->name, backref->namelen,
3077                                     backref->index, -1);
3078         if (IS_ERR(di)) {
3079                 ret = PTR_ERR(di);
3080                 btrfs_release_path(&path);
3081                 btrfs_commit_transaction(trans, root);
3082                 if (ret == -ENOENT)
3083                         return 0;
3084                 return ret;
3085         }
3086
3087         if (!di)
3088                 ret = btrfs_del_item(trans, root, &path);
3089         else
3090                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3091         BUG_ON(ret);
3092         btrfs_release_path(&path);
3093         btrfs_commit_transaction(trans, root);
3094         return ret;
3095 }
3096
3097 static int __create_inode_item(struct btrfs_trans_handle *trans,
3098                                struct btrfs_root *root, u64 ino, u64 size,
3099                                u64 nbytes, u64 nlink, u32 mode)
3100 {
3101         struct btrfs_inode_item ii;
3102         time_t now = time(NULL);
3103         int ret;
3104
3105         btrfs_set_stack_inode_size(&ii, size);
3106         btrfs_set_stack_inode_nbytes(&ii, nbytes);
3107         btrfs_set_stack_inode_nlink(&ii, nlink);
3108         btrfs_set_stack_inode_mode(&ii, mode);
3109         btrfs_set_stack_inode_generation(&ii, trans->transid);
3110         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3111         btrfs_set_stack_timespec_sec(&ii.ctime, now);
3112         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3113         btrfs_set_stack_timespec_sec(&ii.mtime, now);
3114         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3115         btrfs_set_stack_timespec_sec(&ii.otime, 0);
3116         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3117
3118         ret = btrfs_insert_inode(trans, root, ino, &ii);
3119         ASSERT(!ret);
3120
3121         warning("root %llu inode %llu recreating inode item, this may "
3122                 "be incomplete, please check permissions and content after "
3123                 "the fsck completes.\n", (unsigned long long)root->objectid,
3124                 (unsigned long long)ino);
3125
3126         return 0;
3127 }
3128
3129 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3130                                     struct btrfs_root *root, u64 ino,
3131                                     u8 filetype)
3132 {
3133         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3134
3135         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
3136 }
3137
3138 static int create_inode_item(struct btrfs_root *root,
3139                              struct inode_record *rec, int root_dir)
3140 {
3141         struct btrfs_trans_handle *trans;
3142         u64 nlink = 0;
3143         u32 mode = 0;
3144         u64 size = 0;
3145         int ret;
3146
3147         trans = btrfs_start_transaction(root, 1);
3148         if (IS_ERR(trans)) {
3149                 ret = PTR_ERR(trans);
3150                 return ret;
3151         }
3152
3153         nlink = root_dir ? 1 : rec->found_link;
3154         if (rec->found_dir_item) {
3155                 if (rec->found_file_extent)
3156                         fprintf(stderr, "root %llu inode %llu has both a dir "
3157                                 "item and extents, unsure if it is a dir or a "
3158                                 "regular file so setting it as a directory\n",
3159                                 (unsigned long long)root->objectid,
3160                                 (unsigned long long)rec->ino);
3161                 mode = S_IFDIR | 0755;
3162                 size = rec->found_size;
3163         } else if (!rec->found_dir_item) {
3164                 size = rec->extent_end;
3165                 mode =  S_IFREG | 0755;
3166         }
3167
3168         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3169                                   nlink, mode);
3170         btrfs_commit_transaction(trans, root);
3171         return 0;
3172 }
3173
3174 static int repair_inode_backrefs(struct btrfs_root *root,
3175                                  struct inode_record *rec,
3176                                  struct cache_tree *inode_cache,
3177                                  int delete)
3178 {
3179         struct inode_backref *tmp, *backref;
3180         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3181         int ret = 0;
3182         int repaired = 0;
3183
3184         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3185                 if (!delete && rec->ino == root_dirid) {
3186                         if (!rec->found_inode_item) {
3187                                 ret = create_inode_item(root, rec, 1);
3188                                 if (ret)
3189                                         break;
3190                                 repaired++;
3191                         }
3192                 }
3193
3194                 /* Index 0 for root dir's are special, don't mess with it */
3195                 if (rec->ino == root_dirid && backref->index == 0)
3196                         continue;
3197
3198                 if (delete &&
3199                     ((backref->found_dir_index && !backref->found_inode_ref) ||
3200                      (backref->found_dir_index && backref->found_inode_ref &&
3201                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3202                         ret = delete_dir_index(root, backref);
3203                         if (ret)
3204                                 break;
3205                         repaired++;
3206                         list_del(&backref->list);
3207                         free(backref);
3208                         continue;
3209                 }
3210
3211                 if (!delete && !backref->found_dir_index &&
3212                     backref->found_dir_item && backref->found_inode_ref) {
3213                         ret = add_missing_dir_index(root, inode_cache, rec,
3214                                                     backref);
3215                         if (ret)
3216                                 break;
3217                         repaired++;
3218                         if (backref->found_dir_item &&
3219                             backref->found_dir_index) {
3220                                 if (!backref->errors &&
3221                                     backref->found_inode_ref) {
3222                                         list_del(&backref->list);
3223                                         free(backref);
3224                                         continue;
3225                                 }
3226                         }
3227                 }
3228
3229                 if (!delete && (!backref->found_dir_index &&
3230                                 !backref->found_dir_item &&
3231                                 backref->found_inode_ref)) {
3232                         struct btrfs_trans_handle *trans;
3233                         struct btrfs_key location;
3234
3235                         ret = check_dir_conflict(root, backref->name,
3236                                                  backref->namelen,
3237                                                  backref->dir,
3238                                                  backref->index);
3239                         if (ret) {
3240                                 /*
3241                                  * let nlink fixing routine to handle it,
3242                                  * which can do it better.
3243                                  */
3244                                 ret = 0;
3245                                 break;
3246                         }
3247                         location.objectid = rec->ino;
3248                         location.type = BTRFS_INODE_ITEM_KEY;
3249                         location.offset = 0;
3250
3251                         trans = btrfs_start_transaction(root, 1);
3252                         if (IS_ERR(trans)) {
3253                                 ret = PTR_ERR(trans);
3254                                 break;
3255                         }
3256                         fprintf(stderr, "adding missing dir index/item pair "
3257                                 "for inode %llu\n",
3258                                 (unsigned long long)rec->ino);
3259                         ret = btrfs_insert_dir_item(trans, root, backref->name,
3260                                                     backref->namelen,
3261                                                     backref->dir, &location,
3262                                                     imode_to_type(rec->imode),
3263                                                     backref->index);
3264                         BUG_ON(ret);
3265                         btrfs_commit_transaction(trans, root);
3266                         repaired++;
3267                 }
3268
3269                 if (!delete && (backref->found_inode_ref &&
3270                                 backref->found_dir_index &&
3271                                 backref->found_dir_item &&
3272                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3273                                 !rec->found_inode_item)) {
3274                         ret = create_inode_item(root, rec, 0);
3275                         if (ret)
3276                                 break;
3277                         repaired++;
3278                 }
3279
3280         }
3281         return ret ? ret : repaired;
3282 }
3283
3284 /*
3285  * To determine the file type for nlink/inode_item repair
3286  *
3287  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3288  * Return -ENOENT if file type is not found.
3289  */
3290 static int find_file_type(struct inode_record *rec, u8 *type)
3291 {
3292         struct inode_backref *backref;
3293
3294         /* For inode item recovered case */
3295         if (rec->found_inode_item) {
3296                 *type = imode_to_type(rec->imode);
3297                 return 0;
3298         }
3299
3300         list_for_each_entry(backref, &rec->backrefs, list) {
3301                 if (backref->found_dir_index || backref->found_dir_item) {
3302                         *type = backref->filetype;
3303                         return 0;
3304                 }
3305         }
3306         return -ENOENT;
3307 }
3308
3309 /*
3310  * To determine the file name for nlink repair
3311  *
3312  * Return 0 if file name is found, set name and namelen.
3313  * Return -ENOENT if file name is not found.
3314  */
3315 static int find_file_name(struct inode_record *rec,
3316                           char *name, int *namelen)
3317 {
3318         struct inode_backref *backref;
3319
3320         list_for_each_entry(backref, &rec->backrefs, list) {
3321                 if (backref->found_dir_index || backref->found_dir_item ||
3322                     backref->found_inode_ref) {
3323                         memcpy(name, backref->name, backref->namelen);
3324                         *namelen = backref->namelen;
3325                         return 0;
3326                 }
3327         }
3328         return -ENOENT;
3329 }
3330
3331 /* Reset the nlink of the inode to the correct one */
3332 static int reset_nlink(struct btrfs_trans_handle *trans,
3333                        struct btrfs_root *root,
3334                        struct btrfs_path *path,
3335                        struct inode_record *rec)
3336 {
3337         struct inode_backref *backref;
3338         struct inode_backref *tmp;
3339         struct btrfs_key key;
3340         struct btrfs_inode_item *inode_item;
3341         int ret = 0;
3342
3343         /* We don't believe this either, reset it and iterate backref */
3344         rec->found_link = 0;
3345
3346         /* Remove all backref including the valid ones */
3347         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3348                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3349                                    backref->index, backref->name,
3350                                    backref->namelen, 0);
3351                 if (ret < 0)
3352                         goto out;
3353
3354                 /* remove invalid backref, so it won't be added back */
3355                 if (!(backref->found_dir_index &&
3356                       backref->found_dir_item &&
3357                       backref->found_inode_ref)) {
3358                         list_del(&backref->list);
3359                         free(backref);
3360                 } else {
3361                         rec->found_link++;
3362                 }
3363         }
3364
3365         /* Set nlink to 0 */
3366         key.objectid = rec->ino;
3367         key.type = BTRFS_INODE_ITEM_KEY;
3368         key.offset = 0;
3369         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3370         if (ret < 0)
3371                 goto out;
3372         if (ret > 0) {
3373                 ret = -ENOENT;
3374                 goto out;
3375         }
3376         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3377                                     struct btrfs_inode_item);
3378         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3379         btrfs_mark_buffer_dirty(path->nodes[0]);
3380         btrfs_release_path(path);
3381
3382         /*
3383          * Add back valid inode_ref/dir_item/dir_index,
3384          * add_link() will handle the nlink inc, so new nlink must be correct
3385          */
3386         list_for_each_entry(backref, &rec->backrefs, list) {
3387                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3388                                      backref->name, backref->namelen,
3389                                      backref->filetype, &backref->index, 1, 0);
3390                 if (ret < 0)
3391                         goto out;
3392         }
3393 out:
3394         btrfs_release_path(path);
3395         return ret;
3396 }
3397
3398 static int get_highest_inode(struct btrfs_trans_handle *trans,
3399                                 struct btrfs_root *root,
3400                                 struct btrfs_path *path,
3401                                 u64 *highest_ino)
3402 {
3403         struct btrfs_key key, found_key;
3404         int ret;
3405
3406         btrfs_init_path(path);
3407         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3408         key.offset = -1;
3409         key.type = BTRFS_INODE_ITEM_KEY;
3410         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3411         if (ret == 1) {
3412                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3413                                 path->slots[0] - 1);
3414                 *highest_ino = found_key.objectid;
3415                 ret = 0;
3416         }
3417         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3418                 ret = -EOVERFLOW;
3419         btrfs_release_path(path);
3420         return ret;
3421 }
3422
3423 /*
3424  * Link inode to dir 'lost+found'. Increase @ref_count.
3425  *
3426  * Returns 0 means success.
3427  * Returns <0 means failure.
3428  */
3429 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3430                                    struct btrfs_root *root,
3431                                    struct btrfs_path *path,
3432                                    u64 ino, char *namebuf, u32 name_len,
3433                                    u8 filetype, u64 *ref_count)
3434 {
3435         char *dir_name = "lost+found";
3436         u64 lost_found_ino;
3437         int ret;
3438         u32 mode = 0700;
3439
3440         btrfs_release_path(path);
3441         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3442         if (ret < 0)
3443                 goto out;
3444         lost_found_ino++;
3445
3446         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3447                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3448                           mode);
3449         if (ret < 0) {
3450                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3451                 goto out;
3452         }
3453         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3454                              namebuf, name_len, filetype, NULL, 1, 0);
3455         /*
3456          * Add ".INO" suffix several times to handle case where
3457          * "FILENAME.INO" is already taken by another file.
3458          */
3459         while (ret == -EEXIST) {
3460                 /*
3461                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3462                  */
3463                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3464                         ret = -EFBIG;
3465                         goto out;
3466                 }
3467                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3468                          ".%llu", ino);
3469                 name_len += count_digits(ino) + 1;
3470                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3471                                      name_len, filetype, NULL, 1, 0);
3472         }
3473         if (ret < 0) {
3474                 error("failed to link the inode %llu to %s dir: %s",
3475                       ino, dir_name, strerror(-ret));
3476                 goto out;
3477         }
3478
3479         ++*ref_count;
3480         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3481                name_len, namebuf, dir_name);
3482 out:
3483         btrfs_release_path(path);
3484         if (ret)
3485                 error("failed to move file '%.*s' to '%s' dir", name_len,
3486                                 namebuf, dir_name);
3487         return ret;
3488 }
3489
3490 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3491                                struct btrfs_root *root,
3492                                struct btrfs_path *path,
3493                                struct inode_record *rec)
3494 {
3495         char namebuf[BTRFS_NAME_LEN] = {0};
3496         u8 type = 0;
3497         int namelen = 0;
3498         int name_recovered = 0;
3499         int type_recovered = 0;
3500         int ret = 0;
3501
3502         /*
3503          * Get file name and type first before these invalid inode ref
3504          * are deleted by remove_all_invalid_backref()
3505          */
3506         name_recovered = !find_file_name(rec, namebuf, &namelen);
3507         type_recovered = !find_file_type(rec, &type);
3508
3509         if (!name_recovered) {
3510                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3511                        rec->ino, rec->ino);
3512                 namelen = count_digits(rec->ino);
3513                 sprintf(namebuf, "%llu", rec->ino);
3514                 name_recovered = 1;
3515         }
3516         if (!type_recovered) {
3517                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3518                        rec->ino);
3519                 type = BTRFS_FT_REG_FILE;
3520                 type_recovered = 1;
3521         }
3522
3523         ret = reset_nlink(trans, root, path, rec);
3524         if (ret < 0) {
3525                 fprintf(stderr,
3526                         "Failed to reset nlink for inode %llu: %s\n",
3527                         rec->ino, strerror(-ret));
3528                 goto out;
3529         }
3530
3531         if (rec->found_link == 0) {
3532                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3533                                               namebuf, namelen, type,
3534                                               (u64 *)&rec->found_link);
3535                 if (ret)
3536                         goto out;
3537         }
3538         printf("Fixed the nlink of inode %llu\n", rec->ino);
3539 out:
3540         /*
3541          * Clear the flag anyway, or we will loop forever for the same inode
3542          * as it will not be removed from the bad inode list and the dead loop
3543          * happens.
3544          */
3545         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3546         btrfs_release_path(path);
3547         return ret;
3548 }
3549
3550 /*
3551  * Check if there is any normal(reg or prealloc) file extent for given
3552  * ino.
3553  * This is used to determine the file type when neither its dir_index/item or
3554  * inode_item exists.
3555  *
3556  * This will *NOT* report error, if any error happens, just consider it does
3557  * not have any normal file extent.
3558  */
3559 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3560 {
3561         struct btrfs_path path;
3562         struct btrfs_key key;
3563         struct btrfs_key found_key;
3564         struct btrfs_file_extent_item *fi;
3565         u8 type;
3566         int ret = 0;
3567
3568         btrfs_init_path(&path);
3569         key.objectid = ino;
3570         key.type = BTRFS_EXTENT_DATA_KEY;
3571         key.offset = 0;
3572
3573         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3574         if (ret < 0) {
3575                 ret = 0;
3576                 goto out;
3577         }
3578         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3579                 ret = btrfs_next_leaf(root, &path);
3580                 if (ret) {
3581                         ret = 0;
3582                         goto out;
3583                 }
3584         }
3585         while (1) {
3586                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3587                                       path.slots[0]);
3588                 if (found_key.objectid != ino ||
3589                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3590                         break;
3591                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3592                                     struct btrfs_file_extent_item);
3593                 type = btrfs_file_extent_type(path.nodes[0], fi);
3594                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3595                         ret = 1;
3596                         goto out;
3597                 }
3598         }
3599 out:
3600         btrfs_release_path(&path);
3601         return ret;
3602 }
3603
3604 static u32 btrfs_type_to_imode(u8 type)
3605 {
3606         static u32 imode_by_btrfs_type[] = {
3607                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3608                 [BTRFS_FT_DIR]          = S_IFDIR,
3609                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3610                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3611                 [BTRFS_FT_FIFO]         = S_IFIFO,
3612                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3613                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3614         };
3615
3616         return imode_by_btrfs_type[(type)];
3617 }
3618
3619 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3620                                 struct btrfs_root *root,
3621                                 struct btrfs_path *path,
3622                                 struct inode_record *rec)
3623 {
3624         u8 filetype;
3625         u32 mode = 0700;
3626         int type_recovered = 0;
3627         int ret = 0;
3628
3629         printf("Trying to rebuild inode:%llu\n", rec->ino);
3630
3631         type_recovered = !find_file_type(rec, &filetype);
3632
3633         /*
3634          * Try to determine inode type if type not found.
3635          *
3636          * For found regular file extent, it must be FILE.
3637          * For found dir_item/index, it must be DIR.
3638          *
3639          * For undetermined one, use FILE as fallback.
3640          *
3641          * TODO:
3642          * 1. If found backref(inode_index/item is already handled) to it,
3643          *    it must be DIR.
3644          *    Need new inode-inode ref structure to allow search for that.
3645          */
3646         if (!type_recovered) {
3647                 if (rec->found_file_extent &&
3648                     find_normal_file_extent(root, rec->ino)) {
3649                         type_recovered = 1;
3650                         filetype = BTRFS_FT_REG_FILE;
3651                 } else if (rec->found_dir_item) {
3652                         type_recovered = 1;
3653                         filetype = BTRFS_FT_DIR;
3654                 } else if (!list_empty(&rec->orphan_extents)) {
3655                         type_recovered = 1;
3656                         filetype = BTRFS_FT_REG_FILE;
3657                 } else{
3658                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3659                                rec->ino);
3660                         type_recovered = 1;
3661                         filetype = BTRFS_FT_REG_FILE;
3662                 }
3663         }
3664
3665         ret = btrfs_new_inode(trans, root, rec->ino,
3666                               mode | btrfs_type_to_imode(filetype));
3667         if (ret < 0)
3668                 goto out;
3669
3670         /*
3671          * Here inode rebuild is done, we only rebuild the inode item,
3672          * don't repair the nlink(like move to lost+found).
3673          * That is the job of nlink repair.
3674          *
3675          * We just fill the record and return
3676          */
3677         rec->found_dir_item = 1;
3678         rec->imode = mode | btrfs_type_to_imode(filetype);
3679         rec->nlink = 0;
3680         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3681         /* Ensure the inode_nlinks repair function will be called */
3682         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3683 out:
3684         return ret;
3685 }
3686
3687 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3688                                       struct btrfs_root *root,
3689                                       struct btrfs_path *path,
3690                                       struct inode_record *rec)
3691 {
3692         struct orphan_data_extent *orphan;
3693         struct orphan_data_extent *tmp;
3694         int ret = 0;
3695
3696         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3697                 /*
3698                  * Check for conflicting file extents
3699                  *
3700                  * Here we don't know whether the extents is compressed or not,
3701                  * so we can only assume it not compressed nor data offset,
3702                  * and use its disk_len as extent length.
3703                  */
3704                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3705                                        orphan->offset, orphan->disk_len, 0);
3706                 btrfs_release_path(path);
3707                 if (ret < 0)
3708                         goto out;
3709                 if (!ret) {
3710                         fprintf(stderr,
3711                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3712                                 orphan->disk_bytenr, orphan->disk_len);
3713                         ret = btrfs_free_extent(trans,
3714                                         root->fs_info->extent_root,
3715                                         orphan->disk_bytenr, orphan->disk_len,
3716                                         0, root->objectid, orphan->objectid,
3717                                         orphan->offset);
3718                         if (ret < 0)
3719                                 goto out;
3720                 }
3721                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3722                                 orphan->offset, orphan->disk_bytenr,
3723                                 orphan->disk_len, orphan->disk_len);
3724                 if (ret < 0)
3725                         goto out;
3726
3727                 /* Update file size info */
3728                 rec->found_size += orphan->disk_len;
3729                 if (rec->found_size == rec->nbytes)
3730                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3731
3732                 /* Update the file extent hole info too */
3733                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3734                                            orphan->disk_len);
3735                 if (ret < 0)
3736                         goto out;
3737                 if (RB_EMPTY_ROOT(&rec->holes))
3738                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3739
3740                 list_del(&orphan->list);
3741                 free(orphan);
3742         }
3743         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3744 out:
3745         return ret;
3746 }
3747
3748 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3749                                         struct btrfs_root *root,
3750                                         struct btrfs_path *path,
3751                                         struct inode_record *rec)
3752 {
3753         struct rb_node *node;
3754         struct file_extent_hole *hole;
3755         int found = 0;
3756         int ret = 0;
3757
3758         node = rb_first(&rec->holes);
3759
3760         while (node) {
3761                 found = 1;
3762                 hole = rb_entry(node, struct file_extent_hole, node);
3763                 ret = btrfs_punch_hole(trans, root, rec->ino,
3764                                        hole->start, hole->len);
3765                 if (ret < 0)
3766                         goto out;
3767                 ret = del_file_extent_hole(&rec->holes, hole->start,
3768                                            hole->len);
3769                 if (ret < 0)
3770                         goto out;
3771                 if (RB_EMPTY_ROOT(&rec->holes))
3772                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3773                 node = rb_first(&rec->holes);
3774         }
3775         /* special case for a file losing all its file extent */
3776         if (!found) {
3777                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3778                                        round_up(rec->isize,
3779                                                 root->fs_info->sectorsize));
3780                 if (ret < 0)
3781                         goto out;
3782         }
3783         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3784                rec->ino, root->objectid);
3785 out:
3786         return ret;
3787 }
3788
3789 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3790 {
3791         struct btrfs_trans_handle *trans;
3792         struct btrfs_path path;
3793         int ret = 0;
3794
3795         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3796                              I_ERR_NO_ORPHAN_ITEM |
3797                              I_ERR_LINK_COUNT_WRONG |
3798                              I_ERR_NO_INODE_ITEM |
3799                              I_ERR_FILE_EXTENT_ORPHAN |
3800                              I_ERR_FILE_EXTENT_DISCOUNT|
3801                              I_ERR_FILE_NBYTES_WRONG)))
3802                 return rec->errors;
3803
3804         /*
3805          * For nlink repair, it may create a dir and add link, so
3806          * 2 for parent(256)'s dir_index and dir_item
3807          * 2 for lost+found dir's inode_item and inode_ref
3808          * 1 for the new inode_ref of the file
3809          * 2 for lost+found dir's dir_index and dir_item for the file
3810          */
3811         trans = btrfs_start_transaction(root, 7);
3812         if (IS_ERR(trans))
3813                 return PTR_ERR(trans);
3814
3815         btrfs_init_path(&path);
3816         if (rec->errors & I_ERR_NO_INODE_ITEM)
3817                 ret = repair_inode_no_item(trans, root, &path, rec);
3818         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3819                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3820         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3821                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3822         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3823                 ret = repair_inode_isize(trans, root, &path, rec);
3824         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3825                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3826         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3827                 ret = repair_inode_nlinks(trans, root, &path, rec);
3828         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3829                 ret = repair_inode_nbytes(trans, root, &path, rec);
3830         btrfs_commit_transaction(trans, root);
3831         btrfs_release_path(&path);
3832         return ret;
3833 }
3834
3835 static int check_inode_recs(struct btrfs_root *root,
3836                             struct cache_tree *inode_cache)
3837 {
3838         struct cache_extent *cache;
3839         struct ptr_node *node;
3840         struct inode_record *rec;
3841         struct inode_backref *backref;
3842         int stage = 0;
3843         int ret = 0;
3844         int err = 0;
3845         u64 error = 0;
3846         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3847
3848         if (btrfs_root_refs(&root->root_item) == 0) {
3849                 if (!cache_tree_empty(inode_cache))
3850                         fprintf(stderr, "warning line %d\n", __LINE__);
3851                 return 0;
3852         }
3853
3854         /*
3855          * We need to repair backrefs first because we could change some of the
3856          * errors in the inode recs.
3857          *
3858          * We also need to go through and delete invalid backrefs first and then
3859          * add the correct ones second.  We do this because we may get EEXIST
3860          * when adding back the correct index because we hadn't yet deleted the
3861          * invalid index.
3862          *
3863          * For example, if we were missing a dir index then the directories
3864          * isize would be wrong, so if we fixed the isize to what we thought it
3865          * would be and then fixed the backref we'd still have a invalid fs, so
3866          * we need to add back the dir index and then check to see if the isize
3867          * is still wrong.
3868          */
3869         while (stage < 3) {
3870                 stage++;
3871                 if (stage == 3 && !err)
3872                         break;
3873
3874                 cache = search_cache_extent(inode_cache, 0);
3875                 while (repair && cache) {
3876                         node = container_of(cache, struct ptr_node, cache);
3877                         rec = node->data;
3878                         cache = next_cache_extent(cache);
3879
3880                         /* Need to free everything up and rescan */
3881                         if (stage == 3) {
3882                                 remove_cache_extent(inode_cache, &node->cache);
3883                                 free(node);
3884                                 free_inode_rec(rec);
3885                                 continue;
3886                         }
3887
3888                         if (list_empty(&rec->backrefs))
3889                                 continue;
3890
3891                         ret = repair_inode_backrefs(root, rec, inode_cache,
3892                                                     stage == 1);
3893                         if (ret < 0) {
3894                                 err = ret;
3895                                 stage = 2;
3896                                 break;
3897                         } if (ret > 0) {
3898                                 err = -EAGAIN;
3899                         }
3900                 }
3901         }
3902         if (err)
3903                 return err;
3904
3905         rec = get_inode_rec(inode_cache, root_dirid, 0);
3906         BUG_ON(IS_ERR(rec));
3907         if (rec) {
3908                 ret = check_root_dir(rec);
3909                 if (ret) {
3910                         fprintf(stderr, "root %llu root dir %llu error\n",
3911                                 (unsigned long long)root->root_key.objectid,
3912                                 (unsigned long long)root_dirid);
3913                         print_inode_error(root, rec);
3914                         error++;
3915                 }
3916         } else {
3917                 if (repair) {
3918                         struct btrfs_trans_handle *trans;
3919
3920                         trans = btrfs_start_transaction(root, 1);
3921                         if (IS_ERR(trans)) {
3922                                 err = PTR_ERR(trans);
3923                                 return err;
3924                         }
3925
3926                         fprintf(stderr,
3927                                 "root %llu missing its root dir, recreating\n",
3928                                 (unsigned long long)root->objectid);
3929
3930                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3931                         BUG_ON(ret);
3932
3933                         btrfs_commit_transaction(trans, root);
3934                         return -EAGAIN;
3935                 }
3936
3937                 fprintf(stderr, "root %llu root dir %llu not found\n",
3938                         (unsigned long long)root->root_key.objectid,
3939                         (unsigned long long)root_dirid);
3940         }
3941
3942         while (1) {
3943                 cache = search_cache_extent(inode_cache, 0);
3944                 if (!cache)
3945                         break;
3946                 node = container_of(cache, struct ptr_node, cache);
3947                 rec = node->data;
3948                 remove_cache_extent(inode_cache, &node->cache);
3949                 free(node);
3950                 if (rec->ino == root_dirid ||
3951                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3952                         free_inode_rec(rec);
3953                         continue;
3954                 }
3955
3956                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3957                         ret = check_orphan_item(root, rec->ino);
3958                         if (ret == 0)
3959                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3960                         if (can_free_inode_rec(rec)) {
3961                                 free_inode_rec(rec);
3962                                 continue;
3963                         }
3964                 }
3965
3966                 if (!rec->found_inode_item)
3967                         rec->errors |= I_ERR_NO_INODE_ITEM;
3968                 if (rec->found_link != rec->nlink)
3969                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3970                 if (repair) {
3971                         ret = try_repair_inode(root, rec);
3972                         if (ret == 0 && can_free_inode_rec(rec)) {
3973                                 free_inode_rec(rec);
3974                                 continue;
3975                         }
3976                         ret = 0;
3977                 }
3978
3979                 if (!(repair && ret == 0))
3980                         error++;
3981                 print_inode_error(root, rec);
3982                 list_for_each_entry(backref, &rec->backrefs, list) {
3983                         if (!backref->found_dir_item)
3984                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3985                         if (!backref->found_dir_index)
3986                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3987                         if (!backref->found_inode_ref)
3988                                 backref->errors |= REF_ERR_NO_INODE_REF;
3989                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3990                                 " namelen %u name %s filetype %d errors %x",
3991                                 (unsigned long long)backref->dir,
3992                                 (unsigned long long)backref->index,
3993                                 backref->namelen, backref->name,
3994                                 backref->filetype, backref->errors);
3995                         print_ref_error(backref->errors);
3996                 }
3997                 free_inode_rec(rec);
3998         }
3999         return (error > 0) ? -1 : 0;
4000 }
4001
4002 static struct root_record *get_root_rec(struct cache_tree *root_cache,
4003                                         u64 objectid)
4004 {
4005         struct cache_extent *cache;
4006         struct root_record *rec = NULL;
4007         int ret;
4008
4009         cache = lookup_cache_extent(root_cache, objectid, 1);
4010         if (cache) {
4011                 rec = container_of(cache, struct root_record, cache);
4012         } else {
4013                 rec = calloc(1, sizeof(*rec));
4014                 if (!rec)
4015                         return ERR_PTR(-ENOMEM);
4016                 rec->objectid = objectid;
4017                 INIT_LIST_HEAD(&rec->backrefs);
4018                 rec->cache.start = objectid;
4019                 rec->cache.size = 1;
4020
4021                 ret = insert_cache_extent(root_cache, &rec->cache);
4022                 if (ret)
4023                         return ERR_PTR(-EEXIST);
4024         }
4025         return rec;
4026 }
4027
4028 static struct root_backref *get_root_backref(struct root_record *rec,
4029                                              u64 ref_root, u64 dir, u64 index,
4030                                              const char *name, int namelen)
4031 {
4032         struct root_backref *backref;
4033
4034         list_for_each_entry(backref, &rec->backrefs, list) {
4035                 if (backref->ref_root != ref_root || backref->dir != dir ||
4036                     backref->namelen != namelen)
4037                         continue;
4038                 if (memcmp(name, backref->name, namelen))
4039                         continue;
4040                 return backref;
4041         }
4042
4043         backref = calloc(1, sizeof(*backref) + namelen + 1);
4044         if (!backref)
4045                 return NULL;
4046         backref->ref_root = ref_root;
4047         backref->dir = dir;
4048         backref->index = index;
4049         backref->namelen = namelen;
4050         memcpy(backref->name, name, namelen);
4051         backref->name[namelen] = '\0';
4052         list_add_tail(&backref->list, &rec->backrefs);
4053         return backref;
4054 }
4055
4056 static void free_root_record(struct cache_extent *cache)
4057 {
4058         struct root_record *rec;
4059         struct root_backref *backref;
4060
4061         rec = container_of(cache, struct root_record, cache);
4062         while (!list_empty(&rec->backrefs)) {
4063                 backref = to_root_backref(rec->backrefs.next);
4064                 list_del(&backref->list);
4065                 free(backref);
4066         }
4067
4068         free(rec);
4069 }
4070
4071 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
4072
4073 static int add_root_backref(struct cache_tree *root_cache,
4074                             u64 root_id, u64 ref_root, u64 dir, u64 index,
4075                             const char *name, int namelen,
4076                             int item_type, int errors)
4077 {
4078         struct root_record *rec;
4079         struct root_backref *backref;
4080
4081         rec = get_root_rec(root_cache, root_id);
4082         BUG_ON(IS_ERR(rec));
4083         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4084         BUG_ON(!backref);
4085
4086         backref->errors |= errors;
4087
4088         if (item_type != BTRFS_DIR_ITEM_KEY) {
4089                 if (backref->found_dir_index || backref->found_back_ref ||
4090                     backref->found_forward_ref) {
4091                         if (backref->index != index)
4092                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
4093                 } else {
4094                         backref->index = index;
4095                 }
4096         }
4097
4098         if (item_type == BTRFS_DIR_ITEM_KEY) {
4099                 if (backref->found_forward_ref)
4100                         rec->found_ref++;
4101                 backref->found_dir_item = 1;
4102         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4103                 backref->found_dir_index = 1;
4104         } else if (item_type == BTRFS_ROOT_REF_KEY) {
4105                 if (backref->found_forward_ref)
4106                         backref->errors |= REF_ERR_DUP_ROOT_REF;
4107                 else if (backref->found_dir_item)
4108                         rec->found_ref++;
4109                 backref->found_forward_ref = 1;
4110         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4111                 if (backref->found_back_ref)
4112                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4113                 backref->found_back_ref = 1;
4114         } else {
4115                 BUG_ON(1);
4116         }
4117
4118         if (backref->found_forward_ref && backref->found_dir_item)
4119                 backref->reachable = 1;
4120         return 0;
4121 }
4122
4123 static int merge_root_recs(struct btrfs_root *root,
4124                            struct cache_tree *src_cache,
4125                            struct cache_tree *dst_cache)
4126 {
4127         struct cache_extent *cache;
4128         struct ptr_node *node;
4129         struct inode_record *rec;
4130         struct inode_backref *backref;
4131         int ret = 0;
4132
4133         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4134                 free_inode_recs_tree(src_cache);
4135                 return 0;
4136         }
4137
4138         while (1) {
4139                 cache = search_cache_extent(src_cache, 0);
4140                 if (!cache)
4141                         break;
4142                 node = container_of(cache, struct ptr_node, cache);
4143                 rec = node->data;
4144                 remove_cache_extent(src_cache, &node->cache);
4145                 free(node);
4146
4147                 ret = is_child_root(root, root->objectid, rec->ino);
4148                 if (ret < 0)
4149                         break;
4150                 else if (ret == 0)
4151                         goto skip;
4152
4153                 list_for_each_entry(backref, &rec->backrefs, list) {
4154                         BUG_ON(backref->found_inode_ref);
4155                         if (backref->found_dir_item)
4156                                 add_root_backref(dst_cache, rec->ino,
4157                                         root->root_key.objectid, backref->dir,
4158                                         backref->index, backref->name,
4159                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
4160                                         backref->errors);
4161                         if (backref->found_dir_index)
4162                                 add_root_backref(dst_cache, rec->ino,
4163                                         root->root_key.objectid, backref->dir,
4164                                         backref->index, backref->name,
4165                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
4166                                         backref->errors);
4167                 }
4168 skip:
4169                 free_inode_rec(rec);
4170         }
4171         if (ret < 0)
4172                 return ret;
4173         return 0;
4174 }
4175
4176 static int check_root_refs(struct btrfs_root *root,
4177                            struct cache_tree *root_cache)
4178 {
4179         struct root_record *rec;
4180         struct root_record *ref_root;
4181         struct root_backref *backref;
4182         struct cache_extent *cache;
4183         int loop = 1;
4184         int ret;
4185         int error;
4186         int errors = 0;
4187
4188         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4189         BUG_ON(IS_ERR(rec));
4190         rec->found_ref = 1;
4191
4192         /* fixme: this can not detect circular references */
4193         while (loop) {
4194                 loop = 0;
4195                 cache = search_cache_extent(root_cache, 0);
4196                 while (1) {
4197                         if (!cache)
4198                                 break;
4199                         rec = container_of(cache, struct root_record, cache);
4200                         cache = next_cache_extent(cache);
4201
4202                         if (rec->found_ref == 0)
4203                                 continue;
4204
4205                         list_for_each_entry(backref, &rec->backrefs, list) {
4206                                 if (!backref->reachable)
4207                                         continue;
4208
4209                                 ref_root = get_root_rec(root_cache,
4210                                                         backref->ref_root);
4211                                 BUG_ON(IS_ERR(ref_root));
4212                                 if (ref_root->found_ref > 0)
4213                                         continue;
4214
4215                                 backref->reachable = 0;
4216                                 rec->found_ref--;
4217                                 if (rec->found_ref == 0)
4218                                         loop = 1;
4219                         }
4220                 }
4221         }
4222
4223         cache = search_cache_extent(root_cache, 0);
4224         while (1) {
4225                 if (!cache)
4226                         break;
4227                 rec = container_of(cache, struct root_record, cache);
4228                 cache = next_cache_extent(cache);
4229
4230                 if (rec->found_ref == 0 &&
4231                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4232                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4233                         ret = check_orphan_item(root->fs_info->tree_root,
4234                                                 rec->objectid);
4235                         if (ret == 0)
4236                                 continue;
4237
4238                         /*
4239                          * If we don't have a root item then we likely just have
4240                          * a dir item in a snapshot for this root but no actual
4241                          * ref key or anything so it's meaningless.
4242                          */
4243                         if (!rec->found_root_item)
4244                                 continue;
4245                         errors++;
4246                         fprintf(stderr, "fs tree %llu not referenced\n",
4247                                 (unsigned long long)rec->objectid);
4248                 }
4249
4250                 error = 0;
4251                 if (rec->found_ref > 0 && !rec->found_root_item)
4252                         error = 1;
4253                 list_for_each_entry(backref, &rec->backrefs, list) {
4254                         if (!backref->found_dir_item)
4255                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
4256                         if (!backref->found_dir_index)
4257                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
4258                         if (!backref->found_back_ref)
4259                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4260                         if (!backref->found_forward_ref)
4261                                 backref->errors |= REF_ERR_NO_ROOT_REF;
4262                         if (backref->reachable && backref->errors)
4263                                 error = 1;
4264                 }
4265                 if (!error)
4266                         continue;
4267
4268                 errors++;
4269                 fprintf(stderr, "fs tree %llu refs %u %s\n",
4270                         (unsigned long long)rec->objectid, rec->found_ref,
4271                          rec->found_root_item ? "" : "not found");
4272
4273                 list_for_each_entry(backref, &rec->backrefs, list) {
4274                         if (!backref->reachable)
4275                                 continue;
4276                         if (!backref->errors && rec->found_root_item)
4277                                 continue;
4278                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4279                                 " index %llu namelen %u name %s errors %x\n",
4280                                 (unsigned long long)backref->ref_root,
4281                                 (unsigned long long)backref->dir,
4282                                 (unsigned long long)backref->index,
4283                                 backref->namelen, backref->name,
4284                                 backref->errors);
4285                         print_ref_error(backref->errors);
4286                 }
4287         }
4288         return errors > 0 ? 1 : 0;
4289 }
4290
4291 static int process_root_ref(struct extent_buffer *eb, int slot,
4292                             struct btrfs_key *key,
4293                             struct cache_tree *root_cache)
4294 {
4295         u64 dirid;
4296         u64 index;
4297         u32 len;
4298         u32 name_len;
4299         struct btrfs_root_ref *ref;
4300         char namebuf[BTRFS_NAME_LEN];
4301         int error;
4302
4303         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4304
4305         dirid = btrfs_root_ref_dirid(eb, ref);
4306         index = btrfs_root_ref_sequence(eb, ref);
4307         name_len = btrfs_root_ref_name_len(eb, ref);
4308
4309         if (name_len <= BTRFS_NAME_LEN) {
4310                 len = name_len;
4311                 error = 0;
4312         } else {
4313                 len = BTRFS_NAME_LEN;
4314                 error = REF_ERR_NAME_TOO_LONG;
4315         }
4316         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4317
4318         if (key->type == BTRFS_ROOT_REF_KEY) {
4319                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4320                                  index, namebuf, len, key->type, error);
4321         } else {
4322                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4323                                  index, namebuf, len, key->type, error);
4324         }
4325         return 0;
4326 }
4327
4328 static void free_corrupt_block(struct cache_extent *cache)
4329 {
4330         struct btrfs_corrupt_block *corrupt;
4331
4332         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4333         free(corrupt);
4334 }
4335
4336 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4337
4338 /*
4339  * Repair the btree of the given root.
4340  *
4341  * The fix is to remove the node key in corrupt_blocks cache_tree.
4342  * and rebalance the tree.
4343  * After the fix, the btree should be writeable.
4344  */
4345 static int repair_btree(struct btrfs_root *root,
4346                         struct cache_tree *corrupt_blocks)
4347 {
4348         struct btrfs_trans_handle *trans;
4349         struct btrfs_path path;
4350         struct btrfs_corrupt_block *corrupt;
4351         struct cache_extent *cache;
4352         struct btrfs_key key;
4353         u64 offset;
4354         int level;
4355         int ret = 0;
4356
4357         if (cache_tree_empty(corrupt_blocks))
4358                 return 0;
4359
4360         trans = btrfs_start_transaction(root, 1);
4361         if (IS_ERR(trans)) {
4362                 ret = PTR_ERR(trans);
4363                 fprintf(stderr, "Error starting transaction: %s\n",
4364                         strerror(-ret));
4365                 return ret;
4366         }
4367         btrfs_init_path(&path);
4368         cache = first_cache_extent(corrupt_blocks);
4369         while (cache) {
4370                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4371                                        cache);
4372                 level = corrupt->level;
4373                 path.lowest_level = level;
4374                 key.objectid = corrupt->key.objectid;
4375                 key.type = corrupt->key.type;
4376                 key.offset = corrupt->key.offset;
4377
4378                 /*
4379                  * Here we don't want to do any tree balance, since it may
4380                  * cause a balance with corrupted brother leaf/node,
4381                  * so ins_len set to 0 here.
4382                  * Balance will be done after all corrupt node/leaf is deleted.
4383                  */
4384                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4385                 if (ret < 0)
4386                         goto out;
4387                 offset = btrfs_node_blockptr(path.nodes[level],
4388                                              path.slots[level]);
4389
4390                 /* Remove the ptr */
4391                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4392                 if (ret < 0)
4393                         goto out;
4394                 /*
4395                  * Remove the corresponding extent
4396                  * return value is not concerned.
4397                  */
4398                 btrfs_release_path(&path);
4399                 ret = btrfs_free_extent(trans, root, offset,
4400                                 root->fs_info->nodesize, 0,
4401                                 root->root_key.objectid, level - 1, 0);
4402                 cache = next_cache_extent(cache);
4403         }
4404
4405         /* Balance the btree using btrfs_search_slot() */
4406         cache = first_cache_extent(corrupt_blocks);
4407         while (cache) {
4408                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4409                                        cache);
4410                 memcpy(&key, &corrupt->key, sizeof(key));
4411                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4412                 if (ret < 0)
4413                         goto out;
4414                 /* return will always >0 since it won't find the item */
4415                 ret = 0;
4416                 btrfs_release_path(&path);
4417                 cache = next_cache_extent(cache);
4418         }
4419 out:
4420         btrfs_commit_transaction(trans, root);
4421         btrfs_release_path(&path);
4422         return ret;
4423 }
4424
4425 static int check_fs_root(struct btrfs_root *root,
4426                          struct cache_tree *root_cache,
4427                          struct walk_control *wc)
4428 {
4429         int ret = 0;
4430         int err = 0;
4431         int wret;
4432         int level;
4433         struct btrfs_path path;
4434         struct shared_node root_node;
4435         struct root_record *rec;
4436         struct btrfs_root_item *root_item = &root->root_item;
4437         struct cache_tree corrupt_blocks;
4438         struct orphan_data_extent *orphan;
4439         struct orphan_data_extent *tmp;
4440         enum btrfs_tree_block_status status;
4441         struct node_refs nrefs;
4442
4443         /*
4444          * Reuse the corrupt_block cache tree to record corrupted tree block
4445          *
4446          * Unlike the usage in extent tree check, here we do it in a per
4447          * fs/subvol tree base.
4448          */
4449         cache_tree_init(&corrupt_blocks);
4450         root->fs_info->corrupt_blocks = &corrupt_blocks;
4451
4452         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4453                 rec = get_root_rec(root_cache, root->root_key.objectid);
4454                 BUG_ON(IS_ERR(rec));
4455                 if (btrfs_root_refs(root_item) > 0)
4456                         rec->found_root_item = 1;
4457         }
4458
4459         btrfs_init_path(&path);
4460         memset(&root_node, 0, sizeof(root_node));
4461         cache_tree_init(&root_node.root_cache);
4462         cache_tree_init(&root_node.inode_cache);
4463         memset(&nrefs, 0, sizeof(nrefs));
4464
4465         /* Move the orphan extent record to corresponding inode_record */
4466         list_for_each_entry_safe(orphan, tmp,
4467                                  &root->orphan_data_extents, list) {
4468                 struct inode_record *inode;
4469
4470                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4471                                       1);
4472                 BUG_ON(IS_ERR(inode));
4473                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4474                 list_move(&orphan->list, &inode->orphan_extents);
4475         }
4476
4477         level = btrfs_header_level(root->node);
4478         memset(wc->nodes, 0, sizeof(wc->nodes));
4479         wc->nodes[level] = &root_node;
4480         wc->active_node = level;
4481         wc->root_level = level;
4482
4483         /* We may not have checked the root block, lets do that now */
4484         if (btrfs_is_leaf(root->node))
4485                 status = btrfs_check_leaf(root, NULL, root->node);
4486         else
4487                 status = btrfs_check_node(root, NULL, root->node);
4488         if (status != BTRFS_TREE_BLOCK_CLEAN)
4489                 return -EIO;
4490
4491         if (btrfs_root_refs(root_item) > 0 ||
4492             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4493                 path.nodes[level] = root->node;
4494                 extent_buffer_get(root->node);
4495                 path.slots[level] = 0;
4496         } else {
4497                 struct btrfs_key key;
4498                 struct btrfs_disk_key found_key;
4499
4500                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4501                 level = root_item->drop_level;
4502                 path.lowest_level = level;
4503                 if (level > btrfs_header_level(root->node) ||
4504                     level >= BTRFS_MAX_LEVEL) {
4505                         error("ignoring invalid drop level: %u", level);
4506                         goto skip_walking;
4507                 }
4508                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4509                 if (wret < 0)
4510                         goto skip_walking;
4511                 btrfs_node_key(path.nodes[level], &found_key,
4512                                 path.slots[level]);
4513                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4514                                         sizeof(found_key)));
4515         }
4516
4517         while (1) {
4518                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4519                 if (wret < 0)
4520                         ret = wret;
4521                 if (wret != 0)
4522                         break;
4523
4524                 wret = walk_up_tree(root, &path, wc, &level);
4525                 if (wret < 0)
4526                         ret = wret;
4527                 if (wret != 0)
4528                         break;
4529         }
4530 skip_walking:
4531         btrfs_release_path(&path);
4532
4533         if (!cache_tree_empty(&corrupt_blocks)) {
4534                 struct cache_extent *cache;
4535                 struct btrfs_corrupt_block *corrupt;
4536
4537                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4538                        root->root_key.objectid);
4539                 cache = first_cache_extent(&corrupt_blocks);
4540                 while (cache) {
4541                         corrupt = container_of(cache,
4542                                                struct btrfs_corrupt_block,
4543                                                cache);
4544                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4545                                cache->start, corrupt->level,
4546                                corrupt->key.objectid, corrupt->key.type,
4547                                corrupt->key.offset);
4548                         cache = next_cache_extent(cache);
4549                 }
4550                 if (repair) {
4551                         printf("Try to repair the btree for root %llu\n",
4552                                root->root_key.objectid);
4553                         ret = repair_btree(root, &corrupt_blocks);
4554                         if (ret < 0)
4555                                 fprintf(stderr, "Failed to repair btree: %s\n",
4556                                         strerror(-ret));
4557                         if (!ret)
4558                                 printf("Btree for root %llu is fixed\n",
4559                                        root->root_key.objectid);
4560                 }
4561         }
4562
4563         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4564         if (err < 0)
4565                 ret = err;
4566
4567         if (root_node.current) {
4568                 root_node.current->checked = 1;
4569                 maybe_free_inode_rec(&root_node.inode_cache,
4570                                 root_node.current);
4571         }
4572
4573         err = check_inode_recs(root, &root_node.inode_cache);
4574         if (!ret)
4575                 ret = err;
4576
4577         free_corrupt_blocks_tree(&corrupt_blocks);
4578         root->fs_info->corrupt_blocks = NULL;
4579         free_orphan_data_extents(&root->orphan_data_extents);
4580         return ret;
4581 }
4582
4583 static int fs_root_objectid(u64 objectid)
4584 {
4585         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4586             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4587                 return 1;
4588         return is_fstree(objectid);
4589 }
4590
4591 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4592                           struct cache_tree *root_cache)
4593 {
4594         struct btrfs_path path;
4595         struct btrfs_key key;
4596         struct walk_control wc;
4597         struct extent_buffer *leaf, *tree_node;
4598         struct btrfs_root *tmp_root;
4599         struct btrfs_root *tree_root = fs_info->tree_root;
4600         int ret;
4601         int err = 0;
4602
4603         if (ctx.progress_enabled) {
4604                 ctx.tp = TASK_FS_ROOTS;
4605                 task_start(ctx.info);
4606         }
4607
4608         /*
4609          * Just in case we made any changes to the extent tree that weren't
4610          * reflected into the free space cache yet.
4611          */
4612         if (repair)
4613                 reset_cached_block_groups(fs_info);
4614         memset(&wc, 0, sizeof(wc));
4615         cache_tree_init(&wc.shared);
4616         btrfs_init_path(&path);
4617
4618 again:
4619         key.offset = 0;
4620         key.objectid = 0;
4621         key.type = BTRFS_ROOT_ITEM_KEY;
4622         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4623         if (ret < 0) {
4624                 err = 1;
4625                 goto out;
4626         }
4627         tree_node = tree_root->node;
4628         while (1) {
4629                 if (tree_node != tree_root->node) {
4630                         free_root_recs_tree(root_cache);
4631                         btrfs_release_path(&path);
4632                         goto again;
4633                 }
4634                 leaf = path.nodes[0];
4635                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4636                         ret = btrfs_next_leaf(tree_root, &path);
4637                         if (ret) {
4638                                 if (ret < 0)
4639                                         err = 1;
4640                                 break;
4641                         }
4642                         leaf = path.nodes[0];
4643                 }
4644                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4645                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4646                     fs_root_objectid(key.objectid)) {
4647                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4648                                 tmp_root = btrfs_read_fs_root_no_cache(
4649                                                 fs_info, &key);
4650                         } else {
4651                                 key.offset = (u64)-1;
4652                                 tmp_root = btrfs_read_fs_root(
4653                                                 fs_info, &key);
4654                         }
4655                         if (IS_ERR(tmp_root)) {
4656                                 err = 1;
4657                                 goto next;
4658                         }
4659                         ret = check_fs_root(tmp_root, root_cache, &wc);
4660                         if (ret == -EAGAIN) {
4661                                 free_root_recs_tree(root_cache);
4662                                 btrfs_release_path(&path);
4663                                 goto again;
4664                         }
4665                         if (ret)
4666                                 err = 1;
4667                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4668                                 btrfs_free_fs_root(tmp_root);
4669                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4670                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4671                         process_root_ref(leaf, path.slots[0], &key,
4672                                          root_cache);
4673                 }
4674 next:
4675                 path.slots[0]++;
4676         }
4677 out:
4678         btrfs_release_path(&path);
4679         if (err)
4680                 free_extent_cache_tree(&wc.shared);
4681         if (!cache_tree_empty(&wc.shared))
4682                 fprintf(stderr, "warning line %d\n", __LINE__);
4683
4684         task_stop(ctx.info);
4685
4686         return err;
4687 }
4688
4689 /*
4690  * Find the @index according by @ino and name.
4691  * Notice:time efficiency is O(N)
4692  *
4693  * @root:       the root of the fs/file tree
4694  * @index_ret:  the index as return value
4695  * @namebuf:    the name to match
4696  * @name_len:   the length of name to match
4697  * @file_type:  the file_type of INODE_ITEM to match
4698  *
4699  * Returns 0 if found and *@index_ret will be modified with right value
4700  * Returns< 0 not found and *@index_ret will be (u64)-1
4701  */
4702 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4703                           u64 *index_ret, char *namebuf, u32 name_len,
4704                           u8 file_type)
4705 {
4706         struct btrfs_path path;
4707         struct extent_buffer *node;
4708         struct btrfs_dir_item *di;
4709         struct btrfs_key key;
4710         struct btrfs_key location;
4711         char name[BTRFS_NAME_LEN] = {0};
4712
4713         u32 total;
4714         u32 cur = 0;
4715         u32 len;
4716         u32 data_len;
4717         u8 filetype;
4718         int slot;
4719         int ret;
4720
4721         ASSERT(index_ret);
4722
4723         /* search from the last index */
4724         key.objectid = dirid;
4725         key.offset = (u64)-1;
4726         key.type = BTRFS_DIR_INDEX_KEY;
4727
4728         btrfs_init_path(&path);
4729         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4730         if (ret < 0)
4731                 return ret;
4732
4733 loop:
4734         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4735         if (ret) {
4736                 ret = -ENOENT;
4737                 *index_ret = (64)-1;
4738                 goto out;
4739         }
4740         /* Check whether inode_id/filetype/name match */
4741         node = path.nodes[0];
4742         slot = path.slots[0];
4743         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4744         total = btrfs_item_size_nr(node, slot);
4745         while (cur < total) {
4746                 ret = -ENOENT;
4747                 len = btrfs_dir_name_len(node, di);
4748                 data_len = btrfs_dir_data_len(node, di);
4749
4750                 btrfs_dir_item_key_to_cpu(node, di, &location);
4751                 if (location.objectid != location_id ||
4752                     location.type != BTRFS_INODE_ITEM_KEY ||
4753                     location.offset != 0)
4754                         goto next;
4755
4756                 filetype = btrfs_dir_type(node, di);
4757                 if (file_type != filetype)
4758                         goto next;
4759
4760                 if (len > BTRFS_NAME_LEN)
4761                         len = BTRFS_NAME_LEN;
4762
4763                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4764                 if (len != name_len || strncmp(namebuf, name, len))
4765                         goto next;
4766
4767                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4768                 *index_ret = key.offset;
4769                 ret = 0;
4770                 goto out;
4771 next:
4772                 len += sizeof(*di) + data_len;
4773                 di = (struct btrfs_dir_item *)((char *)di + len);
4774                 cur += len;
4775         }
4776         goto loop;
4777
4778 out:
4779         btrfs_release_path(&path);
4780         return ret;
4781 }
4782
4783 /*
4784  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4785  * INODE_REF/INODE_EXTREF match.
4786  *
4787  * @root:       the root of the fs/file tree
4788  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4789  *              value while find index
4790  * @location_key: location key of the struct btrfs_dir_item to match
4791  * @name:       the name to match
4792  * @namelen:    the length of name
4793  * @file_type:  the type of file to math
4794  *
4795  * Return 0 if no error occurred.
4796  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4797  * DIR_ITEM/DIR_INDEX
4798  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4799  * and DIR_ITEM/DIR_INDEX mismatch
4800  */
4801 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4802                          struct btrfs_key *location_key, char *name,
4803                          u32 namelen, u8 file_type)
4804 {
4805         struct btrfs_path path;
4806         struct extent_buffer *node;
4807         struct btrfs_dir_item *di;
4808         struct btrfs_key location;
4809         char namebuf[BTRFS_NAME_LEN] = {0};
4810         u32 total;
4811         u32 cur = 0;
4812         u32 len;
4813         u32 data_len;
4814         u8 filetype;
4815         int slot;
4816         int ret;
4817
4818         /* get the index by traversing all index */
4819         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4820                 ret = find_dir_index(root, key->objectid,
4821                                      location_key->objectid, &key->offset,
4822                                      name, namelen, file_type);
4823                 if (ret)
4824                         ret = DIR_INDEX_MISSING;
4825                 return ret;
4826         }
4827
4828         btrfs_init_path(&path);
4829         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4830         if (ret) {
4831                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4832                         DIR_INDEX_MISSING;
4833                 goto out;
4834         }
4835
4836         /* Check whether inode_id/filetype/name match */
4837         node = path.nodes[0];
4838         slot = path.slots[0];
4839         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4840         total = btrfs_item_size_nr(node, slot);
4841         while (cur < total) {
4842                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4843                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4844
4845                 len = btrfs_dir_name_len(node, di);
4846                 data_len = btrfs_dir_data_len(node, di);
4847
4848                 btrfs_dir_item_key_to_cpu(node, di, &location);
4849                 if (location.objectid != location_key->objectid ||
4850                     location.type != location_key->type ||
4851                     location.offset != location_key->offset)
4852                         goto next;
4853
4854                 filetype = btrfs_dir_type(node, di);
4855                 if (file_type != filetype)
4856                         goto next;
4857
4858                 if (len > BTRFS_NAME_LEN) {
4859                         len = BTRFS_NAME_LEN;
4860                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4861                         root->objectid,
4862                         key->type == BTRFS_DIR_ITEM_KEY ?
4863                         "DIR_ITEM" : "DIR_INDEX",
4864                         key->objectid, key->offset, len);
4865                 }
4866                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4867                                    len);
4868                 if (len != namelen || strncmp(namebuf, name, len))
4869                         goto next;
4870
4871                 ret = 0;
4872                 goto out;
4873 next:
4874                 len += sizeof(*di) + data_len;
4875                 di = (struct btrfs_dir_item *)((char *)di + len);
4876                 cur += len;
4877         }
4878
4879 out:
4880         btrfs_release_path(&path);
4881         return ret;
4882 }
4883
4884 /*
4885  * Prints inode ref error message
4886  */
4887 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4888                                 u64 index, const char *namebuf, int name_len,
4889                                 u8 filetype, int err)
4890 {
4891         if (!err)
4892                 return;
4893
4894         /* root dir error */
4895         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4896                 error(
4897         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4898                       root->objectid, key->objectid, key->offset, namebuf);
4899                 return;
4900         }
4901
4902         /* normal error */
4903         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4904                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4905                       root->objectid, key->offset,
4906                       btrfs_name_hash(namebuf, name_len),
4907                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4908                       namebuf, filetype);
4909         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4910                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4911                       root->objectid, key->offset, index,
4912                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4913                       namebuf, filetype);
4914 }
4915
4916 /*
4917  * Insert the missing inode item.
4918  *
4919  * Returns 0 means success.
4920  * Returns <0 means error.
4921  */
4922 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4923                                      u8 filetype)
4924 {
4925         struct btrfs_key key;
4926         struct btrfs_trans_handle *trans;
4927         struct btrfs_path path;
4928         int ret;
4929
4930         key.objectid = ino;
4931         key.type = BTRFS_INODE_ITEM_KEY;
4932         key.offset = 0;
4933
4934         btrfs_init_path(&path);
4935         trans = btrfs_start_transaction(root, 1);
4936         if (IS_ERR(trans)) {
4937                 ret = -EIO;
4938                 goto out;
4939         }
4940
4941         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4942         if (ret < 0 || !ret)
4943                 goto fail;
4944
4945         /* insert inode item */
4946         create_inode_item_lowmem(trans, root, ino, filetype);
4947         ret = 0;
4948 fail:
4949         btrfs_commit_transaction(trans, root);
4950 out:
4951         if (ret)
4952                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4953                       root->objectid, ino);
4954         btrfs_release_path(&path);
4955         return ret;
4956 }
4957
4958 /*
4959  * The ternary means dir item, dir index and relative inode ref.
4960  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4961  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4962  * strategy:
4963  * If two of three is missing or mismatched, delete the existing one.
4964  * If one of three is missing or mismatched, add the missing one.
4965  *
4966  * returns 0 means success.
4967  * returns not 0 means on error;
4968  */
4969 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4970                           u64 index, char *name, int name_len, u8 filetype,
4971                           int err)
4972 {
4973         struct btrfs_trans_handle *trans;
4974         int stage = 0;
4975         int ret = 0;
4976
4977         /*
4978          * stage shall be one of following valild values:
4979          *      0: Fine, nothing to do.
4980          *      1: One of three is wrong, so add missing one.
4981          *      2: Two of three is wrong, so delete existed one.
4982          */
4983         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4984                 stage++;
4985         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4986                 stage++;
4987         if (err & (INODE_REF_MISSING))
4988                 stage++;
4989
4990         /* stage must be smllarer than 3 */
4991         ASSERT(stage < 3);
4992
4993         trans = btrfs_start_transaction(root, 1);
4994         if (stage == 2) {
4995                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4996                                    name_len, 0);
4997                 goto out;
4998         }
4999         if (stage == 1) {
5000                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
5001                                filetype, &index, 1, 1);
5002                 goto out;
5003         }
5004 out:
5005         btrfs_commit_transaction(trans, root);
5006
5007         if (ret)
5008                 error("fail to repair inode %llu name %s filetype %u",
5009                       ino, name, filetype);
5010         else
5011                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5012                        stage == 2 ? "Delete" : "Add",
5013                        ino, name, filetype);
5014
5015         return ret;
5016 }
5017
5018 /*
5019  * Traverse the given INODE_REF and call find_dir_item() to find related
5020  * DIR_ITEM/DIR_INDEX.
5021  *
5022  * @root:       the root of the fs/file tree
5023  * @ref_key:    the key of the INODE_REF
5024  * @path        the path provides node and slot
5025  * @refs:       the count of INODE_REF
5026  * @mode:       the st_mode of INODE_ITEM
5027  * @name_ret:   returns with the first ref's name
5028  * @name_len_ret:    len of the name_ret
5029  *
5030  * Return 0 if no error occurred.
5031  */
5032 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5033                            struct btrfs_path *path, char *name_ret,
5034                            u32 *namelen_ret, u64 *refs_ret, int mode)
5035 {
5036         struct btrfs_key key;
5037         struct btrfs_key location;
5038         struct btrfs_inode_ref *ref;
5039         struct extent_buffer *node;
5040         char namebuf[BTRFS_NAME_LEN] = {0};
5041         u32 total;
5042         u32 cur = 0;
5043         u32 len;
5044         u32 name_len;
5045         u64 index;
5046         int ret;
5047         int err = 0;
5048         int tmp_err;
5049         int slot;
5050         int need_research = 0;
5051         u64 refs;
5052
5053 begin:
5054         err = 0;
5055         cur = 0;
5056         refs = *refs_ret;
5057
5058         /* since after repair, path and the dir item may be changed */
5059         if (need_research) {
5060                 need_research = 0;
5061                 btrfs_release_path(path);
5062                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5063                 /* the item was deleted, let path point to the last checked item */
5064                 if (ret > 0) {
5065                         if (path->slots[0] == 0)
5066                                 btrfs_prev_leaf(root, path);
5067                         else
5068                                 path->slots[0]--;
5069                 }
5070                 if (ret)
5071                         goto out;
5072         }
5073
5074         location.objectid = ref_key->objectid;
5075         location.type = BTRFS_INODE_ITEM_KEY;
5076         location.offset = 0;
5077         node = path->nodes[0];
5078         slot = path->slots[0];
5079
5080         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5081         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5082         total = btrfs_item_size_nr(node, slot);
5083
5084 next:
5085         /* Update inode ref count */
5086         refs++;
5087         tmp_err = 0;
5088         index = btrfs_inode_ref_index(node, ref);
5089         name_len = btrfs_inode_ref_name_len(node, ref);
5090
5091         if (name_len <= BTRFS_NAME_LEN) {
5092                 len = name_len;
5093         } else {
5094                 len = BTRFS_NAME_LEN;
5095                 warning("root %llu INODE_REF[%llu %llu] name too long",
5096                         root->objectid, ref_key->objectid, ref_key->offset);
5097         }
5098
5099         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5100
5101         /* copy the first name found to name_ret */
5102         if (refs == 1 && name_ret) {
5103                 memcpy(name_ret, namebuf, len);
5104                 *namelen_ret = len;
5105         }
5106
5107         /* Check root dir ref */
5108         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5109                 if (index != 0 || len != strlen("..") ||
5110                     strncmp("..", namebuf, len) ||
5111                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5112                         /* set err bits then repair will delete the ref */
5113                         err |= DIR_INDEX_MISSING;
5114                         err |= DIR_ITEM_MISSING;
5115                 }
5116                 goto end;
5117         }
5118
5119         /* Find related DIR_INDEX */
5120         key.objectid = ref_key->offset;
5121         key.type = BTRFS_DIR_INDEX_KEY;
5122         key.offset = index;
5123         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5124                             imode_to_type(mode));
5125
5126         /* Find related dir_item */
5127         key.objectid = ref_key->offset;
5128         key.type = BTRFS_DIR_ITEM_KEY;
5129         key.offset = btrfs_name_hash(namebuf, len);
5130         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5131                             imode_to_type(mode));
5132 end:
5133         if (tmp_err && repair) {
5134                 ret = repair_ternary_lowmem(root, ref_key->offset,
5135                                             ref_key->objectid, index, namebuf,
5136                                             name_len, imode_to_type(mode),
5137                                             tmp_err);
5138                 if (!ret) {
5139                         need_research = 1;
5140                         goto begin;
5141                 }
5142         }
5143         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5144                             imode_to_type(mode), tmp_err);
5145         err |= tmp_err;
5146         len = sizeof(*ref) + name_len;
5147         ref = (struct btrfs_inode_ref *)((char *)ref + len);
5148         cur += len;
5149         if (cur < total)
5150                 goto next;
5151
5152 out:
5153         *refs_ret = refs;
5154         return err;
5155 }
5156
5157 /*
5158  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5159  * DIR_ITEM/DIR_INDEX.
5160  *
5161  * @root:       the root of the fs/file tree
5162  * @ref_key:    the key of the INODE_EXTREF
5163  * @refs:       the count of INODE_EXTREF
5164  * @mode:       the st_mode of INODE_ITEM
5165  *
5166  * Return 0 if no error occurred.
5167  */
5168 static int check_inode_extref(struct btrfs_root *root,
5169                               struct btrfs_key *ref_key,
5170                               struct extent_buffer *node, int slot, u64 *refs,
5171                               int mode)
5172 {
5173         struct btrfs_key key;
5174         struct btrfs_key location;
5175         struct btrfs_inode_extref *extref;
5176         char namebuf[BTRFS_NAME_LEN] = {0};
5177         u32 total;
5178         u32 cur = 0;
5179         u32 len;
5180         u32 name_len;
5181         u64 index;
5182         u64 parent;
5183         int ret;
5184         int err = 0;
5185
5186         location.objectid = ref_key->objectid;
5187         location.type = BTRFS_INODE_ITEM_KEY;
5188         location.offset = 0;
5189
5190         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5191         total = btrfs_item_size_nr(node, slot);
5192
5193 next:
5194         /* update inode ref count */
5195         (*refs)++;
5196         name_len = btrfs_inode_extref_name_len(node, extref);
5197         index = btrfs_inode_extref_index(node, extref);
5198         parent = btrfs_inode_extref_parent(node, extref);
5199         if (name_len <= BTRFS_NAME_LEN) {
5200                 len = name_len;
5201         } else {
5202                 len = BTRFS_NAME_LEN;
5203                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5204                         root->objectid, ref_key->objectid, ref_key->offset);
5205         }
5206         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5207
5208         /* Check root dir ref name */
5209         if (index == 0 && strncmp(namebuf, "..", name_len)) {
5210                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5211                       root->objectid, ref_key->objectid, ref_key->offset,
5212                       namebuf);
5213                 err |= ROOT_DIR_ERROR;
5214         }
5215
5216         /* find related dir_index */
5217         key.objectid = parent;
5218         key.type = BTRFS_DIR_INDEX_KEY;
5219         key.offset = index;
5220         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5221         err |= ret;
5222
5223         /* find related dir_item */
5224         key.objectid = parent;
5225         key.type = BTRFS_DIR_ITEM_KEY;
5226         key.offset = btrfs_name_hash(namebuf, len);
5227         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5228         err |= ret;
5229
5230         len = sizeof(*extref) + name_len;
5231         extref = (struct btrfs_inode_extref *)((char *)extref + len);
5232         cur += len;
5233
5234         if (cur < total)
5235                 goto next;
5236
5237         return err;
5238 }
5239
5240 /*
5241  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5242  * DIR_ITEM/DIR_INDEX match.
5243  * Return with @index_ret.
5244  *
5245  * @root:       the root of the fs/file tree
5246  * @key:        the key of the INODE_REF/INODE_EXTREF
5247  * @name:       the name in the INODE_REF/INODE_EXTREF
5248  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
5249  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
5250  *              value (64)-1 means do not check index
5251  * @ext_ref:    the EXTENDED_IREF feature
5252  *
5253  * Return 0 if no error occurred.
5254  * Return >0 for error bitmap
5255  */
5256 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5257                           char *name, int namelen, u64 *index_ret,
5258                           unsigned int ext_ref)
5259 {
5260         struct btrfs_path path;
5261         struct btrfs_inode_ref *ref;
5262         struct btrfs_inode_extref *extref;
5263         struct extent_buffer *node;
5264         char ref_namebuf[BTRFS_NAME_LEN] = {0};
5265         u32 total;
5266         u32 cur = 0;
5267         u32 len;
5268         u32 ref_namelen;
5269         u64 ref_index;
5270         u64 parent;
5271         u64 dir_id;
5272         int slot;
5273         int ret;
5274
5275         ASSERT(index_ret);
5276
5277         btrfs_init_path(&path);
5278         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5279         if (ret) {
5280                 ret = INODE_REF_MISSING;
5281                 goto extref;
5282         }
5283
5284         node = path.nodes[0];
5285         slot = path.slots[0];
5286
5287         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5288         total = btrfs_item_size_nr(node, slot);
5289
5290         /* Iterate all entry of INODE_REF */
5291         while (cur < total) {
5292                 ret = INODE_REF_MISSING;
5293
5294                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5295                 ref_index = btrfs_inode_ref_index(node, ref);
5296                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5297                         goto next_ref;
5298
5299                 if (cur + sizeof(*ref) + ref_namelen > total ||
5300                     ref_namelen > BTRFS_NAME_LEN) {
5301                         warning("root %llu INODE %s[%llu %llu] name too long",
5302                                 root->objectid,
5303                                 key->type == BTRFS_INODE_REF_KEY ?
5304                                         "REF" : "EXTREF",
5305                                 key->objectid, key->offset);
5306
5307                         if (cur + sizeof(*ref) > total)
5308                                 break;
5309                         len = min_t(u32, total - cur - sizeof(*ref),
5310                                     BTRFS_NAME_LEN);
5311                 } else {
5312                         len = ref_namelen;
5313                 }
5314
5315                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5316                                    len);
5317
5318                 if (len != namelen || strncmp(ref_namebuf, name, len))
5319                         goto next_ref;
5320
5321                 *index_ret = ref_index;
5322                 ret = 0;
5323                 goto out;
5324 next_ref:
5325                 len = sizeof(*ref) + ref_namelen;
5326                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5327                 cur += len;
5328         }
5329
5330 extref:
5331         /* Skip if not support EXTENDED_IREF feature */
5332         if (!ext_ref)
5333                 goto out;
5334
5335         btrfs_release_path(&path);
5336         btrfs_init_path(&path);
5337
5338         dir_id = key->offset;
5339         key->type = BTRFS_INODE_EXTREF_KEY;
5340         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5341
5342         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5343         if (ret) {
5344                 ret = INODE_REF_MISSING;
5345                 goto out;
5346         }
5347
5348         node = path.nodes[0];
5349         slot = path.slots[0];
5350
5351         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5352         cur = 0;
5353         total = btrfs_item_size_nr(node, slot);
5354
5355         /* Iterate all entry of INODE_EXTREF */
5356         while (cur < total) {
5357                 ret = INODE_REF_MISSING;
5358
5359                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5360                 ref_index = btrfs_inode_extref_index(node, extref);
5361                 parent = btrfs_inode_extref_parent(node, extref);
5362                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5363                         goto next_extref;
5364
5365                 if (parent != dir_id)
5366                         goto next_extref;
5367
5368                 if (ref_namelen <= BTRFS_NAME_LEN) {
5369                         len = ref_namelen;
5370                 } else {
5371                         len = BTRFS_NAME_LEN;
5372                         warning("root %llu INODE %s[%llu %llu] name too long",
5373                                 root->objectid,
5374                                 key->type == BTRFS_INODE_REF_KEY ?
5375                                         "REF" : "EXTREF",
5376                                 key->objectid, key->offset);
5377                 }
5378                 read_extent_buffer(node, ref_namebuf,
5379                                    (unsigned long)(extref + 1), len);
5380
5381                 if (len != namelen || strncmp(ref_namebuf, name, len))
5382                         goto next_extref;
5383
5384                 *index_ret = ref_index;
5385                 ret = 0;
5386                 goto out;
5387
5388 next_extref:
5389                 len = sizeof(*extref) + ref_namelen;
5390                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5391                 cur += len;
5392
5393         }
5394 out:
5395         btrfs_release_path(&path);
5396         return ret;
5397 }
5398
5399 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5400                                u64 ino, u64 index, const char *namebuf,
5401                                int name_len, u8 filetype, int err)
5402 {
5403         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5404                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5405                       root->objectid, key->objectid, key->offset, namebuf,
5406                       filetype,
5407                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5408         }
5409
5410         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5411                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5412                       root->objectid, key->objectid, index, namebuf, filetype,
5413                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5414         }
5415
5416         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5417                 error(
5418                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5419                       root->objectid, ino, index, namebuf, filetype,
5420                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5421         }
5422
5423         if (err & INODE_REF_MISSING)
5424                 error(
5425                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5426                       root->objectid, ino, key->objectid, namebuf, filetype);
5427
5428 }
5429
5430 /*
5431  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5432  *
5433  * Returns error after repair
5434  */
5435 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5436                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5437                            int err)
5438 {
5439         int ret;
5440
5441         if (err & INODE_ITEM_MISSING) {
5442                 ret = repair_inode_item_missing(root, ino, filetype);
5443                 if (!ret)
5444                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5445         }
5446
5447         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5448                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5449                                             name_len, filetype, err);
5450                 if (!ret) {
5451                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5452                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5453                         err &= ~(INODE_REF_MISSING);
5454                 }
5455         }
5456         return err;
5457 }
5458
5459 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5460                 u64 *size_ret)
5461 {
5462         struct btrfs_key key;
5463         struct btrfs_path path;
5464         u32 len;
5465         struct btrfs_dir_item *di;
5466         int ret;
5467         int cur = 0;
5468         int total = 0;
5469
5470         ASSERT(size_ret);
5471         *size_ret = 0;
5472
5473         key.objectid = ino;
5474         key.type = type;
5475         key.offset = (u64)-1;
5476
5477         btrfs_init_path(&path);
5478         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5479         if (ret < 0) {
5480                 ret = -EIO;
5481                 goto out;
5482         }
5483         /* if found, go to spacial case */
5484         if (ret == 0)
5485                 goto special_case;
5486
5487 loop:
5488         ret = btrfs_previous_item(root, &path, ino, type);
5489
5490         if (ret) {
5491                 ret = 0;
5492                 goto out;
5493         }
5494
5495 special_case:
5496         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5497         cur = 0;
5498         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5499
5500         while (cur < total) {
5501                 len = btrfs_dir_name_len(path.nodes[0], di);
5502                 if (len > BTRFS_NAME_LEN)
5503                         len = BTRFS_NAME_LEN;
5504                 *size_ret += len;
5505
5506                 len += btrfs_dir_data_len(path.nodes[0], di);
5507                 len += sizeof(*di);
5508                 di = (struct btrfs_dir_item *)((char *)di + len);
5509                 cur += len;
5510         }
5511         goto loop;
5512
5513 out:
5514         btrfs_release_path(&path);
5515         return ret;
5516 }
5517
5518 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5519 {
5520         u64 item_size;
5521         u64 index_size;
5522         int ret;
5523
5524         ASSERT(size);
5525         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5526         if (ret)
5527                 goto out;
5528
5529         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5530         if (ret)
5531                 goto out;
5532
5533         *size = item_size + index_size;
5534
5535 out:
5536         if (ret)
5537                 error("failed to count root %llu INODE[%llu] root size",
5538                       root->objectid, ino);
5539         return ret;
5540 }
5541
5542 /*
5543  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5544  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5545  *
5546  * @root:       the root of the fs/file tree
5547  * @key:        the key of the INODE_REF/INODE_EXTREF
5548  * @path:       the path
5549  * @size:       the st_size of the INODE_ITEM
5550  * @ext_ref:    the EXTENDED_IREF feature
5551  *
5552  * Return 0 if no error occurred.
5553  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5554  */
5555 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5556                           struct btrfs_path *path, u64 *size,
5557                           unsigned int ext_ref)
5558 {
5559         struct btrfs_dir_item *di;
5560         struct btrfs_inode_item *ii;
5561         struct btrfs_key key;
5562         struct btrfs_key location;
5563         struct extent_buffer *node;
5564         int slot;
5565         char namebuf[BTRFS_NAME_LEN] = {0};
5566         u32 total;
5567         u32 cur = 0;
5568         u32 len;
5569         u32 name_len;
5570         u32 data_len;
5571         u8 filetype;
5572         u32 mode = 0;
5573         u64 index;
5574         int ret;
5575         int err;
5576         int tmp_err;
5577         int need_research = 0;
5578
5579         /*
5580          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5581          * ignore index check.
5582          */
5583         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5584                 index = di_key->offset;
5585         else
5586                 index = (u64)-1;
5587 begin:
5588         err = 0;
5589         cur = 0;
5590
5591         /* since after repair, path and the dir item may be changed */
5592         if (need_research) {
5593                 need_research = 0;
5594                 err |= DIR_COUNT_AGAIN;
5595                 btrfs_release_path(path);
5596                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5597                 /* the item was deleted, let path point the last checked item */
5598                 if (ret > 0) {
5599                         if (path->slots[0] == 0)
5600                                 btrfs_prev_leaf(root, path);
5601                         else
5602                                 path->slots[0]--;
5603                 }
5604                 if (ret)
5605                         goto out;
5606         }
5607
5608         node = path->nodes[0];
5609         slot = path->slots[0];
5610
5611         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5612         total = btrfs_item_size_nr(node, slot);
5613         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5614
5615         while (cur < total) {
5616                 data_len = btrfs_dir_data_len(node, di);
5617                 tmp_err = 0;
5618                 if (data_len)
5619                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5620                               root->objectid,
5621               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5622                               di_key->objectid, di_key->offset, data_len);
5623
5624                 name_len = btrfs_dir_name_len(node, di);
5625                 if (name_len <= BTRFS_NAME_LEN) {
5626                         len = name_len;
5627                 } else {
5628                         len = BTRFS_NAME_LEN;
5629                         warning("root %llu %s[%llu %llu] name too long",
5630                                 root->objectid,
5631                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5632                                 di_key->objectid, di_key->offset);
5633                 }
5634                 (*size) += name_len;
5635                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5636                                    len);
5637                 filetype = btrfs_dir_type(node, di);
5638
5639                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5640                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5641                         err |= -EIO;
5642                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5643                         root->objectid, di_key->objectid, di_key->offset,
5644                         namebuf, len, filetype, di_key->offset,
5645                         btrfs_name_hash(namebuf, len));
5646                 }
5647
5648                 btrfs_dir_item_key_to_cpu(node, di, &location);
5649                 /* Ignore related ROOT_ITEM check */
5650                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5651                         goto next;
5652
5653                 btrfs_release_path(path);
5654                 /* Check relative INODE_ITEM(existence/filetype) */
5655                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5656                 if (ret) {
5657                         tmp_err |= INODE_ITEM_MISSING;
5658                         goto next;
5659                 }
5660
5661                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5662                                     struct btrfs_inode_item);
5663                 mode = btrfs_inode_mode(path->nodes[0], ii);
5664                 if (imode_to_type(mode) != filetype) {
5665                         tmp_err |= INODE_ITEM_MISMATCH;
5666                         goto next;
5667                 }
5668
5669                 /* Check relative INODE_REF/INODE_EXTREF */
5670                 key.objectid = location.objectid;
5671                 key.type = BTRFS_INODE_REF_KEY;
5672                 key.offset = di_key->objectid;
5673                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5674                                           &index, ext_ref);
5675
5676                 /* check relative INDEX/ITEM */
5677                 key.objectid = di_key->objectid;
5678                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5679                         key.type = BTRFS_DIR_INDEX_KEY;
5680                         key.offset = index;
5681                 } else {
5682                         key.type = BTRFS_DIR_ITEM_KEY;
5683                         key.offset = btrfs_name_hash(namebuf, name_len);
5684                 }
5685
5686                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5687                                          name_len, filetype);
5688                 /* find_dir_item may find index */
5689                 if (key.type == BTRFS_DIR_INDEX_KEY)
5690                         index = key.offset;
5691 next:
5692
5693                 if (tmp_err && repair) {
5694                         ret = repair_dir_item(root, di_key->objectid,
5695                                               location.objectid, index,
5696                                               imode_to_type(mode), namebuf,
5697                                               name_len, tmp_err);
5698                         if (ret != tmp_err) {
5699                                 need_research = 1;
5700                                 goto begin;
5701                         }
5702                 }
5703                 btrfs_release_path(path);
5704                 print_dir_item_err(root, di_key, location.objectid, index,
5705                                    namebuf, name_len, filetype, tmp_err);
5706                 err |= tmp_err;
5707                 len = sizeof(*di) + name_len + data_len;
5708                 di = (struct btrfs_dir_item *)((char *)di + len);
5709                 cur += len;
5710
5711                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5712                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5713                               root->objectid, di_key->objectid,
5714                               di_key->offset);
5715                         break;
5716                 }
5717         }
5718 out:
5719         /* research path */
5720         btrfs_release_path(path);
5721         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5722         if (ret)
5723                 err |= ret > 0 ? -ENOENT : ret;
5724         return err;
5725 }
5726
5727 /*
5728  * Wrapper function of btrfs_punch_hole.
5729  *
5730  * Returns 0 means success.
5731  * Returns not 0 means error.
5732  */
5733 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5734                              u64 len)
5735 {
5736         struct btrfs_trans_handle *trans;
5737         int ret = 0;
5738
5739         trans = btrfs_start_transaction(root, 1);
5740         if (IS_ERR(trans))
5741                 return PTR_ERR(trans);
5742
5743         ret = btrfs_punch_hole(trans, root, ino, start, len);
5744         if (ret)
5745                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5746                       start, len, ino);
5747         else
5748                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5749                        ino);
5750
5751         btrfs_commit_transaction(trans, root);
5752         return ret;
5753 }
5754
5755 /*
5756  * Check file extent datasum/hole, update the size of the file extents,
5757  * check and update the last offset of the file extent.
5758  *
5759  * @root:       the root of fs/file tree.
5760  * @fkey:       the key of the file extent.
5761  * @nodatasum:  INODE_NODATASUM feature.
5762  * @size:       the sum of all EXTENT_DATA items size for this inode.
5763  * @end:        the offset of the last extent.
5764  *
5765  * Return 0 if no error occurred.
5766  */
5767 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5768                              struct extent_buffer *node, int slot,
5769                              unsigned int nodatasum, u64 *size, u64 *end)
5770 {
5771         struct btrfs_file_extent_item *fi;
5772         u64 disk_bytenr;
5773         u64 disk_num_bytes;
5774         u64 extent_num_bytes;
5775         u64 extent_offset;
5776         u64 csum_found;         /* In byte size, sectorsize aligned */
5777         u64 search_start;       /* Logical range start we search for csum */
5778         u64 search_len;         /* Logical range len we search for csum */
5779         unsigned int extent_type;
5780         unsigned int is_hole;
5781         int compressed = 0;
5782         int ret;
5783         int err = 0;
5784
5785         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5786
5787         /* Check inline extent */
5788         extent_type = btrfs_file_extent_type(node, fi);
5789         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5790                 struct btrfs_item *e = btrfs_item_nr(slot);
5791                 u32 item_inline_len;
5792
5793                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5794                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5795                 compressed = btrfs_file_extent_compression(node, fi);
5796                 if (extent_num_bytes == 0) {
5797                         error(
5798                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5799                                 root->objectid, fkey->objectid, fkey->offset);
5800                         err |= FILE_EXTENT_ERROR;
5801                 }
5802                 if (!compressed && extent_num_bytes != item_inline_len) {
5803                         error(
5804                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5805                                 root->objectid, fkey->objectid, fkey->offset,
5806                                 extent_num_bytes, item_inline_len);
5807                         err |= FILE_EXTENT_ERROR;
5808                 }
5809                 *end += extent_num_bytes;
5810                 *size += extent_num_bytes;
5811                 return err;
5812         }
5813
5814         /* Check extent type */
5815         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5816                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5817                 err |= FILE_EXTENT_ERROR;
5818                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5819                       root->objectid, fkey->objectid, fkey->offset);
5820                 return err;
5821         }
5822
5823         /* Check REG_EXTENT/PREALLOC_EXTENT */
5824         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5825         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5826         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5827         extent_offset = btrfs_file_extent_offset(node, fi);
5828         compressed = btrfs_file_extent_compression(node, fi);
5829         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5830
5831         /*
5832          * Check EXTENT_DATA csum
5833          *
5834          * For plain (uncompressed) extent, we should only check the range
5835          * we're referring to, as it's possible that part of prealloc extent
5836          * has been written, and has csum:
5837          *
5838          * |<--- Original large preallocated extent A ---->|
5839          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5840          *      No csum                         Has csum
5841          *
5842          * For compressed extent, we should check the whole range.
5843          */
5844         if (!compressed) {
5845                 search_start = disk_bytenr + extent_offset;
5846                 search_len = extent_num_bytes;
5847         } else {
5848                 search_start = disk_bytenr;
5849                 search_len = disk_num_bytes;
5850         }
5851         ret = count_csum_range(root, search_start, search_len, &csum_found);
5852         if (csum_found > 0 && nodatasum) {
5853                 err |= ODD_CSUM_ITEM;
5854                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5855                       root->objectid, fkey->objectid, fkey->offset);
5856         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5857                    !is_hole && (ret < 0 || csum_found < search_len)) {
5858                 err |= CSUM_ITEM_MISSING;
5859                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5860                       root->objectid, fkey->objectid, fkey->offset,
5861                       csum_found, search_len);
5862         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5863                 err |= ODD_CSUM_ITEM;
5864                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5865                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5866         }
5867
5868         /* Check EXTENT_DATA hole */
5869         if (!no_holes && *end != fkey->offset) {
5870                 if (repair)
5871                         ret = punch_extent_hole(root, fkey->objectid,
5872                                                 *end, fkey->offset - *end);
5873                 if (!repair || ret) {
5874                         err |= FILE_EXTENT_ERROR;
5875                         error(
5876 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5877                                 root->objectid, fkey->objectid, fkey->offset,
5878                                 fkey->objectid, *end);
5879                 }
5880         }
5881
5882         *end += extent_num_bytes;
5883         if (!is_hole)
5884                 *size += extent_num_bytes;
5885
5886         return err;
5887 }
5888
5889 /*
5890  * Set inode item nbytes to @nbytes
5891  *
5892  * Returns  0     on success
5893  * Returns  != 0  on error
5894  */
5895 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5896                                       struct btrfs_path *path,
5897                                       u64 ino, u64 nbytes)
5898 {
5899         struct btrfs_trans_handle *trans;
5900         struct btrfs_inode_item *ii;
5901         struct btrfs_key key;
5902         struct btrfs_key research_key;
5903         int err = 0;
5904         int ret;
5905
5906         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5907
5908         key.objectid = ino;
5909         key.type = BTRFS_INODE_ITEM_KEY;
5910         key.offset = 0;
5911
5912         trans = btrfs_start_transaction(root, 1);
5913         if (IS_ERR(trans)) {
5914                 ret = PTR_ERR(trans);
5915                 err |= ret;
5916                 goto out;
5917         }
5918
5919         btrfs_release_path(path);
5920         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5921         if (ret > 0)
5922                 ret = -ENOENT;
5923         if (ret) {
5924                 err |= ret;
5925                 goto fail;
5926         }
5927
5928         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5929                             struct btrfs_inode_item);
5930         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5931         btrfs_mark_buffer_dirty(path->nodes[0]);
5932 fail:
5933         btrfs_commit_transaction(trans, root);
5934 out:
5935         if (ret)
5936                 error("failed to set nbytes in inode %llu root %llu",
5937                       ino, root->root_key.objectid);
5938         else
5939                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5940                        root->root_key.objectid, nbytes);
5941
5942         /* research path */
5943         btrfs_release_path(path);
5944         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5945         err |= ret;
5946
5947         return err;
5948 }
5949
5950 /*
5951  * Set directory inode isize to @isize.
5952  *
5953  * Returns 0     on success.
5954  * Returns != 0  on error.
5955  */
5956 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5957                                    struct btrfs_path *path,
5958                                    u64 ino, u64 isize)
5959 {
5960         struct btrfs_trans_handle *trans;
5961         struct btrfs_inode_item *ii;
5962         struct btrfs_key key;
5963         struct btrfs_key research_key;
5964         int ret;
5965         int err = 0;
5966
5967         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5968
5969         key.objectid = ino;
5970         key.type = BTRFS_INODE_ITEM_KEY;
5971         key.offset = 0;
5972
5973         trans = btrfs_start_transaction(root, 1);
5974         if (IS_ERR(trans)) {
5975                 ret = PTR_ERR(trans);
5976                 err |= ret;
5977                 goto out;
5978         }
5979
5980         btrfs_release_path(path);
5981         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5982         if (ret > 0)
5983                 ret = -ENOENT;
5984         if (ret) {
5985                 err |= ret;
5986                 goto fail;
5987         }
5988
5989         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5990                             struct btrfs_inode_item);
5991         btrfs_set_inode_size(path->nodes[0], ii, isize);
5992         btrfs_mark_buffer_dirty(path->nodes[0]);
5993 fail:
5994         btrfs_commit_transaction(trans, root);
5995 out:
5996         if (ret)
5997                 error("failed to set isize in inode %llu root %llu",
5998                       ino, root->root_key.objectid);
5999         else
6000                 printf("Set isize in inode %llu root %llu to %llu\n",
6001                        ino, root->root_key.objectid, isize);
6002
6003         btrfs_release_path(path);
6004         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6005         err |= ret;
6006
6007         return err;
6008 }
6009
6010 /*
6011  * Wrapper function for btrfs_add_orphan_item().
6012  *
6013  * Returns 0     on success.
6014  * Returns != 0  on error.
6015  */
6016 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6017                                            struct btrfs_path *path, u64 ino)
6018 {
6019         struct btrfs_trans_handle *trans;
6020         struct btrfs_key research_key;
6021         int ret;
6022         int err = 0;
6023
6024         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6025
6026         trans = btrfs_start_transaction(root, 1);
6027         if (IS_ERR(trans)) {
6028                 ret = PTR_ERR(trans);
6029                 err |= ret;
6030                 goto out;
6031         }
6032
6033         btrfs_release_path(path);
6034         ret = btrfs_add_orphan_item(trans, root, path, ino);
6035         err |= ret;
6036         btrfs_commit_transaction(trans, root);
6037 out:
6038         if (ret)
6039                 error("failed to add inode %llu as orphan item root %llu",
6040                       ino, root->root_key.objectid);
6041         else
6042                 printf("Added inode %llu as orphan item root %llu\n",
6043                        ino, root->root_key.objectid);
6044
6045         btrfs_release_path(path);
6046         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6047         err |= ret;
6048
6049         return err;
6050 }
6051
6052 /* Set inode_item nlink to @ref_count.
6053  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6054  *
6055  * Returns 0 on success
6056  */
6057 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6058                                       struct btrfs_path *path, u64 ino,
6059                                       const char *name, u32 namelen,
6060                                       u64 ref_count, u8 filetype, u64 *nlink)
6061 {
6062         struct btrfs_trans_handle *trans;
6063         struct btrfs_inode_item *ii;
6064         struct btrfs_key key;
6065         struct btrfs_key old_key;
6066         char namebuf[BTRFS_NAME_LEN] = {0};
6067         int name_len;
6068         int ret;
6069         int ret2;
6070
6071         /* save the key */
6072         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6073
6074         if (name && namelen) {
6075                 ASSERT(namelen <= BTRFS_NAME_LEN);
6076                 memcpy(namebuf, name, namelen);
6077                 name_len = namelen;
6078         } else {
6079                 sprintf(namebuf, "%llu", ino);
6080                 name_len = count_digits(ino);
6081                 printf("Can't find file name for inode %llu, use %s instead\n",
6082                        ino, namebuf);
6083         }
6084
6085         trans = btrfs_start_transaction(root, 1);
6086         if (IS_ERR(trans)) {
6087                 ret = PTR_ERR(trans);
6088                 goto out;
6089         }
6090
6091         btrfs_release_path(path);
6092         /* if refs is 0, put it into lostfound */
6093         if (ref_count == 0) {
6094                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6095                                               name_len, filetype, &ref_count);
6096                 if (ret)
6097                         goto fail;
6098         }
6099
6100         /* reset inode_item's nlink to ref_count */
6101         key.objectid = ino;
6102         key.type = BTRFS_INODE_ITEM_KEY;
6103         key.offset = 0;
6104
6105         btrfs_release_path(path);
6106         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6107         if (ret > 0)
6108                 ret = -ENOENT;
6109         if (ret)
6110                 goto fail;
6111
6112         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6113                             struct btrfs_inode_item);
6114         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6115         btrfs_mark_buffer_dirty(path->nodes[0]);
6116
6117         if (nlink)
6118                 *nlink = ref_count;
6119 fail:
6120         btrfs_commit_transaction(trans, root);
6121 out:
6122         if (ret)
6123                 error(
6124         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6125                        root->objectid, ino, namebuf, filetype);
6126         else
6127                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6128                        root->objectid, ino, namebuf, filetype);
6129
6130         /* research */
6131         btrfs_release_path(path);
6132         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6133         if (ret2 < 0)
6134                 return ret |= ret2;
6135         return ret;
6136 }
6137
6138 /*
6139  * Check INODE_ITEM and related ITEMs (the same inode number)
6140  * 1. check link count
6141  * 2. check inode ref/extref
6142  * 3. check dir item/index
6143  *
6144  * @ext_ref:    the EXTENDED_IREF feature
6145  *
6146  * Return 0 if no error occurred.
6147  * Return >0 for error or hit the traversal is done(by error bitmap)
6148  */
6149 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6150                             unsigned int ext_ref)
6151 {
6152         struct extent_buffer *node;
6153         struct btrfs_inode_item *ii;
6154         struct btrfs_key key;
6155         struct btrfs_key last_key;
6156         u64 inode_id;
6157         u32 mode;
6158         u64 nlink;
6159         u64 nbytes;
6160         u64 isize;
6161         u64 size = 0;
6162         u64 refs = 0;
6163         u64 extent_end = 0;
6164         u64 extent_size = 0;
6165         unsigned int dir;
6166         unsigned int nodatasum;
6167         int slot;
6168         int ret;
6169         int err = 0;
6170         char namebuf[BTRFS_NAME_LEN] = {0};
6171         u32 name_len = 0;
6172
6173         node = path->nodes[0];
6174         slot = path->slots[0];
6175
6176         btrfs_item_key_to_cpu(node, &key, slot);
6177         inode_id = key.objectid;
6178
6179         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6180                 ret = btrfs_next_item(root, path);
6181                 if (ret > 0)
6182                         err |= LAST_ITEM;
6183                 return err;
6184         }
6185
6186         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6187         isize = btrfs_inode_size(node, ii);
6188         nbytes = btrfs_inode_nbytes(node, ii);
6189         mode = btrfs_inode_mode(node, ii);
6190         dir = imode_to_type(mode) == BTRFS_FT_DIR;
6191         nlink = btrfs_inode_nlink(node, ii);
6192         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
6193
6194         while (1) {
6195                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6196                 ret = btrfs_next_item(root, path);
6197                 if (ret < 0) {
6198                         /* out will fill 'err' rusing current statistics */
6199                         goto out;
6200                 } else if (ret > 0) {
6201                         err |= LAST_ITEM;
6202                         goto out;
6203                 }
6204
6205                 node = path->nodes[0];
6206                 slot = path->slots[0];
6207                 btrfs_item_key_to_cpu(node, &key, slot);
6208                 if (key.objectid != inode_id)
6209                         goto out;
6210
6211                 switch (key.type) {
6212                 case BTRFS_INODE_REF_KEY:
6213                         ret = check_inode_ref(root, &key, path, namebuf,
6214                                               &name_len, &refs, mode);
6215                         err |= ret;
6216                         break;
6217                 case BTRFS_INODE_EXTREF_KEY:
6218                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6219                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
6220                                         root->objectid, key.objectid,
6221                                         key.offset);
6222                         ret = check_inode_extref(root, &key, node, slot, &refs,
6223                                                  mode);
6224                         err |= ret;
6225                         break;
6226                 case BTRFS_DIR_ITEM_KEY:
6227                 case BTRFS_DIR_INDEX_KEY:
6228                         if (!dir) {
6229                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6230                                         root->objectid, inode_id,
6231                                         imode_to_type(mode), key.objectid,
6232                                         key.offset);
6233                         }
6234                         ret = check_dir_item(root, &key, path, &size, ext_ref);
6235                         err |= ret;
6236                         break;
6237                 case BTRFS_EXTENT_DATA_KEY:
6238                         if (dir) {
6239                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6240                                         root->objectid, inode_id, key.objectid,
6241                                         key.offset);
6242                         }
6243                         ret = check_file_extent(root, &key, node, slot,
6244                                                 nodatasum, &extent_size,
6245                                                 &extent_end);
6246                         err |= ret;
6247                         break;
6248                 case BTRFS_XATTR_ITEM_KEY:
6249                         break;
6250                 default:
6251                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6252                               key.objectid, key.type, key.offset);
6253                 }
6254         }
6255
6256 out:
6257         if (err & LAST_ITEM) {
6258                 btrfs_release_path(path);
6259                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6260                 if (ret)
6261                         return err;
6262         }
6263
6264         /* verify INODE_ITEM nlink/isize/nbytes */
6265         if (dir) {
6266                 if (repair && (err & DIR_COUNT_AGAIN)) {
6267                         err &= ~DIR_COUNT_AGAIN;
6268                         count_dir_isize(root, inode_id, &size);
6269                 }
6270
6271                 if ((nlink != 1 || refs != 1) && repair) {
6272                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6273                                 namebuf, name_len, refs, imode_to_type(mode),
6274                                 &nlink);
6275                 }
6276
6277                 if (nlink != 1) {
6278                         err |= LINK_COUNT_ERROR;
6279                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6280                               root->objectid, inode_id, nlink);
6281                 }
6282
6283                 /*
6284                  * Just a warning, as dir inode nbytes is just an
6285                  * instructive value.
6286                  */
6287                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6288                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6289                                 root->objectid, inode_id,
6290                                 root->fs_info->nodesize);
6291                 }
6292
6293                 if (isize != size) {
6294                         if (repair)
6295                                 ret = repair_dir_isize_lowmem(root, path,
6296                                                               inode_id, size);
6297                         if (!repair || ret) {
6298                                 err |= ISIZE_ERROR;
6299                                 error(
6300                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6301                                       root->objectid, inode_id, isize, size);
6302                         }
6303                 }
6304         } else {
6305                 if (nlink != refs) {
6306                         if (repair)
6307                                 ret = repair_inode_nlinks_lowmem(root, path,
6308                                          inode_id, namebuf, name_len, refs,
6309                                          imode_to_type(mode), &nlink);
6310                         if (!repair || ret) {
6311                                 err |= LINK_COUNT_ERROR;
6312                                 error(
6313                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6314                                       root->objectid, inode_id, nlink, refs);
6315                         }
6316                 } else if (!nlink) {
6317                         if (repair)
6318                                 ret = repair_inode_orphan_item_lowmem(root,
6319                                                               path, inode_id);
6320                         if (!repair || ret) {
6321                                 err |= ORPHAN_ITEM;
6322                                 error("root %llu INODE[%llu] is orphan item",
6323                                       root->objectid, inode_id);
6324                         }
6325                 }
6326
6327                 if (!nbytes && !no_holes && extent_end < isize) {
6328                         if (repair)
6329                                 ret = punch_extent_hole(root, inode_id,
6330                                                 extent_end, isize - extent_end);
6331                         if (!repair || ret) {
6332                                 err |= NBYTES_ERROR;
6333                                 error(
6334         "root %llu INODE[%llu] size %llu should have a file extent hole",
6335                                       root->objectid, inode_id, isize);
6336                         }
6337                 }
6338
6339                 if (nbytes != extent_size) {
6340                         if (repair)
6341                                 ret = repair_inode_nbytes_lowmem(root, path,
6342                                                          inode_id, extent_size);
6343                         if (!repair || ret) {
6344                                 err |= NBYTES_ERROR;
6345                                 error(
6346         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6347                                       root->objectid, inode_id, nbytes,
6348                                       extent_size);
6349                         }
6350                 }
6351         }
6352
6353         if (err & LAST_ITEM)
6354                 btrfs_next_item(root, path);
6355         return err;
6356 }
6357
6358 /*
6359  * Insert the missing inode item and inode ref.
6360  *
6361  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6362  * Root dir should be handled specially because root dir is the root of fs.
6363  *
6364  * returns err (>0 or 0) after repair
6365  */
6366 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6367 {
6368         struct btrfs_trans_handle *trans;
6369         struct btrfs_key key;
6370         struct btrfs_path path;
6371         int filetype = BTRFS_FT_DIR;
6372         int ret = 0;
6373
6374         btrfs_init_path(&path);
6375
6376         if (err & INODE_REF_MISSING) {
6377                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6378                 key.type = BTRFS_INODE_REF_KEY;
6379                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6380
6381                 trans = btrfs_start_transaction(root, 1);
6382                 if (IS_ERR(trans)) {
6383                         ret = PTR_ERR(trans);
6384                         goto out;
6385                 }
6386
6387                 btrfs_release_path(&path);
6388                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6389                 if (ret)
6390                         goto trans_fail;
6391
6392                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6393                                              BTRFS_FIRST_FREE_OBJECTID,
6394                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6395                 if (ret)
6396                         goto trans_fail;
6397
6398                 printf("Add INODE_REF[%llu %llu] name %s\n",
6399                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6400                        "..");
6401                 err &= ~INODE_REF_MISSING;
6402 trans_fail:
6403                 if (ret)
6404                         error("fail to insert first inode's ref");
6405                 btrfs_commit_transaction(trans, root);
6406         }
6407
6408         if (err & INODE_ITEM_MISSING) {
6409                 ret = repair_inode_item_missing(root,
6410                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6411                 if (ret)
6412                         goto out;
6413                 err &= ~INODE_ITEM_MISSING;
6414         }
6415 out:
6416         if (ret)
6417                 error("fail to repair first inode");
6418         btrfs_release_path(&path);
6419         return err;
6420 }
6421
6422 /*
6423  * check first root dir's inode_item and inode_ref
6424  *
6425  * returns 0 means no error
6426  * returns >0 means error
6427  * returns <0 means fatal error
6428  */
6429 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6430 {
6431         struct btrfs_path path;
6432         struct btrfs_key key;
6433         struct btrfs_inode_item *ii;
6434         u64 index;
6435         u32 mode;
6436         int err = 0;
6437         int ret;
6438
6439         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6440         key.type = BTRFS_INODE_ITEM_KEY;
6441         key.offset = 0;
6442
6443         /* For root being dropped, we don't need to check first inode */
6444         if (btrfs_root_refs(&root->root_item) == 0 &&
6445             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6446             BTRFS_FIRST_FREE_OBJECTID)
6447                 return 0;
6448
6449         btrfs_init_path(&path);
6450         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6451         if (ret < 0)
6452                 goto out;
6453         if (ret > 0) {
6454                 ret = 0;
6455                 err |= INODE_ITEM_MISSING;
6456         } else {
6457                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6458                                     struct btrfs_inode_item);
6459                 mode = btrfs_inode_mode(path.nodes[0], ii);
6460                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6461                         err |= INODE_ITEM_MISMATCH;
6462         }
6463
6464         /* lookup first inode ref */
6465         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6466         key.type = BTRFS_INODE_REF_KEY;
6467         /* special index value */
6468         index = 0;
6469
6470         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6471         if (ret < 0)
6472                 goto out;
6473         err |= ret;
6474
6475 out:
6476         btrfs_release_path(&path);
6477
6478         if (err && repair)
6479                 err = repair_fs_first_inode(root, err);
6480
6481         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6482                 error("root dir INODE_ITEM is %s",
6483                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6484         if (err & INODE_REF_MISSING)
6485                 error("root dir INODE_REF is missing");
6486
6487         return ret < 0 ? ret : err;
6488 }
6489
6490 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6491                                                 u64 parent, u64 root)
6492 {
6493         struct rb_node *node;
6494         struct tree_backref *back = NULL;
6495         struct tree_backref match = {
6496                 .node = {
6497                         .is_data = 0,
6498                 },
6499         };
6500
6501         if (parent) {
6502                 match.parent = parent;
6503                 match.node.full_backref = 1;
6504         } else {
6505                 match.root = root;
6506         }
6507
6508         node = rb_search(&rec->backref_tree, &match.node.node,
6509                          (rb_compare_keys)compare_extent_backref, NULL);
6510         if (node)
6511                 back = to_tree_backref(rb_node_to_extent_backref(node));
6512
6513         return back;
6514 }
6515
6516 static struct data_backref *find_data_backref(struct extent_record *rec,
6517                                                 u64 parent, u64 root,
6518                                                 u64 owner, u64 offset,
6519                                                 int found_ref,
6520                                                 u64 disk_bytenr, u64 bytes)
6521 {
6522         struct rb_node *node;
6523         struct data_backref *back = NULL;
6524         struct data_backref match = {
6525                 .node = {
6526                         .is_data = 1,
6527                 },
6528                 .owner = owner,
6529                 .offset = offset,
6530                 .bytes = bytes,
6531                 .found_ref = found_ref,
6532                 .disk_bytenr = disk_bytenr,
6533         };
6534
6535         if (parent) {
6536                 match.parent = parent;
6537                 match.node.full_backref = 1;
6538         } else {
6539                 match.root = root;
6540         }
6541
6542         node = rb_search(&rec->backref_tree, &match.node.node,
6543                          (rb_compare_keys)compare_extent_backref, NULL);
6544         if (node)
6545                 back = to_data_backref(rb_node_to_extent_backref(node));
6546
6547         return back;
6548 }
6549 /*
6550  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6551  * blocks and integrity of fs tree items.
6552  *
6553  * @root:         the root of the tree to be checked.
6554  * @ext_ref       feature EXTENDED_IREF is enable or not.
6555  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6556  *                otherwise means check fs tree(s) items relationship and
6557  *                @root MUST be a fs tree root.
6558  * Returns 0      represents OK.
6559  * Returns not 0  represents error.
6560  */
6561 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6562                             struct btrfs_root *root, unsigned int ext_ref,
6563                             int check_all)
6564
6565 {
6566         struct btrfs_path path;
6567         struct node_refs nrefs;
6568         struct btrfs_root_item *root_item = &root->root_item;
6569         int ret;
6570         int level;
6571         int err = 0;
6572
6573         memset(&nrefs, 0, sizeof(nrefs));
6574         if (!check_all) {
6575                 /*
6576                  * We need to manually check the first inode item (256)
6577                  * As the following traversal function will only start from
6578                  * the first inode item in the leaf, if inode item (256) is
6579                  * missing we will skip it forever.
6580                  */
6581                 ret = check_fs_first_inode(root, ext_ref);
6582                 if (ret < 0)
6583                         return ret;
6584         }
6585
6586
6587         level = btrfs_header_level(root->node);
6588         btrfs_init_path(&path);
6589
6590         if (btrfs_root_refs(root_item) > 0 ||
6591             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6592                 path.nodes[level] = root->node;
6593                 path.slots[level] = 0;
6594                 extent_buffer_get(root->node);
6595         } else {
6596                 struct btrfs_key key;
6597
6598                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6599                 level = root_item->drop_level;
6600                 path.lowest_level = level;
6601                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6602                 if (ret < 0)
6603                         goto out;
6604                 ret = 0;
6605         }
6606
6607         while (1) {
6608                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6609                                         ext_ref, check_all);
6610
6611                 err |= !!ret;
6612
6613                 /* if ret is negative, walk shall stop */
6614                 if (ret < 0) {
6615                         ret = err;
6616                         break;
6617                 }
6618
6619                 ret = walk_up_tree_v2(root, &path, &level);
6620                 if (ret != 0) {
6621                         /* Normal exit, reset ret to err */
6622                         ret = err;
6623                         break;
6624                 }
6625         }
6626
6627 out:
6628         btrfs_release_path(&path);
6629         return ret;
6630 }
6631
/*
 * Check one fs/subvolume tree in low memory mode.
 *
 * Resets the cached block groups of the filesystem, then runs
 * check_btrfs_root() on @root without a transaction and with check_all == 0,
 * i.e. the fs tree items are checked rather than all tree blocks.
 *
 * @root:       the root of the tree to be checked.
 * @ext_ref:    the EXTENDED_IREF feature
 *
 * Return 0 if no error found.
 * Return non-zero (whatever check_btrfs_root() returned) otherwise.
 */
static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
{
        reset_cached_block_groups(root->fs_info);
        return check_btrfs_root(NULL, root, ext_ref, 0);
}
6646
6647 /*
6648  * Find the relative ref for root_ref and root_backref.
6649  *
6650  * @root:       the root of the root tree.
6651  * @ref_key:    the key of the root ref.
6652  *
6653  * Return 0 if no error occurred.
6654  */
6655 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6656                           struct extent_buffer *node, int slot)
6657 {
6658         struct btrfs_path path;
6659         struct btrfs_key key;
6660         struct btrfs_root_ref *ref;
6661         struct btrfs_root_ref *backref;
6662         char ref_name[BTRFS_NAME_LEN] = {0};
6663         char backref_name[BTRFS_NAME_LEN] = {0};
6664         u64 ref_dirid;
6665         u64 ref_seq;
6666         u32 ref_namelen;
6667         u64 backref_dirid;
6668         u64 backref_seq;
6669         u32 backref_namelen;
6670         u32 len;
6671         int ret;
6672         int err = 0;
6673
6674         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6675         ref_dirid = btrfs_root_ref_dirid(node, ref);
6676         ref_seq = btrfs_root_ref_sequence(node, ref);
6677         ref_namelen = btrfs_root_ref_name_len(node, ref);
6678
6679         if (ref_namelen <= BTRFS_NAME_LEN) {
6680                 len = ref_namelen;
6681         } else {
6682                 len = BTRFS_NAME_LEN;
6683                 warning("%s[%llu %llu] ref_name too long",
6684                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6685                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6686                         ref_key->offset);
6687         }
6688         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6689
6690         /* Find relative root_ref */
6691         key.objectid = ref_key->offset;
6692         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6693         key.offset = ref_key->objectid;
6694
6695         btrfs_init_path(&path);
6696         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6697         if (ret) {
6698                 err |= ROOT_REF_MISSING;
6699                 error("%s[%llu %llu] couldn't find relative ref",
6700                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6701                       "ROOT_REF" : "ROOT_BACKREF",
6702                       ref_key->objectid, ref_key->offset);
6703                 goto out;
6704         }
6705
6706         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6707                                  struct btrfs_root_ref);
6708         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6709         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6710         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6711
6712         if (backref_namelen <= BTRFS_NAME_LEN) {
6713                 len = backref_namelen;
6714         } else {
6715                 len = BTRFS_NAME_LEN;
6716                 warning("%s[%llu %llu] ref_name too long",
6717                         key.type == BTRFS_ROOT_REF_KEY ?
6718                         "ROOT_REF" : "ROOT_BACKREF",
6719                         key.objectid, key.offset);
6720         }
6721         read_extent_buffer(path.nodes[0], backref_name,
6722                            (unsigned long)(backref + 1), len);
6723
6724         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6725             ref_namelen != backref_namelen ||
6726             strncmp(ref_name, backref_name, len)) {
6727                 err |= ROOT_REF_MISMATCH;
6728                 error("%s[%llu %llu] mismatch relative ref",
6729                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6730                       "ROOT_REF" : "ROOT_BACKREF",
6731                       ref_key->objectid, ref_key->offset);
6732         }
6733 out:
6734         btrfs_release_path(&path);
6735         return err;
6736 }
6737
6738 /*
6739  * Check all fs/file tree in low_memory mode.
6740  *
6741  * 1. for fs tree root item, call check_fs_root_v2()
6742  * 2. for fs tree root ref/backref, call check_root_ref()
6743  *
6744  * Return 0 if no error occurred.
6745  */
6746 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6747 {
6748         struct btrfs_root *tree_root = fs_info->tree_root;
6749         struct btrfs_root *cur_root = NULL;
6750         struct btrfs_path path;
6751         struct btrfs_key key;
6752         struct extent_buffer *node;
6753         unsigned int ext_ref;
6754         int slot;
6755         int ret;
6756         int err = 0;
6757
6758         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6759
6760         btrfs_init_path(&path);
6761         key.objectid = BTRFS_FS_TREE_OBJECTID;
6762         key.offset = 0;
6763         key.type = BTRFS_ROOT_ITEM_KEY;
6764
6765         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6766         if (ret < 0) {
6767                 err = ret;
6768                 goto out;
6769         } else if (ret > 0) {
6770                 err = -ENOENT;
6771                 goto out;
6772         }
6773
6774         while (1) {
6775                 node = path.nodes[0];
6776                 slot = path.slots[0];
6777                 btrfs_item_key_to_cpu(node, &key, slot);
6778                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6779                         goto out;
6780                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6781                     fs_root_objectid(key.objectid)) {
6782                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6783                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6784                                                                        &key);
6785                         } else {
6786                                 key.offset = (u64)-1;
6787                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6788                         }
6789
6790                         if (IS_ERR(cur_root)) {
6791                                 error("Fail to read fs/subvol tree: %lld",
6792                                       key.objectid);
6793                                 err = -EIO;
6794                                 goto next;
6795                         }
6796
6797                         ret = check_fs_root_v2(cur_root, ext_ref);
6798                         err |= ret;
6799
6800                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6801                                 btrfs_free_fs_root(cur_root);
6802                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6803                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6804                         ret = check_root_ref(tree_root, &key, node, slot);
6805                         err |= ret;
6806                 }
6807 next:
6808                 ret = btrfs_next_item(tree_root, &path);
6809                 if (ret > 0)
6810                         goto out;
6811                 if (ret < 0) {
6812                         err = ret;
6813                         goto out;
6814                 }
6815         }
6816
6817 out:
6818         btrfs_release_path(&path);
6819         return err;
6820 }
6821
6822 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6823                           struct cache_tree *root_cache)
6824 {
6825         int ret;
6826
6827         if (!ctx.progress_enabled)
6828                 fprintf(stderr, "checking fs roots\n");
6829         if (check_mode == CHECK_MODE_LOWMEM)
6830                 ret = check_fs_roots_v2(fs_info);
6831         else
6832                 ret = check_fs_roots(fs_info, root_cache);
6833
6834         return ret;
6835 }
6836
6837 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6838 {
6839         struct extent_backref *back, *tmp;
6840         struct tree_backref *tback;
6841         struct data_backref *dback;
6842         u64 found = 0;
6843         int err = 0;
6844
6845         rbtree_postorder_for_each_entry_safe(back, tmp,
6846                                              &rec->backref_tree, node) {
6847                 if (!back->found_extent_tree) {
6848                         err = 1;
6849                         if (!print_errs)
6850                                 goto out;
6851                         if (back->is_data) {
6852                                 dback = to_data_backref(back);
6853                                 fprintf(stderr, "Data backref %llu %s %llu"
6854                                         " owner %llu offset %llu num_refs %lu"
6855                                         " not found in extent tree\n",
6856                                         (unsigned long long)rec->start,
6857                                         back->full_backref ?
6858                                         "parent" : "root",
6859                                         back->full_backref ?
6860                                         (unsigned long long)dback->parent:
6861                                         (unsigned long long)dback->root,
6862                                         (unsigned long long)dback->owner,
6863                                         (unsigned long long)dback->offset,
6864                                         (unsigned long)dback->num_refs);
6865                         } else {
6866                                 tback = to_tree_backref(back);
6867                                 fprintf(stderr, "Tree backref %llu parent %llu"
6868                                         " root %llu not found in extent tree\n",
6869                                         (unsigned long long)rec->start,
6870                                         (unsigned long long)tback->parent,
6871                                         (unsigned long long)tback->root);
6872                         }
6873                 }
6874                 if (!back->is_data && !back->found_ref) {
6875                         err = 1;
6876                         if (!print_errs)
6877                                 goto out;
6878                         tback = to_tree_backref(back);
6879                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6880                                 (unsigned long long)rec->start,
6881                                 back->full_backref ? "parent" : "root",
6882                                 back->full_backref ?
6883                                 (unsigned long long)tback->parent :
6884                                 (unsigned long long)tback->root, back);
6885                 }
6886                 if (back->is_data) {
6887                         dback = to_data_backref(back);
6888                         if (dback->found_ref != dback->num_refs) {
6889                                 err = 1;
6890                                 if (!print_errs)
6891                                         goto out;
6892                                 fprintf(stderr, "Incorrect local backref count"
6893                                         " on %llu %s %llu owner %llu"
6894                                         " offset %llu found %u wanted %u back %p\n",
6895                                         (unsigned long long)rec->start,
6896                                         back->full_backref ?
6897                                         "parent" : "root",
6898                                         back->full_backref ?
6899                                         (unsigned long long)dback->parent:
6900                                         (unsigned long long)dback->root,
6901                                         (unsigned long long)dback->owner,
6902                                         (unsigned long long)dback->offset,
6903                                         dback->found_ref, dback->num_refs, back);
6904                         }
6905                         if (dback->disk_bytenr != rec->start) {
6906                                 err = 1;
6907                                 if (!print_errs)
6908                                         goto out;
6909                                 fprintf(stderr, "Backref disk bytenr does not"
6910                                         " match extent record, bytenr=%llu, "
6911                                         "ref bytenr=%llu\n",
6912                                         (unsigned long long)rec->start,
6913                                         (unsigned long long)dback->disk_bytenr);
6914                         }
6915
6916                         if (dback->bytes != rec->nr) {
6917                                 err = 1;
6918                                 if (!print_errs)
6919                                         goto out;
6920                                 fprintf(stderr, "Backref bytes do not match "
6921                                         "extent backref, bytenr=%llu, ref "
6922                                         "bytes=%llu, backref bytes=%llu\n",
6923                                         (unsigned long long)rec->start,
6924                                         (unsigned long long)rec->nr,
6925                                         (unsigned long long)dback->bytes);
6926                         }
6927                 }
6928                 if (!back->is_data) {
6929                         found += 1;
6930                 } else {
6931                         dback = to_data_backref(back);
6932                         found += dback->found_ref;
6933                 }
6934         }
6935         if (found != rec->refs) {
6936                 err = 1;
6937                 if (!print_errs)
6938                         goto out;
6939                 fprintf(stderr, "Incorrect global backref count "
6940                         "on %llu found %llu wanted %llu\n",
6941                         (unsigned long long)rec->start,
6942                         (unsigned long long)found,
6943                         (unsigned long long)rec->refs);
6944         }
6945 out:
6946         return err;
6947 }
6948
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6955
/*
 * Release every backref stored in @rec's backref tree by handing the tree to
 * rb_free_nodes() with __free_one_backref() as the per-node callback.
 */
static void free_all_extent_backrefs(struct extent_record *rec)
{
        rb_free_nodes(&rec->backref_tree, __free_one_backref);
}
6960
6961 static void free_extent_record_cache(struct cache_tree *extent_cache)
6962 {
6963         struct cache_extent *cache;
6964         struct extent_record *rec;
6965
6966         while (1) {
6967                 cache = first_cache_extent(extent_cache);
6968                 if (!cache)
6969                         break;
6970                 rec = container_of(cache, struct extent_record, cache);
6971                 remove_cache_extent(extent_cache, cache);
6972                 free_all_extent_backrefs(rec);
6973                 free(rec);
6974         }
6975 }
6976
6977 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6978                                  struct extent_record *rec)
6979 {
6980         if (rec->content_checked && rec->owner_ref_checked &&
6981             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6982             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6983             !rec->bad_full_backref && !rec->crossing_stripes &&
6984             !rec->wrong_chunk_type) {
6985                 remove_cache_extent(extent_cache, &rec->cache);
6986                 free_all_extent_backrefs(rec);
6987                 list_del_init(&rec->list);
6988                 free(rec);
6989         }
6990         return 0;
6991 }
6992
6993 static int check_owner_ref(struct btrfs_root *root,
6994                             struct extent_record *rec,
6995                             struct extent_buffer *buf)
6996 {
6997         struct extent_backref *node, *tmp;
6998         struct tree_backref *back;
6999         struct btrfs_root *ref_root;
7000         struct btrfs_key key;
7001         struct btrfs_path path;
7002         struct extent_buffer *parent;
7003         int level;
7004         int found = 0;
7005         int ret;
7006
7007         rbtree_postorder_for_each_entry_safe(node, tmp,
7008                                              &rec->backref_tree, node) {
7009                 if (node->is_data)
7010                         continue;
7011                 if (!node->found_ref)
7012                         continue;
7013                 if (node->full_backref)
7014                         continue;
7015                 back = to_tree_backref(node);
7016                 if (btrfs_header_owner(buf) == back->root)
7017                         return 0;
7018         }
7019         BUG_ON(rec->is_root);
7020
7021         /* try to find the block by search corresponding fs tree */
7022         key.objectid = btrfs_header_owner(buf);
7023         key.type = BTRFS_ROOT_ITEM_KEY;
7024         key.offset = (u64)-1;
7025
7026         ref_root = btrfs_read_fs_root(root->fs_info, &key);
7027         if (IS_ERR(ref_root))
7028                 return 1;
7029
7030         level = btrfs_header_level(buf);
7031         if (level == 0)
7032                 btrfs_item_key_to_cpu(buf, &key, 0);
7033         else
7034                 btrfs_node_key_to_cpu(buf, &key, 0);
7035
7036         btrfs_init_path(&path);
7037         path.lowest_level = level + 1;
7038         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7039         if (ret < 0)
7040                 return 0;
7041
7042         parent = path.nodes[level + 1];
7043         if (parent && buf->start == btrfs_node_blockptr(parent,
7044                                                         path.slots[level + 1]))
7045                 found = 1;
7046
7047         btrfs_release_path(&path);
7048         return found ? 0 : 1;
7049 }
7050
7051 static int is_extent_tree_record(struct extent_record *rec)
7052 {
7053         struct extent_backref *node, *tmp;
7054         struct tree_backref *back;
7055         int is_extent = 0;
7056
7057         rbtree_postorder_for_each_entry_safe(node, tmp,
7058                                              &rec->backref_tree, node) {
7059                 if (node->is_data)
7060                         return 0;
7061                 back = to_tree_backref(node);
7062                 if (node->full_backref)
7063                         return 0;
7064                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
7065                         is_extent = 1;
7066         }
7067         return is_extent;
7068 }
7069
7070
7071 static int record_bad_block_io(struct btrfs_fs_info *info,
7072                                struct cache_tree *extent_cache,
7073                                u64 start, u64 len)
7074 {
7075         struct extent_record *rec;
7076         struct cache_extent *cache;
7077         struct btrfs_key key;
7078
7079         cache = lookup_cache_extent(extent_cache, start, len);
7080         if (!cache)
7081                 return 0;
7082
7083         rec = container_of(cache, struct extent_record, cache);
7084         if (!is_extent_tree_record(rec))
7085                 return 0;
7086
7087         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7088         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
7089 }
7090
7091 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
7092                        struct extent_buffer *buf, int slot)
7093 {
7094         if (btrfs_header_level(buf)) {
7095                 struct btrfs_key_ptr ptr1, ptr2;
7096
7097                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
7098                                    sizeof(struct btrfs_key_ptr));
7099                 read_extent_buffer(buf, &ptr2,
7100                                    btrfs_node_key_ptr_offset(slot + 1),
7101                                    sizeof(struct btrfs_key_ptr));
7102                 write_extent_buffer(buf, &ptr1,
7103                                     btrfs_node_key_ptr_offset(slot + 1),
7104                                     sizeof(struct btrfs_key_ptr));
7105                 write_extent_buffer(buf, &ptr2,
7106                                     btrfs_node_key_ptr_offset(slot),
7107                                     sizeof(struct btrfs_key_ptr));
7108                 if (slot == 0) {
7109                         struct btrfs_disk_key key;
7110                         btrfs_node_key(buf, &key, 0);
7111                         btrfs_fixup_low_keys(root, path, &key,
7112                                              btrfs_header_level(buf) + 1);
7113                 }
7114         } else {
7115                 struct btrfs_item *item1, *item2;
7116                 struct btrfs_key k1, k2;
7117                 char *item1_data, *item2_data;
7118                 u32 item1_offset, item2_offset, item1_size, item2_size;
7119
7120                 item1 = btrfs_item_nr(slot);
7121                 item2 = btrfs_item_nr(slot + 1);
7122                 btrfs_item_key_to_cpu(buf, &k1, slot);
7123                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
7124                 item1_offset = btrfs_item_offset(buf, item1);
7125                 item2_offset = btrfs_item_offset(buf, item2);
7126                 item1_size = btrfs_item_size(buf, item1);
7127                 item2_size = btrfs_item_size(buf, item2);
7128
7129                 item1_data = malloc(item1_size);
7130                 if (!item1_data)
7131                         return -ENOMEM;
7132                 item2_data = malloc(item2_size);
7133                 if (!item2_data) {
7134                         free(item1_data);
7135                         return -ENOMEM;
7136                 }
7137
7138                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
7139                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
7140
7141                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
7142                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
7143                 free(item1_data);
7144                 free(item2_data);
7145
7146                 btrfs_set_item_offset(buf, item1, item2_offset);
7147                 btrfs_set_item_offset(buf, item2, item1_offset);
7148                 btrfs_set_item_size(buf, item1, item2_size);
7149                 btrfs_set_item_size(buf, item2, item1_size);
7150
7151                 path->slots[0] = slot;
7152                 btrfs_set_item_key_unsafe(root, path, &k2);
7153                 path->slots[0] = slot + 1;
7154                 btrfs_set_item_key_unsafe(root, path, &k1);
7155         }
7156         return 0;
7157 }
7158
7159 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7160 {
7161         struct extent_buffer *buf;
7162         struct btrfs_key k1, k2;
7163         int i;
7164         int level = path->lowest_level;
7165         int ret = -EIO;
7166
7167         buf = path->nodes[level];
7168         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7169                 if (level) {
7170                         btrfs_node_key_to_cpu(buf, &k1, i);
7171                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
7172                 } else {
7173                         btrfs_item_key_to_cpu(buf, &k1, i);
7174                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
7175                 }
7176                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7177                         continue;
7178                 ret = swap_values(root, path, buf, i);
7179                 if (ret)
7180                         break;
7181                 btrfs_mark_buffer_dirty(buf);
7182                 i = 0;
7183         }
7184         return ret;
7185 }
7186
7187 static int delete_bogus_item(struct btrfs_root *root,
7188                              struct btrfs_path *path,
7189                              struct extent_buffer *buf, int slot)
7190 {
7191         struct btrfs_key key;
7192         int nritems = btrfs_header_nritems(buf);
7193
7194         btrfs_item_key_to_cpu(buf, &key, slot);
7195
7196         /* These are all the keys we can deal with missing. */
7197         if (key.type != BTRFS_DIR_INDEX_KEY &&
7198             key.type != BTRFS_EXTENT_ITEM_KEY &&
7199             key.type != BTRFS_METADATA_ITEM_KEY &&
7200             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7201             key.type != BTRFS_EXTENT_DATA_REF_KEY)
7202                 return -1;
7203
7204         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7205                (unsigned long long)key.objectid, key.type,
7206                (unsigned long long)key.offset, slot, buf->start);
7207         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7208                               btrfs_item_nr_offset(slot + 1),
7209                               sizeof(struct btrfs_item) *
7210                               (nritems - slot - 1));
7211         btrfs_set_header_nritems(buf, nritems - 1);
7212         if (slot == 0) {
7213                 struct btrfs_disk_key disk_key;
7214
7215                 btrfs_item_key(buf, &disk_key, 0);
7216                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7217         }
7218         btrfs_mark_buffer_dirty(buf);
7219         return 0;
7220 }
7221
7222 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7223 {
7224         struct extent_buffer *buf;
7225         int i;
7226         int ret = 0;
7227
7228         /* We should only get this for leaves */
7229         BUG_ON(path->lowest_level);
7230         buf = path->nodes[0];
7231 again:
7232         for (i = 0; i < btrfs_header_nritems(buf); i++) {
7233                 unsigned int shift = 0, offset;
7234
7235                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7236                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
7237                         if (btrfs_item_end_nr(buf, i) >
7238                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
7239                                 ret = delete_bogus_item(root, path, buf, i);
7240                                 if (!ret)
7241                                         goto again;
7242                                 fprintf(stderr, "item is off the end of the "
7243                                         "leaf, can't fix\n");
7244                                 ret = -EIO;
7245                                 break;
7246                         }
7247                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
7248                                 btrfs_item_end_nr(buf, i);
7249                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7250                            btrfs_item_offset_nr(buf, i - 1)) {
7251                         if (btrfs_item_end_nr(buf, i) >
7252                             btrfs_item_offset_nr(buf, i - 1)) {
7253                                 ret = delete_bogus_item(root, path, buf, i);
7254                                 if (!ret)
7255                                         goto again;
7256                                 fprintf(stderr, "items overlap, can't fix\n");
7257                                 ret = -EIO;
7258                                 break;
7259                         }
7260                         shift = btrfs_item_offset_nr(buf, i - 1) -
7261                                 btrfs_item_end_nr(buf, i);
7262                 }
7263                 if (!shift)
7264                         continue;
7265
7266                 printf("Shifting item nr %d by %u bytes in block %llu\n",
7267                        i, shift, (unsigned long long)buf->start);
7268                 offset = btrfs_item_offset_nr(buf, i);
7269                 memmove_extent_buffer(buf,
7270                                       btrfs_leaf_data(buf) + offset + shift,
7271                                       btrfs_leaf_data(buf) + offset,
7272                                       btrfs_item_size_nr(buf, i));
7273                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7274                                       offset + shift);
7275                 btrfs_mark_buffer_dirty(buf);
7276         }
7277
7278         /*
7279          * We may have moved things, in which case we want to exit so we don't
7280          * write those changes out.  Once we have proper abort functionality in
7281          * progs this can be changed to something nicer.
7282          */
7283         BUG_ON(ret);
7284         return ret;
7285 }
7286
7287 /*
7288  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
7289  * then just return -EIO.
7290  */
7291 static int try_to_fix_bad_block(struct btrfs_root *root,
7292                                 struct extent_buffer *buf,
7293                                 enum btrfs_tree_block_status status)
7294 {
7295         struct btrfs_trans_handle *trans;
7296         struct ulist *roots;
7297         struct ulist_node *node;
7298         struct btrfs_root *search_root;
7299         struct btrfs_path path;
7300         struct ulist_iterator iter;
7301         struct btrfs_key root_key, key;
7302         int ret;
7303
7304         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7305             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7306                 return -EIO;
7307
7308         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7309         if (ret)
7310                 return -EIO;
7311
7312         btrfs_init_path(&path);
7313         ULIST_ITER_INIT(&iter);
7314         while ((node = ulist_next(roots, &iter))) {
7315                 root_key.objectid = node->val;
7316                 root_key.type = BTRFS_ROOT_ITEM_KEY;
7317                 root_key.offset = (u64)-1;
7318
7319                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7320                 if (IS_ERR(root)) {
7321                         ret = -EIO;
7322                         break;
7323                 }
7324
7325
7326                 trans = btrfs_start_transaction(search_root, 0);
7327                 if (IS_ERR(trans)) {
7328                         ret = PTR_ERR(trans);
7329                         break;
7330                 }
7331
7332                 path.lowest_level = btrfs_header_level(buf);
7333                 path.skip_check_block = 1;
7334                 if (path.lowest_level)
7335                         btrfs_node_key_to_cpu(buf, &key, 0);
7336                 else
7337                         btrfs_item_key_to_cpu(buf, &key, 0);
7338                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7339                 if (ret) {
7340                         ret = -EIO;
7341                         btrfs_commit_transaction(trans, search_root);
7342                         break;
7343                 }
7344                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7345                         ret = fix_key_order(search_root, &path);
7346                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7347                         ret = fix_item_offset(search_root, &path);
7348                 if (ret) {
7349                         btrfs_commit_transaction(trans, search_root);
7350                         break;
7351                 }
7352                 btrfs_release_path(&path);
7353                 btrfs_commit_transaction(trans, search_root);
7354         }
7355         ulist_free(roots);
7356         btrfs_release_path(&path);
7357         return ret;
7358 }
7359
7360 static int check_block(struct btrfs_root *root,
7361                        struct cache_tree *extent_cache,
7362                        struct extent_buffer *buf, u64 flags)
7363 {
7364         struct extent_record *rec;
7365         struct cache_extent *cache;
7366         struct btrfs_key key;
7367         enum btrfs_tree_block_status status;
7368         int ret = 0;
7369         int level;
7370
7371         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7372         if (!cache)
7373                 return 1;
7374         rec = container_of(cache, struct extent_record, cache);
7375         rec->generation = btrfs_header_generation(buf);
7376
7377         level = btrfs_header_level(buf);
7378         if (btrfs_header_nritems(buf) > 0) {
7379
7380                 if (level == 0)
7381                         btrfs_item_key_to_cpu(buf, &key, 0);
7382                 else
7383                         btrfs_node_key_to_cpu(buf, &key, 0);
7384
7385                 rec->info_objectid = key.objectid;
7386         }
7387         rec->info_level = level;
7388
7389         if (btrfs_is_leaf(buf))
7390                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7391         else
7392                 status = btrfs_check_node(root, &rec->parent_key, buf);
7393
7394         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7395                 if (repair)
7396                         status = try_to_fix_bad_block(root, buf, status);
7397                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7398                         ret = -EIO;
7399                         fprintf(stderr, "bad block %llu\n",
7400                                 (unsigned long long)buf->start);
7401                 } else {
7402                         /*
7403                          * Signal to callers we need to start the scan over
7404                          * again since we'll have cowed blocks.
7405                          */
7406                         ret = -EAGAIN;
7407                 }
7408         } else {
7409                 rec->content_checked = 1;
7410                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7411                         rec->owner_ref_checked = 1;
7412                 else {
7413                         ret = check_owner_ref(root, rec, buf);
7414                         if (!ret)
7415                                 rec->owner_ref_checked = 1;
7416                 }
7417         }
7418         if (!ret)
7419                 maybe_free_extent_rec(extent_cache, rec);
7420         return ret;
7421 }
7422
#if 0
/*
 * Compiled out.  List-based lookup of a tree backref of @rec: with a
 * non-zero @parent it matches full backrefs by parent, otherwise it matches
 * non-full backrefs by @root.  Returns NULL if nothing matches.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct tree_backref *back;

                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                } else if (!node->full_backref && back->root == root) {
                        return back;
                }
        }
        return NULL;
}
#endif
7452
7453 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7454                                                 u64 parent, u64 root)
7455 {
7456         struct tree_backref *ref = malloc(sizeof(*ref));
7457
7458         if (!ref)
7459                 return NULL;
7460         memset(&ref->node, 0, sizeof(ref->node));
7461         if (parent > 0) {
7462                 ref->parent = parent;
7463                 ref->node.full_backref = 1;
7464         } else {
7465                 ref->root = root;
7466                 ref->node.full_backref = 0;
7467         }
7468
7469         return ref;
7470 }
7471
#if 0
/*
 * Compiled out.  List-based lookup of a data backref of @rec: with a
 * non-zero @parent it matches full backrefs by parent, otherwise it matches
 * non-full backrefs by (@root, @owner, @offset).  When both the caller and
 * the candidate have found_ref set, a candidate whose bytes or disk_bytenr
 * differ is skipped.  Returns NULL if nothing matches.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct data_backref *back;

                if (!node->is_data)
                        continue;
                back = to_data_backref(node);

                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                        continue;
                }

                if (node->full_backref)
                        continue;
                if (back->root != root || back->owner != owner ||
                    back->offset != offset)
                        continue;
                if (found_ref && node->found_ref &&
                    (back->bytes != bytes ||
                    back->disk_bytenr != disk_bytenr))
                        continue;
                return back;
        }
        return NULL;
}
#endif
7510
7511 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7512                                                 u64 parent, u64 root,
7513                                                 u64 owner, u64 offset,
7514                                                 u64 max_size)
7515 {
7516         struct data_backref *ref = malloc(sizeof(*ref));
7517
7518         if (!ref)
7519                 return NULL;
7520         memset(&ref->node, 0, sizeof(ref->node));
7521         ref->node.is_data = 1;
7522
7523         if (parent > 0) {
7524                 ref->parent = parent;
7525                 ref->owner = 0;
7526                 ref->offset = 0;
7527                 ref->node.full_backref = 1;
7528         } else {
7529                 ref->root = root;
7530                 ref->owner = owner;
7531                 ref->offset = offset;
7532                 ref->node.full_backref = 0;
7533         }
7534         ref->bytes = max_size;
7535         ref->found_ref = 0;
7536         ref->num_refs = 0;
7537         if (max_size > rec->max_size)
7538                 rec->max_size = max_size;
7539         return ref;
7540 }
7541
7542 /* Check if the type of extent matches with its chunk */
7543 static void check_extent_type(struct extent_record *rec)
7544 {
7545         struct btrfs_block_group_cache *bg_cache;
7546
7547         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7548         if (!bg_cache)
7549                 return;
7550
7551         /* data extent, check chunk directly*/
7552         if (!rec->metadata) {
7553                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7554                         rec->wrong_chunk_type = 1;
7555                 return;
7556         }
7557
7558         /* metadata extent, check the obvious case first */
7559         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7560                                  BTRFS_BLOCK_GROUP_METADATA))) {
7561                 rec->wrong_chunk_type = 1;
7562                 return;
7563         }
7564
7565         /*
7566          * Check SYSTEM extent, as it's also marked as metadata, we can only
7567          * make sure it's a SYSTEM extent by its backref
7568          */
7569         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7570                 struct extent_backref *node;
7571                 struct tree_backref *tback;
7572                 u64 bg_type;
7573
7574                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7575                 if (node->is_data) {
7576                         /* tree block shouldn't have data backref */
7577                         rec->wrong_chunk_type = 1;
7578                         return;
7579                 }
7580                 tback = container_of(node, struct tree_backref, node);
7581
7582                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7583                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7584                 else
7585                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7586                 if (!(bg_cache->flags & bg_type))
7587                         rec->wrong_chunk_type = 1;
7588         }
7589 }
7590
7591 /*
7592  * Allocate a new extent record, fill default values from @tmpl and insert int
7593  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7594  * the cache, otherwise it fails.
7595  */
7596 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7597                 struct extent_record *tmpl)
7598 {
7599         struct extent_record *rec;
7600         int ret = 0;
7601
7602         BUG_ON(tmpl->max_size == 0);
7603         rec = malloc(sizeof(*rec));
7604         if (!rec)
7605                 return -ENOMEM;
7606         rec->start = tmpl->start;
7607         rec->max_size = tmpl->max_size;
7608         rec->nr = max(tmpl->nr, tmpl->max_size);
7609         rec->found_rec = tmpl->found_rec;
7610         rec->content_checked = tmpl->content_checked;
7611         rec->owner_ref_checked = tmpl->owner_ref_checked;
7612         rec->num_duplicates = 0;
7613         rec->metadata = tmpl->metadata;
7614         rec->flag_block_full_backref = FLAG_UNSET;
7615         rec->bad_full_backref = 0;
7616         rec->crossing_stripes = 0;
7617         rec->wrong_chunk_type = 0;
7618         rec->is_root = tmpl->is_root;
7619         rec->refs = tmpl->refs;
7620         rec->extent_item_refs = tmpl->extent_item_refs;
7621         rec->parent_generation = tmpl->parent_generation;
7622         INIT_LIST_HEAD(&rec->backrefs);
7623         INIT_LIST_HEAD(&rec->dups);
7624         INIT_LIST_HEAD(&rec->list);
7625         rec->backref_tree = RB_ROOT;
7626         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7627         rec->cache.start = tmpl->start;
7628         rec->cache.size = tmpl->nr;
7629         ret = insert_cache_extent(extent_cache, &rec->cache);
7630         if (ret) {
7631                 free(rec);
7632                 return ret;
7633         }
7634         bytes_used += rec->nr;
7635
7636         if (tmpl->metadata)
7637                 rec->crossing_stripes = check_crossing_stripes(global_info,
7638                                 rec->start, global_info->nodesize);
7639         check_extent_type(rec);
7640         return ret;
7641 }
7642
7643 /*
7644  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7645  * some are hints:
7646  * - refs              - if found, increase refs
7647  * - is_root           - if found, set
7648  * - content_checked   - if found, set
7649  * - owner_ref_checked - if found, set
7650  *
7651  * If not found, create a new one, initialize and insert.
7652  */
7653 static int add_extent_rec(struct cache_tree *extent_cache,
7654                 struct extent_record *tmpl)
7655 {
7656         struct extent_record *rec;
7657         struct cache_extent *cache;
7658         int ret = 0;
7659         int dup = 0;
7660
7661         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7662         if (cache) {
7663                 rec = container_of(cache, struct extent_record, cache);
7664                 if (tmpl->refs)
7665                         rec->refs++;
7666                 if (rec->nr == 1)
7667                         rec->nr = max(tmpl->nr, tmpl->max_size);
7668
7669                 /*
7670                  * We need to make sure to reset nr to whatever the extent
7671                  * record says was the real size, this way we can compare it to
7672                  * the backrefs.
7673                  */
7674                 if (tmpl->found_rec) {
7675                         if (tmpl->start != rec->start || rec->found_rec) {
7676                                 struct extent_record *tmp;
7677
7678                                 dup = 1;
7679                                 if (list_empty(&rec->list))
7680                                         list_add_tail(&rec->list,
7681                                                       &duplicate_extents);
7682
7683                                 /*
7684                                  * We have to do this song and dance in case we
7685                                  * find an extent record that falls inside of
7686                                  * our current extent record but does not have
7687                                  * the same objectid.
7688                                  */
7689                                 tmp = malloc(sizeof(*tmp));
7690                                 if (!tmp)
7691                                         return -ENOMEM;
7692                                 tmp->start = tmpl->start;
7693                                 tmp->max_size = tmpl->max_size;
7694                                 tmp->nr = tmpl->nr;
7695                                 tmp->found_rec = 1;
7696                                 tmp->metadata = tmpl->metadata;
7697                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7698                                 INIT_LIST_HEAD(&tmp->list);
7699                                 list_add_tail(&tmp->list, &rec->dups);
7700                                 rec->num_duplicates++;
7701                         } else {
7702                                 rec->nr = tmpl->nr;
7703                                 rec->found_rec = 1;
7704                         }
7705                 }
7706
7707                 if (tmpl->extent_item_refs && !dup) {
7708                         if (rec->extent_item_refs) {
7709                                 fprintf(stderr, "block %llu rec "
7710                                         "extent_item_refs %llu, passed %llu\n",
7711                                         (unsigned long long)tmpl->start,
7712                                         (unsigned long long)
7713                                                         rec->extent_item_refs,
7714                                         (unsigned long long)tmpl->extent_item_refs);
7715                         }
7716                         rec->extent_item_refs = tmpl->extent_item_refs;
7717                 }
7718                 if (tmpl->is_root)
7719                         rec->is_root = 1;
7720                 if (tmpl->content_checked)
7721                         rec->content_checked = 1;
7722                 if (tmpl->owner_ref_checked)
7723                         rec->owner_ref_checked = 1;
7724                 memcpy(&rec->parent_key, &tmpl->parent_key,
7725                                 sizeof(tmpl->parent_key));
7726                 if (tmpl->parent_generation)
7727                         rec->parent_generation = tmpl->parent_generation;
7728                 if (rec->max_size < tmpl->max_size)
7729                         rec->max_size = tmpl->max_size;
7730
7731                 /*
7732                  * A metadata extent can't cross stripe_len boundary, otherwise
7733                  * kernel scrub won't be able to handle it.
7734                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7735                  * it.
7736                  */
7737                 if (tmpl->metadata)
7738                         rec->crossing_stripes = check_crossing_stripes(
7739                                         global_info, rec->start,
7740                                         global_info->nodesize);
7741                 check_extent_type(rec);
7742                 maybe_free_extent_rec(extent_cache, rec);
7743                 return ret;
7744         }
7745
7746         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7747
7748         return ret;
7749 }
7750
7751 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7752                             u64 parent, u64 root, int found_ref)
7753 {
7754         struct extent_record *rec;
7755         struct tree_backref *back;
7756         struct cache_extent *cache;
7757         int ret;
7758         bool insert = false;
7759
7760         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7761         if (!cache) {
7762                 struct extent_record tmpl;
7763
7764                 memset(&tmpl, 0, sizeof(tmpl));
7765                 tmpl.start = bytenr;
7766                 tmpl.nr = 1;
7767                 tmpl.metadata = 1;
7768                 tmpl.max_size = 1;
7769
7770                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7771                 if (ret)
7772                         return ret;
7773
7774                 /* really a bug in cache_extent implement now */
7775                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7776                 if (!cache)
7777                         return -ENOENT;
7778         }
7779
7780         rec = container_of(cache, struct extent_record, cache);
7781         if (rec->start != bytenr) {
7782                 /*
7783                  * Several cause, from unaligned bytenr to over lapping extents
7784                  */
7785                 return -EEXIST;
7786         }
7787
7788         back = find_tree_backref(rec, parent, root);
7789         if (!back) {
7790                 back = alloc_tree_backref(rec, parent, root);
7791                 if (!back)
7792                         return -ENOMEM;
7793                 insert = true;
7794         }
7795
7796         if (found_ref) {
7797                 if (back->node.found_ref) {
7798                         fprintf(stderr, "Extent back ref already exists "
7799                                 "for %llu parent %llu root %llu \n",
7800                                 (unsigned long long)bytenr,
7801                                 (unsigned long long)parent,
7802                                 (unsigned long long)root);
7803                 }
7804                 back->node.found_ref = 1;
7805         } else {
7806                 if (back->node.found_extent_tree) {
7807                         fprintf(stderr, "Extent back ref already exists "
7808                                 "for %llu parent %llu root %llu \n",
7809                                 (unsigned long long)bytenr,
7810                                 (unsigned long long)parent,
7811                                 (unsigned long long)root);
7812                 }
7813                 back->node.found_extent_tree = 1;
7814         }
7815         if (insert)
7816                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7817                         compare_extent_backref));
7818         check_extent_type(rec);
7819         maybe_free_extent_rec(extent_cache, rec);
7820         return 0;
7821 }
7822
7823 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7824                             u64 parent, u64 root, u64 owner, u64 offset,
7825                             u32 num_refs, int found_ref, u64 max_size)
7826 {
7827         struct extent_record *rec;
7828         struct data_backref *back;
7829         struct cache_extent *cache;
7830         int ret;
7831         bool insert = false;
7832
7833         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7834         if (!cache) {
7835                 struct extent_record tmpl;
7836
7837                 memset(&tmpl, 0, sizeof(tmpl));
7838                 tmpl.start = bytenr;
7839                 tmpl.nr = 1;
7840                 tmpl.max_size = max_size;
7841
7842                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7843                 if (ret)
7844                         return ret;
7845
7846                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7847                 if (!cache)
7848                         abort();
7849         }
7850
7851         rec = container_of(cache, struct extent_record, cache);
7852         if (rec->max_size < max_size)
7853                 rec->max_size = max_size;
7854
7855         /*
7856          * If found_ref is set then max_size is the real size and must match the
7857          * existing refs.  So if we have already found a ref then we need to
7858          * make sure that this ref matches the existing one, otherwise we need
7859          * to add a new backref so we can notice that the backrefs don't match
7860          * and we need to figure out who is telling the truth.  This is to
7861          * account for that awful fsync bug I introduced where we'd end up with
7862          * a btrfs_file_extent_item that would have its length include multiple
7863          * prealloc extents or point inside of a prealloc extent.
7864          */
7865         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7866                                  bytenr, max_size);
7867         if (!back) {
7868                 back = alloc_data_backref(rec, parent, root, owner, offset,
7869                                           max_size);
7870                 BUG_ON(!back);
7871                 insert = true;
7872         }
7873
7874         if (found_ref) {
7875                 BUG_ON(num_refs != 1);
7876                 if (back->node.found_ref)
7877                         BUG_ON(back->bytes != max_size);
7878                 back->node.found_ref = 1;
7879                 back->found_ref += 1;
7880                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7881                         back->bytes = max_size;
7882                         back->disk_bytenr = bytenr;
7883
7884                         /* Need to reinsert if not already in the tree */
7885                         if (!insert) {
7886                                 rb_erase(&back->node.node, &rec->backref_tree);
7887                                 insert = true;
7888                         }
7889                 }
7890                 rec->refs += 1;
7891                 rec->content_checked = 1;
7892                 rec->owner_ref_checked = 1;
7893         } else {
7894                 if (back->node.found_extent_tree) {
7895                         fprintf(stderr, "Extent back ref already exists "
7896                                 "for %llu parent %llu root %llu "
7897                                 "owner %llu offset %llu num_refs %lu\n",
7898                                 (unsigned long long)bytenr,
7899                                 (unsigned long long)parent,
7900                                 (unsigned long long)root,
7901                                 (unsigned long long)owner,
7902                                 (unsigned long long)offset,
7903                                 (unsigned long)num_refs);
7904                 }
7905                 back->num_refs = num_refs;
7906                 back->node.found_extent_tree = 1;
7907         }
7908         if (insert)
7909                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7910                         compare_extent_backref));
7911
7912         maybe_free_extent_rec(extent_cache, rec);
7913         return 0;
7914 }
7915
7916 static int add_pending(struct cache_tree *pending,
7917                        struct cache_tree *seen, u64 bytenr, u32 size)
7918 {
7919         int ret;
7920         ret = add_cache_extent(seen, bytenr, size);
7921         if (ret)
7922                 return ret;
7923         add_cache_extent(pending, bytenr, size);
7924         return 0;
7925 }
7926
7927 static int pick_next_pending(struct cache_tree *pending,
7928                         struct cache_tree *reada,
7929                         struct cache_tree *nodes,
7930                         u64 last, struct block_info *bits, int bits_nr,
7931                         int *reada_bits)
7932 {
7933         unsigned long node_start = last;
7934         struct cache_extent *cache;
7935         int ret;
7936
7937         cache = search_cache_extent(reada, 0);
7938         if (cache) {
7939                 bits[0].start = cache->start;
7940                 bits[0].size = cache->size;
7941                 *reada_bits = 1;
7942                 return 1;
7943         }
7944         *reada_bits = 0;
7945         if (node_start > 32768)
7946                 node_start -= 32768;
7947
7948         cache = search_cache_extent(nodes, node_start);
7949         if (!cache)
7950                 cache = search_cache_extent(nodes, 0);
7951
7952         if (!cache) {
7953                  cache = search_cache_extent(pending, 0);
7954                  if (!cache)
7955                          return 0;
7956                  ret = 0;
7957                  do {
7958                          bits[ret].start = cache->start;
7959                          bits[ret].size = cache->size;
7960                          cache = next_cache_extent(cache);
7961                          ret++;
7962                  } while (cache && ret < bits_nr);
7963                  return ret;
7964         }
7965
7966         ret = 0;
7967         do {
7968                 bits[ret].start = cache->start;
7969                 bits[ret].size = cache->size;
7970                 cache = next_cache_extent(cache);
7971                 ret++;
7972         } while (cache && ret < bits_nr);
7973
7974         if (bits_nr - ret > 8) {
7975                 u64 lookup = bits[0].start + bits[0].size;
7976                 struct cache_extent *next;
7977                 next = search_cache_extent(pending, lookup);
7978                 while(next) {
7979                         if (next->start - lookup > 32768)
7980                                 break;
7981                         bits[ret].start = next->start;
7982                         bits[ret].size = next->size;
7983                         lookup = next->start + next->size;
7984                         ret++;
7985                         if (ret == bits_nr)
7986                                 break;
7987                         next = next_cache_extent(next);
7988                         if (!next)
7989                                 break;
7990                 }
7991         }
7992         return ret;
7993 }
7994
7995 static void free_chunk_record(struct cache_extent *cache)
7996 {
7997         struct chunk_record *rec;
7998
7999         rec = container_of(cache, struct chunk_record, cache);
8000         list_del_init(&rec->list);
8001         list_del_init(&rec->dextents);
8002         free(rec);
8003 }
8004
8005 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8006 {
8007         cache_tree_free_extents(chunk_cache, free_chunk_record);
8008 }
8009
8010 static void free_device_record(struct rb_node *node)
8011 {
8012         struct device_record *rec;
8013
8014         rec = container_of(node, struct device_record, node);
8015         free(rec);
8016 }
8017
8018 FREE_RB_BASED_TREE(device_cache, free_device_record);
8019
8020 int insert_block_group_record(struct block_group_tree *tree,
8021                               struct block_group_record *bg_rec)
8022 {
8023         int ret;
8024
8025         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8026         if (ret)
8027                 return ret;
8028
8029         list_add_tail(&bg_rec->list, &tree->block_groups);
8030         return 0;
8031 }
8032
8033 static void free_block_group_record(struct cache_extent *cache)
8034 {
8035         struct block_group_record *rec;
8036
8037         rec = container_of(cache, struct block_group_record, cache);
8038         list_del_init(&rec->list);
8039         free(rec);
8040 }
8041
8042 void free_block_group_tree(struct block_group_tree *tree)
8043 {
8044         cache_tree_free_extents(&tree->tree, free_block_group_record);
8045 }
8046
8047 int insert_device_extent_record(struct device_extent_tree *tree,
8048                                 struct device_extent_record *de_rec)
8049 {
8050         int ret;
8051
8052         /*
8053          * Device extent is a bit different from the other extents, because
8054          * the extents which belong to the different devices may have the
8055          * same start and size, so we need use the special extent cache
8056          * search/insert functions.
8057          */
8058         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8059         if (ret)
8060                 return ret;
8061
8062         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8063         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
8064         return 0;
8065 }
8066
8067 static void free_device_extent_record(struct cache_extent *cache)
8068 {
8069         struct device_extent_record *rec;
8070
8071         rec = container_of(cache, struct device_extent_record, cache);
8072         if (!list_empty(&rec->chunk_list))
8073                 list_del_init(&rec->chunk_list);
8074         if (!list_empty(&rec->device_list))
8075                 list_del_init(&rec->device_list);
8076         free(rec);
8077 }
8078
8079 void free_device_extent_tree(struct device_extent_tree *tree)
8080 {
8081         cache_tree_free_extents(&tree->tree, free_device_extent_record);
8082 }
8083
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref stored at @slot of @leaf into a backref.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is added as a
 * tree backref, any other as a data backref carrying the ref count of the
 * item.  In both cases the item key's objectid is the extent byte number
 * and the key's offset is passed as the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
8104
8105 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8106                                             struct btrfs_key *key,
8107                                             int slot)
8108 {
8109         struct btrfs_chunk *ptr;
8110         struct chunk_record *rec;
8111         int num_stripes, i;
8112
8113         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8114         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8115
8116         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8117         if (!rec) {
8118                 fprintf(stderr, "memory allocation failed\n");
8119                 exit(-1);
8120         }
8121
8122         INIT_LIST_HEAD(&rec->list);
8123         INIT_LIST_HEAD(&rec->dextents);
8124         rec->bg_rec = NULL;
8125
8126         rec->cache.start = key->offset;
8127         rec->cache.size = btrfs_chunk_length(leaf, ptr);
8128
8129         rec->generation = btrfs_header_generation(leaf);
8130
8131         rec->objectid = key->objectid;
8132         rec->type = key->type;
8133         rec->offset = key->offset;
8134
8135         rec->length = rec->cache.size;
8136         rec->owner = btrfs_chunk_owner(leaf, ptr);
8137         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8138         rec->type_flags = btrfs_chunk_type(leaf, ptr);
8139         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8140         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8141         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8142         rec->num_stripes = num_stripes;
8143         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8144
8145         for (i = 0; i < rec->num_stripes; ++i) {
8146                 rec->stripes[i].devid =
8147                         btrfs_stripe_devid_nr(leaf, ptr, i);
8148                 rec->stripes[i].offset =
8149                         btrfs_stripe_offset_nr(leaf, ptr, i);
8150                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8151                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
8152                                 BTRFS_UUID_SIZE);
8153         }
8154
8155         return rec;
8156 }
8157
8158 static int process_chunk_item(struct cache_tree *chunk_cache,
8159                               struct btrfs_key *key, struct extent_buffer *eb,
8160                               int slot)
8161 {
8162         struct chunk_record *rec;
8163         struct btrfs_chunk *chunk;
8164         int ret = 0;
8165
8166         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8167         /*
8168          * Do extra check for this chunk item,
8169          *
8170          * It's still possible one can craft a leaf with CHUNK_ITEM, with
8171          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8172          * and owner<->key_type check.
8173          */
8174         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8175                                       key->offset);
8176         if (ret < 0) {
8177                 error("chunk(%llu, %llu) is not valid, ignore it",
8178                       key->offset, btrfs_chunk_length(eb, chunk));
8179                 return 0;
8180         }
8181         rec = btrfs_new_chunk_record(eb, key, slot);
8182         ret = insert_cache_extent(chunk_cache, &rec->cache);
8183         if (ret) {
8184                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8185                         rec->offset, rec->length);
8186                 free(rec);
8187         }
8188
8189         return ret;
8190 }
8191
8192 static int process_device_item(struct rb_root *dev_cache,
8193                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8194 {
8195         struct btrfs_dev_item *ptr;
8196         struct device_record *rec;
8197         int ret = 0;
8198
8199         ptr = btrfs_item_ptr(eb,
8200                 slot, struct btrfs_dev_item);
8201
8202         rec = malloc(sizeof(*rec));
8203         if (!rec) {
8204                 fprintf(stderr, "memory allocation failed\n");
8205                 return -ENOMEM;
8206         }
8207
8208         rec->devid = key->offset;
8209         rec->generation = btrfs_header_generation(eb);
8210
8211         rec->objectid = key->objectid;
8212         rec->type = key->type;
8213         rec->offset = key->offset;
8214
8215         rec->devid = btrfs_device_id(eb, ptr);
8216         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8217         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8218
8219         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8220         if (ret) {
8221                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
8222                 free(rec);
8223         }
8224
8225         return ret;
8226 }
8227
8228 struct block_group_record *
8229 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8230                              int slot)
8231 {
8232         struct btrfs_block_group_item *ptr;
8233         struct block_group_record *rec;
8234
8235         rec = calloc(1, sizeof(*rec));
8236         if (!rec) {
8237                 fprintf(stderr, "memory allocation failed\n");
8238                 exit(-1);
8239         }
8240
8241         rec->cache.start = key->objectid;
8242         rec->cache.size = key->offset;
8243
8244         rec->generation = btrfs_header_generation(leaf);
8245
8246         rec->objectid = key->objectid;
8247         rec->type = key->type;
8248         rec->offset = key->offset;
8249
8250         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8251         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8252
8253         INIT_LIST_HEAD(&rec->list);
8254
8255         return rec;
8256 }
8257
8258 static int process_block_group_item(struct block_group_tree *block_group_cache,
8259                                     struct btrfs_key *key,
8260                                     struct extent_buffer *eb, int slot)
8261 {
8262         struct block_group_record *rec;
8263         int ret = 0;
8264
8265         rec = btrfs_new_block_group_record(eb, key, slot);
8266         ret = insert_block_group_record(block_group_cache, rec);
8267         if (ret) {
8268                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8269                         rec->objectid, rec->offset);
8270                 free(rec);
8271         }
8272
8273         return ret;
8274 }
8275
8276 struct device_extent_record *
8277 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8278                                struct btrfs_key *key, int slot)
8279 {
8280         struct device_extent_record *rec;
8281         struct btrfs_dev_extent *ptr;
8282
8283         rec = calloc(1, sizeof(*rec));
8284         if (!rec) {
8285                 fprintf(stderr, "memory allocation failed\n");
8286                 exit(-1);
8287         }
8288
8289         rec->cache.objectid = key->objectid;
8290         rec->cache.start = key->offset;
8291
8292         rec->generation = btrfs_header_generation(leaf);
8293
8294         rec->objectid = key->objectid;
8295         rec->type = key->type;
8296         rec->offset = key->offset;
8297
8298         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8299         rec->chunk_objecteid =
8300                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8301         rec->chunk_offset =
8302                 btrfs_dev_extent_chunk_offset(leaf, ptr);
8303         rec->length = btrfs_dev_extent_length(leaf, ptr);
8304         rec->cache.size = rec->length;
8305
8306         INIT_LIST_HEAD(&rec->chunk_list);
8307         INIT_LIST_HEAD(&rec->device_list);
8308
8309         return rec;
8310 }
8311
8312 static int
8313 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8314                            struct btrfs_key *key, struct extent_buffer *eb,
8315                            int slot)
8316 {
8317         struct device_extent_record *rec;
8318         int ret;
8319
8320         rec = btrfs_new_device_extent_record(eb, key, slot);
8321         ret = insert_device_extent_record(dev_extent_cache, rec);
8322         if (ret) {
8323                 fprintf(stderr,
8324                         "Device extent[%llu, %llu, %llu] existed.\n",
8325                         rec->objectid, rec->offset, rec->length);
8326                 free(rec);
8327         }
8328
8329         return ret;
8330 }
8331
8332 static int process_extent_item(struct btrfs_root *root,
8333                                struct cache_tree *extent_cache,
8334                                struct extent_buffer *eb, int slot)
8335 {
8336         struct btrfs_extent_item *ei;
8337         struct btrfs_extent_inline_ref *iref;
8338         struct btrfs_extent_data_ref *dref;
8339         struct btrfs_shared_data_ref *sref;
8340         struct btrfs_key key;
8341         struct extent_record tmpl;
8342         unsigned long end;
8343         unsigned long ptr;
8344         int ret;
8345         int type;
8346         u32 item_size = btrfs_item_size_nr(eb, slot);
8347         u64 refs = 0;
8348         u64 offset;
8349         u64 num_bytes;
8350         int metadata = 0;
8351
8352         btrfs_item_key_to_cpu(eb, &key, slot);
8353
8354         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8355                 metadata = 1;
8356                 num_bytes = root->fs_info->nodesize;
8357         } else {
8358                 num_bytes = key.offset;
8359         }
8360
8361         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8362                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8363                       key.objectid, root->fs_info->sectorsize);
8364                 return -EIO;
8365         }
8366         if (item_size < sizeof(*ei)) {
8367 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8368                 struct btrfs_extent_item_v0 *ei0;
8369                 if (item_size != sizeof(*ei0)) {
8370                         error(
8371         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
8372                                 key.objectid, key.type, key.offset,
8373                                 btrfs_header_bytenr(eb), slot);
8374                         BUG();
8375                 }
8376                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8377                 refs = btrfs_extent_refs_v0(eb, ei0);
8378 #else
8379                 BUG();
8380 #endif
8381                 memset(&tmpl, 0, sizeof(tmpl));
8382                 tmpl.start = key.objectid;
8383                 tmpl.nr = num_bytes;
8384                 tmpl.extent_item_refs = refs;
8385                 tmpl.metadata = metadata;
8386                 tmpl.found_rec = 1;
8387                 tmpl.max_size = num_bytes;
8388
8389                 return add_extent_rec(extent_cache, &tmpl);
8390         }
8391
8392         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8393         refs = btrfs_extent_refs(eb, ei);
8394         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8395                 metadata = 1;
8396         else
8397                 metadata = 0;
8398         if (metadata && num_bytes != root->fs_info->nodesize) {
8399                 error("ignore invalid metadata extent, length %llu does not equal to %u",
8400                       num_bytes, root->fs_info->nodesize);
8401                 return -EIO;
8402         }
8403         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8404                 error("ignore invalid data extent, length %llu is not aligned to %u",
8405                       num_bytes, root->fs_info->sectorsize);
8406                 return -EIO;
8407         }
8408
8409         memset(&tmpl, 0, sizeof(tmpl));
8410         tmpl.start = key.objectid;
8411         tmpl.nr = num_bytes;
8412         tmpl.extent_item_refs = refs;
8413         tmpl.metadata = metadata;
8414         tmpl.found_rec = 1;
8415         tmpl.max_size = num_bytes;
8416         add_extent_rec(extent_cache, &tmpl);
8417
8418         ptr = (unsigned long)(ei + 1);
8419         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8420             key.type == BTRFS_EXTENT_ITEM_KEY)
8421                 ptr += sizeof(struct btrfs_tree_block_info);
8422
8423         end = (unsigned long)ei + item_size;
8424         while (ptr < end) {
8425                 iref = (struct btrfs_extent_inline_ref *)ptr;
8426                 type = btrfs_extent_inline_ref_type(eb, iref);
8427                 offset = btrfs_extent_inline_ref_offset(eb, iref);
8428                 switch (type) {
8429                 case BTRFS_TREE_BLOCK_REF_KEY:
8430                         ret = add_tree_backref(extent_cache, key.objectid,
8431                                         0, offset, 0);
8432                         if (ret < 0)
8433                                 error(
8434                         "add_tree_backref failed (extent items tree block): %s",
8435                                       strerror(-ret));
8436                         break;
8437                 case BTRFS_SHARED_BLOCK_REF_KEY:
8438                         ret = add_tree_backref(extent_cache, key.objectid,
8439                                         offset, 0, 0);
8440                         if (ret < 0)
8441                                 error(
8442                         "add_tree_backref failed (extent items shared block): %s",
8443                                       strerror(-ret));
8444                         break;
8445                 case BTRFS_EXTENT_DATA_REF_KEY:
8446                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8447                         add_data_backref(extent_cache, key.objectid, 0,
8448                                         btrfs_extent_data_ref_root(eb, dref),
8449                                         btrfs_extent_data_ref_objectid(eb,
8450                                                                        dref),
8451                                         btrfs_extent_data_ref_offset(eb, dref),
8452                                         btrfs_extent_data_ref_count(eb, dref),
8453                                         0, num_bytes);
8454                         break;
8455                 case BTRFS_SHARED_DATA_REF_KEY:
8456                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8457                         add_data_backref(extent_cache, key.objectid, offset,
8458                                         0, 0, 0,
8459                                         btrfs_shared_data_ref_count(eb, sref),
8460                                         0, num_bytes);
8461                         break;
8462                 default:
8463                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8464                                 key.objectid, key.type, num_bytes);
8465                         goto out;
8466                 }
8467                 ptr += btrfs_extent_inline_ref_size(type);
8468         }
8469         WARN_ON(ptr > end);
8470 out:
8471         return 0;
8472 }
8473
8474 static int check_cache_range(struct btrfs_root *root,
8475                              struct btrfs_block_group_cache *cache,
8476                              u64 offset, u64 bytes)
8477 {
8478         struct btrfs_free_space *entry;
8479         u64 *logical;
8480         u64 bytenr;
8481         int stripe_len;
8482         int i, nr, ret;
8483
8484         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8485                 bytenr = btrfs_sb_offset(i);
8486                 ret = btrfs_rmap_block(root->fs_info,
8487                                        cache->key.objectid, bytenr, 0,
8488                                        &logical, &nr, &stripe_len);
8489                 if (ret)
8490                         return ret;
8491
8492                 while (nr--) {
8493                         if (logical[nr] + stripe_len <= offset)
8494                                 continue;
8495                         if (offset + bytes <= logical[nr])
8496                                 continue;
8497                         if (logical[nr] == offset) {
8498                                 if (stripe_len >= bytes) {
8499                                         free(logical);
8500                                         return 0;
8501                                 }
8502                                 bytes -= stripe_len;
8503                                 offset += stripe_len;
8504                         } else if (logical[nr] < offset) {
8505                                 if (logical[nr] + stripe_len >=
8506                                     offset + bytes) {
8507                                         free(logical);
8508                                         return 0;
8509                                 }
8510                                 bytes = (offset + bytes) -
8511                                         (logical[nr] + stripe_len);
8512                                 offset = logical[nr] + stripe_len;
8513                         } else {
8514                                 /*
8515                                  * Could be tricky, the super may land in the
8516                                  * middle of the area we're checking.  First
8517                                  * check the easiest case, it's at the end.
8518                                  */
8519                                 if (logical[nr] + stripe_len >=
8520                                     bytes + offset) {
8521                                         bytes = logical[nr] - offset;
8522                                         continue;
8523                                 }
8524
8525                                 /* Check the left side */
8526                                 ret = check_cache_range(root, cache,
8527                                                         offset,
8528                                                         logical[nr] - offset);
8529                                 if (ret) {
8530                                         free(logical);
8531                                         return ret;
8532                                 }
8533
8534                                 /* Now we continue with the right side */
8535                                 bytes = (offset + bytes) -
8536                                         (logical[nr] + stripe_len);
8537                                 offset = logical[nr] + stripe_len;
8538                         }
8539                 }
8540
8541                 free(logical);
8542         }
8543
8544         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8545         if (!entry) {
8546                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8547                         offset, offset+bytes);
8548                 return -EINVAL;
8549         }
8550
8551         if (entry->offset != offset) {
8552                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8553                         entry->offset);
8554                 return -EINVAL;
8555         }
8556
8557         if (entry->bytes != bytes) {
8558                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8559                         bytes, entry->bytes, offset);
8560                 return -EINVAL;
8561         }
8562
8563         unlink_free_space(cache->free_space_ctl, entry);
8564         free(entry);
8565         return 0;
8566 }
8567
8568 static int verify_space_cache(struct btrfs_root *root,
8569                               struct btrfs_block_group_cache *cache)
8570 {
8571         struct btrfs_path path;
8572         struct extent_buffer *leaf;
8573         struct btrfs_key key;
8574         u64 last;
8575         int ret = 0;
8576
8577         root = root->fs_info->extent_root;
8578
8579         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8580
8581         btrfs_init_path(&path);
8582         key.objectid = last;
8583         key.offset = 0;
8584         key.type = BTRFS_EXTENT_ITEM_KEY;
8585         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8586         if (ret < 0)
8587                 goto out;
8588         ret = 0;
8589         while (1) {
8590                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8591                         ret = btrfs_next_leaf(root, &path);
8592                         if (ret < 0)
8593                                 goto out;
8594                         if (ret > 0) {
8595                                 ret = 0;
8596                                 break;
8597                         }
8598                 }
8599                 leaf = path.nodes[0];
8600                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8601                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8602                         break;
8603                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8604                     key.type != BTRFS_METADATA_ITEM_KEY) {
8605                         path.slots[0]++;
8606                         continue;
8607                 }
8608
8609                 if (last == key.objectid) {
8610                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8611                                 last = key.objectid + key.offset;
8612                         else
8613                                 last = key.objectid + root->fs_info->nodesize;
8614                         path.slots[0]++;
8615                         continue;
8616                 }
8617
8618                 ret = check_cache_range(root, cache, last,
8619                                         key.objectid - last);
8620                 if (ret)
8621                         break;
8622                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8623                         last = key.objectid + key.offset;
8624                 else
8625                         last = key.objectid + root->fs_info->nodesize;
8626                 path.slots[0]++;
8627         }
8628
8629         if (last < cache->key.objectid + cache->key.offset)
8630                 ret = check_cache_range(root, cache, last,
8631                                         cache->key.objectid +
8632                                         cache->key.offset - last);
8633
8634 out:
8635         btrfs_release_path(&path);
8636
8637         if (!ret &&
8638             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8639                 fprintf(stderr, "There are still entries left in the space "
8640                         "cache\n");
8641                 ret = -EINVAL;
8642         }
8643
8644         return ret;
8645 }
8646
8647 static int check_space_cache(struct btrfs_root *root)
8648 {
8649         struct btrfs_block_group_cache *cache;
8650         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8651         int ret;
8652         int error = 0;
8653
8654         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8655             btrfs_super_generation(root->fs_info->super_copy) !=
8656             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8657                 printf("cache and super generation don't match, space cache "
8658                        "will be invalidated\n");
8659                 return 0;
8660         }
8661
8662         if (ctx.progress_enabled) {
8663                 ctx.tp = TASK_FREE_SPACE;
8664                 task_start(ctx.info);
8665         }
8666
8667         while (1) {
8668                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8669                 if (!cache)
8670                         break;
8671
8672                 start = cache->key.objectid + cache->key.offset;
8673                 if (!cache->free_space_ctl) {
8674                         if (btrfs_init_free_space_ctl(cache,
8675                                                 root->fs_info->sectorsize)) {
8676                                 ret = -ENOMEM;
8677                                 break;
8678                         }
8679                 } else {
8680                         btrfs_remove_free_space_cache(cache);
8681                 }
8682
8683                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8684                         ret = exclude_super_stripes(root, cache);
8685                         if (ret) {
8686                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8687                                         strerror(-ret));
8688                                 error++;
8689                                 continue;
8690                         }
8691                         ret = load_free_space_tree(root->fs_info, cache);
8692                         free_excluded_extents(root, cache);
8693                         if (ret < 0) {
8694                                 fprintf(stderr, "could not load free space tree: %s\n",
8695                                         strerror(-ret));
8696                                 error++;
8697                                 continue;
8698                         }
8699                         error += ret;
8700                 } else {
8701                         ret = load_free_space_cache(root->fs_info, cache);
8702                         if (!ret)
8703                                 continue;
8704                 }
8705
8706                 ret = verify_space_cache(root, cache);
8707                 if (ret) {
8708                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8709                                 cache->key.objectid);
8710                         error++;
8711                 }
8712         }
8713
8714         task_stop(ctx.info);
8715
8716         return error ? -EINVAL : 0;
8717 }
8718
8719 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8720                         u64 num_bytes, unsigned long leaf_offset,
8721                         struct extent_buffer *eb) {
8722
8723         struct btrfs_fs_info *fs_info = root->fs_info;
8724         u64 offset = 0;
8725         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8726         char *data;
8727         unsigned long csum_offset;
8728         u32 csum;
8729         u32 csum_expected;
8730         u64 read_len;
8731         u64 data_checked = 0;
8732         u64 tmp;
8733         int ret = 0;
8734         int mirror;
8735         int num_copies;
8736
8737         if (num_bytes % fs_info->sectorsize)
8738                 return -EINVAL;
8739
8740         data = malloc(num_bytes);
8741         if (!data)
8742                 return -ENOMEM;
8743
8744         while (offset < num_bytes) {
8745                 mirror = 0;
8746 again:
8747                 read_len = num_bytes - offset;
8748                 /* read as much space once a time */
8749                 ret = read_extent_data(fs_info, data + offset,
8750                                 bytenr + offset, &read_len, mirror);
8751                 if (ret)
8752                         goto out;
8753                 data_checked = 0;
8754                 /* verify every 4k data's checksum */
8755                 while (data_checked < read_len) {
8756                         csum = ~(u32)0;
8757                         tmp = offset + data_checked;
8758
8759                         csum = btrfs_csum_data((char *)data + tmp,
8760                                                csum, fs_info->sectorsize);
8761                         btrfs_csum_final(csum, (u8 *)&csum);
8762
8763                         csum_offset = leaf_offset +
8764                                  tmp / fs_info->sectorsize * csum_size;
8765                         read_extent_buffer(eb, (char *)&csum_expected,
8766                                            csum_offset, csum_size);
8767                         /* try another mirror */
8768                         if (csum != csum_expected) {
8769                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8770                                                 mirror, bytenr + tmp,
8771                                                 csum, csum_expected);
8772                                 num_copies = btrfs_num_copies(root->fs_info,
8773                                                 bytenr, num_bytes);
8774                                 if (mirror < num_copies - 1) {
8775                                         mirror += 1;
8776                                         goto again;
8777                                 }
8778                         }
8779                         data_checked += fs_info->sectorsize;
8780                 }
8781                 offset += read_len;
8782         }
8783 out:
8784         free(data);
8785         return ret;
8786 }
8787
8788 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8789                                u64 num_bytes)
8790 {
8791         struct btrfs_path path;
8792         struct extent_buffer *leaf;
8793         struct btrfs_key key;
8794         int ret;
8795
8796         btrfs_init_path(&path);
8797         key.objectid = bytenr;
8798         key.type = BTRFS_EXTENT_ITEM_KEY;
8799         key.offset = (u64)-1;
8800
8801 again:
8802         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8803                                 0, 0);
8804         if (ret < 0) {
8805                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8806                 btrfs_release_path(&path);
8807                 return ret;
8808         } else if (ret) {
8809                 if (path.slots[0] > 0) {
8810                         path.slots[0]--;
8811                 } else {
8812                         ret = btrfs_prev_leaf(root, &path);
8813                         if (ret < 0) {
8814                                 goto out;
8815                         } else if (ret > 0) {
8816                                 ret = 0;
8817                                 goto out;
8818                         }
8819                 }
8820         }
8821
8822         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8823
8824         /*
8825          * Block group items come before extent items if they have the same
8826          * bytenr, so walk back one more just in case.  Dear future traveller,
8827          * first congrats on mastering time travel.  Now if it's not too much
8828          * trouble could you go back to 2006 and tell Chris to make the
8829          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8830          * EXTENT_ITEM_KEY please?
8831          */
8832         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8833                 if (path.slots[0] > 0) {
8834                         path.slots[0]--;
8835                 } else {
8836                         ret = btrfs_prev_leaf(root, &path);
8837                         if (ret < 0) {
8838                                 goto out;
8839                         } else if (ret > 0) {
8840                                 ret = 0;
8841                                 goto out;
8842                         }
8843                 }
8844                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8845         }
8846
8847         while (num_bytes) {
8848                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8849                         ret = btrfs_next_leaf(root, &path);
8850                         if (ret < 0) {
8851                                 fprintf(stderr, "Error going to next leaf "
8852                                         "%d\n", ret);
8853                                 btrfs_release_path(&path);
8854                                 return ret;
8855                         } else if (ret) {
8856                                 break;
8857                         }
8858                 }
8859                 leaf = path.nodes[0];
8860                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8861                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8862                         path.slots[0]++;
8863                         continue;
8864                 }
8865                 if (key.objectid + key.offset < bytenr) {
8866                         path.slots[0]++;
8867                         continue;
8868                 }
8869                 if (key.objectid > bytenr + num_bytes)
8870                         break;
8871
8872                 if (key.objectid == bytenr) {
8873                         if (key.offset >= num_bytes) {
8874                                 num_bytes = 0;
8875                                 break;
8876                         }
8877                         num_bytes -= key.offset;
8878                         bytenr += key.offset;
8879                 } else if (key.objectid < bytenr) {
8880                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8881                                 num_bytes = 0;
8882                                 break;
8883                         }
8884                         num_bytes = (bytenr + num_bytes) -
8885                                 (key.objectid + key.offset);
8886                         bytenr = key.objectid + key.offset;
8887                 } else {
8888                         if (key.objectid + key.offset < bytenr + num_bytes) {
8889                                 u64 new_start = key.objectid + key.offset;
8890                                 u64 new_bytes = bytenr + num_bytes - new_start;
8891
8892                                 /*
8893                                  * Weird case, the extent is in the middle of
8894                                  * our range, we'll have to search one side
8895                                  * and then the other.  Not sure if this happens
8896                                  * in real life, but no harm in coding it up
8897                                  * anyway just in case.
8898                                  */
8899                                 btrfs_release_path(&path);
8900                                 ret = check_extent_exists(root, new_start,
8901                                                           new_bytes);
8902                                 if (ret) {
8903                                         fprintf(stderr, "Right section didn't "
8904                                                 "have a record\n");
8905                                         break;
8906                                 }
8907                                 num_bytes = key.objectid - bytenr;
8908                                 goto again;
8909                         }
8910                         num_bytes = key.objectid - bytenr;
8911                 }
8912                 path.slots[0]++;
8913         }
8914         ret = 0;
8915
8916 out:
8917         if (num_bytes && !ret) {
8918                 fprintf(stderr, "There are no extents for csum range "
8919                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8920                 ret = 1;
8921         }
8922
8923         btrfs_release_path(&path);
8924         return ret;
8925 }
8926
8927 static int check_csums(struct btrfs_root *root)
8928 {
8929         struct btrfs_path path;
8930         struct extent_buffer *leaf;
8931         struct btrfs_key key;
8932         u64 offset = 0, num_bytes = 0;
8933         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8934         int errors = 0;
8935         int ret;
8936         u64 data_len;
8937         unsigned long leaf_offset;
8938
8939         root = root->fs_info->csum_root;
8940         if (!extent_buffer_uptodate(root->node)) {
8941                 fprintf(stderr, "No valid csum tree found\n");
8942                 return -ENOENT;
8943         }
8944
8945         btrfs_init_path(&path);
8946         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8947         key.type = BTRFS_EXTENT_CSUM_KEY;
8948         key.offset = 0;
8949         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8950         if (ret < 0) {
8951                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8952                 btrfs_release_path(&path);
8953                 return ret;
8954         }
8955
8956         if (ret > 0 && path.slots[0])
8957                 path.slots[0]--;
8958         ret = 0;
8959
8960         while (1) {
8961                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8962                         ret = btrfs_next_leaf(root, &path);
8963                         if (ret < 0) {
8964                                 fprintf(stderr, "Error going to next leaf "
8965                                         "%d\n", ret);
8966                                 break;
8967                         }
8968                         if (ret)
8969                                 break;
8970                 }
8971                 leaf = path.nodes[0];
8972
8973                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8974                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8975                         path.slots[0]++;
8976                         continue;
8977                 }
8978
8979                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8980                               csum_size) * root->fs_info->sectorsize;
8981                 if (!check_data_csum)
8982                         goto skip_csum_check;
8983                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8984                 ret = check_extent_csums(root, key.offset, data_len,
8985                                          leaf_offset, leaf);
8986                 if (ret)
8987                         break;
8988 skip_csum_check:
8989                 if (!num_bytes) {
8990                         offset = key.offset;
8991                 } else if (key.offset != offset + num_bytes) {
8992                         ret = check_extent_exists(root, offset, num_bytes);
8993                         if (ret) {
8994                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8995                                         "there is no extent record\n",
8996                                         offset, offset+num_bytes);
8997                                 errors++;
8998                         }
8999                         offset = key.offset;
9000                         num_bytes = 0;
9001                 }
9002                 num_bytes += data_len;
9003                 path.slots[0]++;
9004         }
9005
9006         btrfs_release_path(&path);
9007         return errors;
9008 }
9009
9010 static int is_dropped_key(struct btrfs_key *key,
9011                           struct btrfs_key *drop_key) {
9012         if (key->objectid < drop_key->objectid)
9013                 return 1;
9014         else if (key->objectid == drop_key->objectid) {
9015                 if (key->type < drop_key->type)
9016                         return 1;
9017                 else if (key->type == drop_key->type) {
9018                         if (key->offset < drop_key->offset)
9019                                 return 1;
9020                 }
9021         }
9022         return 0;
9023 }
9024
9025 /*
9026  * Here are the rules for FULL_BACKREF.
9027  *
9028  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9029  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9030  *      FULL_BACKREF set.
9031  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
9032  *    if it happened after the relocation occurred since we'll have dropped the
9033  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9034  *    have no real way to know for sure.
9035  *
9036  * We process the blocks one root at a time, and we start from the lowest root
9037  * objectid and go to the highest.  So we can just lookup the owner backref for
9038  * the record and if we don't find it then we know it doesn't exist and we have
9039  * a FULL BACKREF.
9040  *
9041  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
9042  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9043  * be set or not and then we can check later once we've gathered all the refs.
9044  */
9045 static int calc_extent_flag(struct cache_tree *extent_cache,
9046                            struct extent_buffer *buf,
9047                            struct root_item_record *ri,
9048                            u64 *flags)
9049 {
9050         struct extent_record *rec;
9051         struct cache_extent *cache;
9052         struct tree_backref *tback;
9053         u64 owner = 0;
9054
9055         cache = lookup_cache_extent(extent_cache, buf->start, 1);
9056         /* we have added this extent before */
9057         if (!cache)
9058                 return -ENOENT;
9059
9060         rec = container_of(cache, struct extent_record, cache);
9061
9062         /*
9063          * Except file/reloc tree, we can not have
9064          * FULL BACKREF MODE
9065          */
9066         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9067                 goto normal;
9068         /*
9069          * root node
9070          */
9071         if (buf->start == ri->bytenr)
9072                 goto normal;
9073
9074         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9075                 goto full_backref;
9076
9077         owner = btrfs_header_owner(buf);
9078         if (owner == ri->objectid)
9079                 goto normal;
9080
9081         tback = find_tree_backref(rec, 0, owner);
9082         if (!tback)
9083                 goto full_backref;
9084 normal:
9085         *flags = 0;
9086         if (rec->flag_block_full_backref != FLAG_UNSET &&
9087             rec->flag_block_full_backref != 0)
9088                 rec->bad_full_backref = 1;
9089         return 0;
9090 full_backref:
9091         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9092         if (rec->flag_block_full_backref != FLAG_UNSET &&
9093             rec->flag_block_full_backref != 1)
9094                 rec->bad_full_backref = 1;
9095         return 0;
9096 }
9097
9098 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9099 {
9100         fprintf(stderr, "Invalid key type(");
9101         print_key_type(stderr, 0, key_type);
9102         fprintf(stderr, ") found in root(");
9103         print_objectid(stderr, rootid, 0);
9104         fprintf(stderr, ")\n");
9105 }
9106
9107 /*
9108  * Check if the key is valid with its extent buffer.
9109  *
9110  * This is a early check in case invalid key exists in a extent buffer
9111  * This is not comprehensive yet, but should prevent wrong key/item passed
9112  * further
9113  */
9114 static int check_type_with_root(u64 rootid, u8 key_type)
9115 {
9116         switch (key_type) {
9117         /* Only valid in chunk tree */
9118         case BTRFS_DEV_ITEM_KEY:
9119         case BTRFS_CHUNK_ITEM_KEY:
9120                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9121                         goto err;
9122                 break;
9123         /* valid in csum and log tree */
9124         case BTRFS_CSUM_TREE_OBJECTID:
9125                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9126                       is_fstree(rootid)))
9127                         goto err;
9128                 break;
9129         case BTRFS_EXTENT_ITEM_KEY:
9130         case BTRFS_METADATA_ITEM_KEY:
9131         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9132                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9133                         goto err;
9134                 break;
9135         case BTRFS_ROOT_ITEM_KEY:
9136                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9137                         goto err;
9138                 break;
9139         case BTRFS_DEV_EXTENT_KEY:
9140                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9141                         goto err;
9142                 break;
9143         }
9144         return 0;
9145 err:
9146         report_mismatch_key_root(key_type, rootid);
9147         return -EINVAL;
9148 }
9149
9150 static int run_next_block(struct btrfs_root *root,
9151                           struct block_info *bits,
9152                           int bits_nr,
9153                           u64 *last,
9154                           struct cache_tree *pending,
9155                           struct cache_tree *seen,
9156                           struct cache_tree *reada,
9157                           struct cache_tree *nodes,
9158                           struct cache_tree *extent_cache,
9159                           struct cache_tree *chunk_cache,
9160                           struct rb_root *dev_cache,
9161                           struct block_group_tree *block_group_cache,
9162                           struct device_extent_tree *dev_extent_cache,
9163                           struct root_item_record *ri)
9164 {
9165         struct btrfs_fs_info *fs_info = root->fs_info;
9166         struct extent_buffer *buf;
9167         struct extent_record *rec = NULL;
9168         u64 bytenr;
9169         u32 size;
9170         u64 parent;
9171         u64 owner;
9172         u64 flags;
9173         u64 ptr;
9174         u64 gen = 0;
9175         int ret = 0;
9176         int i;
9177         int nritems;
9178         struct btrfs_key key;
9179         struct cache_extent *cache;
9180         int reada_bits;
9181
9182         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9183                                     bits_nr, &reada_bits);
9184         if (nritems == 0)
9185                 return 1;
9186
9187         if (!reada_bits) {
9188                 for(i = 0; i < nritems; i++) {
9189                         ret = add_cache_extent(reada, bits[i].start,
9190                                                bits[i].size);
9191                         if (ret == -EEXIST)
9192                                 continue;
9193
9194                         /* fixme, get the parent transid */
9195                         readahead_tree_block(fs_info, bits[i].start, 0);
9196                 }
9197         }
9198         *last = bits[0].start;
9199         bytenr = bits[0].start;
9200         size = bits[0].size;
9201
9202         cache = lookup_cache_extent(pending, bytenr, size);
9203         if (cache) {
9204                 remove_cache_extent(pending, cache);
9205                 free(cache);
9206         }
9207         cache = lookup_cache_extent(reada, bytenr, size);
9208         if (cache) {
9209                 remove_cache_extent(reada, cache);
9210                 free(cache);
9211         }
9212         cache = lookup_cache_extent(nodes, bytenr, size);
9213         if (cache) {
9214                 remove_cache_extent(nodes, cache);
9215                 free(cache);
9216         }
9217         cache = lookup_cache_extent(extent_cache, bytenr, size);
9218         if (cache) {
9219                 rec = container_of(cache, struct extent_record, cache);
9220                 gen = rec->parent_generation;
9221         }
9222
9223         /* fixme, get the real parent transid */
9224         buf = read_tree_block(root->fs_info, bytenr, gen);
9225         if (!extent_buffer_uptodate(buf)) {
9226                 record_bad_block_io(root->fs_info,
9227                                     extent_cache, bytenr, size);
9228                 goto out;
9229         }
9230
9231         nritems = btrfs_header_nritems(buf);
9232
9233         flags = 0;
9234         if (!init_extent_tree) {
9235                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9236                                        btrfs_header_level(buf), 1, NULL,
9237                                        &flags);
9238                 if (ret < 0) {
9239                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9240                         if (ret < 0) {
9241                                 fprintf(stderr, "Couldn't calc extent flags\n");
9242                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9243                         }
9244                 }
9245         } else {
9246                 flags = 0;
9247                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9248                 if (ret < 0) {
9249                         fprintf(stderr, "Couldn't calc extent flags\n");
9250                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9251                 }
9252         }
9253
9254         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9255                 if (ri != NULL &&
9256                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9257                     ri->objectid == btrfs_header_owner(buf)) {
9258                         /*
9259                          * Ok we got to this block from it's original owner and
9260                          * we have FULL_BACKREF set.  Relocation can leave
9261                          * converted blocks over so this is altogether possible,
9262                          * however it's not possible if the generation > the
9263                          * last snapshot, so check for this case.
9264                          */
9265                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9266                             btrfs_header_generation(buf) > ri->last_snapshot) {
9267                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9268                                 rec->bad_full_backref = 1;
9269                         }
9270                 }
9271         } else {
9272                 if (ri != NULL &&
9273                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9274                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9275                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9276                         rec->bad_full_backref = 1;
9277                 }
9278         }
9279
9280         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9281                 rec->flag_block_full_backref = 1;
9282                 parent = bytenr;
9283                 owner = 0;
9284         } else {
9285                 rec->flag_block_full_backref = 0;
9286                 parent = 0;
9287                 owner = btrfs_header_owner(buf);
9288         }
9289
9290         ret = check_block(root, extent_cache, buf, flags);
9291         if (ret)
9292                 goto out;
9293
9294         if (btrfs_is_leaf(buf)) {
9295                 btree_space_waste += btrfs_leaf_free_space(root, buf);
9296                 for (i = 0; i < nritems; i++) {
9297                         struct btrfs_file_extent_item *fi;
9298                         btrfs_item_key_to_cpu(buf, &key, i);
9299                         /*
9300                          * Check key type against the leaf owner.
9301                          * Could filter quite a lot of early error if
9302                          * owner is correct
9303                          */
9304                         if (check_type_with_root(btrfs_header_owner(buf),
9305                                                  key.type)) {
9306                                 fprintf(stderr, "ignoring invalid key\n");
9307                                 continue;
9308                         }
9309                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9310                                 process_extent_item(root, extent_cache, buf,
9311                                                     i);
9312                                 continue;
9313                         }
9314                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9315                                 process_extent_item(root, extent_cache, buf,
9316                                                     i);
9317                                 continue;
9318                         }
9319                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9320                                 total_csum_bytes +=
9321                                         btrfs_item_size_nr(buf, i);
9322                                 continue;
9323                         }
9324                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9325                                 process_chunk_item(chunk_cache, &key, buf, i);
9326                                 continue;
9327                         }
9328                         if (key.type == BTRFS_DEV_ITEM_KEY) {
9329                                 process_device_item(dev_cache, &key, buf, i);
9330                                 continue;
9331                         }
9332                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9333                                 process_block_group_item(block_group_cache,
9334                                         &key, buf, i);
9335                                 continue;
9336                         }
9337                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
9338                                 process_device_extent_item(dev_extent_cache,
9339                                         &key, buf, i);
9340                                 continue;
9341
9342                         }
9343                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9344 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9345                                 process_extent_ref_v0(extent_cache, buf, i);
9346 #else
9347                                 BUG();
9348 #endif
9349                                 continue;
9350                         }
9351
9352                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9353                                 ret = add_tree_backref(extent_cache,
9354                                                 key.objectid, 0, key.offset, 0);
9355                                 if (ret < 0)
9356                                         error(
9357                                 "add_tree_backref failed (leaf tree block): %s",
9358                                               strerror(-ret));
9359                                 continue;
9360                         }
9361                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9362                                 ret = add_tree_backref(extent_cache,
9363                                                 key.objectid, key.offset, 0, 0);
9364                                 if (ret < 0)
9365                                         error(
9366                                 "add_tree_backref failed (leaf shared block): %s",
9367                                               strerror(-ret));
9368                                 continue;
9369                         }
9370                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9371                                 struct btrfs_extent_data_ref *ref;
9372                                 ref = btrfs_item_ptr(buf, i,
9373                                                 struct btrfs_extent_data_ref);
9374                                 add_data_backref(extent_cache,
9375                                         key.objectid, 0,
9376                                         btrfs_extent_data_ref_root(buf, ref),
9377                                         btrfs_extent_data_ref_objectid(buf,
9378                                                                        ref),
9379                                         btrfs_extent_data_ref_offset(buf, ref),
9380                                         btrfs_extent_data_ref_count(buf, ref),
9381                                         0, root->fs_info->sectorsize);
9382                                 continue;
9383                         }
9384                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9385                                 struct btrfs_shared_data_ref *ref;
9386                                 ref = btrfs_item_ptr(buf, i,
9387                                                 struct btrfs_shared_data_ref);
9388                                 add_data_backref(extent_cache,
9389                                         key.objectid, key.offset, 0, 0, 0,
9390                                         btrfs_shared_data_ref_count(buf, ref),
9391                                         0, root->fs_info->sectorsize);
9392                                 continue;
9393                         }
9394                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9395                                 struct bad_item *bad;
9396
9397                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9398                                         continue;
9399                                 if (!owner)
9400                                         continue;
9401                                 bad = malloc(sizeof(struct bad_item));
9402                                 if (!bad)
9403                                         continue;
9404                                 INIT_LIST_HEAD(&bad->list);
9405                                 memcpy(&bad->key, &key,
9406                                        sizeof(struct btrfs_key));
9407                                 bad->root_id = owner;
9408                                 list_add_tail(&bad->list, &delete_items);
9409                                 continue;
9410                         }
9411                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9412                                 continue;
9413                         fi = btrfs_item_ptr(buf, i,
9414                                             struct btrfs_file_extent_item);
9415                         if (btrfs_file_extent_type(buf, fi) ==
9416                             BTRFS_FILE_EXTENT_INLINE)
9417                                 continue;
9418                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9419                                 continue;
9420
9421                         data_bytes_allocated +=
9422                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9423                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9424                                 abort();
9425                         }
9426                         data_bytes_referenced +=
9427                                 btrfs_file_extent_num_bytes(buf, fi);
9428                         add_data_backref(extent_cache,
9429                                 btrfs_file_extent_disk_bytenr(buf, fi),
9430                                 parent, owner, key.objectid, key.offset -
9431                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9432                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9433                 }
9434         } else {
9435                 int level;
9436                 struct btrfs_key first_key;
9437
9438                 first_key.objectid = 0;
9439
9440                 if (nritems > 0)
9441                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9442                 level = btrfs_header_level(buf);
9443                 for (i = 0; i < nritems; i++) {
9444                         struct extent_record tmpl;
9445
9446                         ptr = btrfs_node_blockptr(buf, i);
9447                         size = root->fs_info->nodesize;
9448                         btrfs_node_key_to_cpu(buf, &key, i);
9449                         if (ri != NULL) {
9450                                 if ((level == ri->drop_level)
9451                                     && is_dropped_key(&key, &ri->drop_key)) {
9452                                         continue;
9453                                 }
9454                         }
9455
9456                         memset(&tmpl, 0, sizeof(tmpl));
9457                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9458                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9459                         tmpl.start = ptr;
9460                         tmpl.nr = size;
9461                         tmpl.refs = 1;
9462                         tmpl.metadata = 1;
9463                         tmpl.max_size = size;
9464                         ret = add_extent_rec(extent_cache, &tmpl);
9465                         if (ret < 0)
9466                                 goto out;
9467
9468                         ret = add_tree_backref(extent_cache, ptr, parent,
9469                                         owner, 1);
9470                         if (ret < 0) {
9471                                 error(
9472                                 "add_tree_backref failed (non-leaf block): %s",
9473                                       strerror(-ret));
9474                                 continue;
9475                         }
9476
9477                         if (level > 1) {
9478                                 add_pending(nodes, seen, ptr, size);
9479                         } else {
9480                                 add_pending(pending, seen, ptr, size);
9481                         }
9482                 }
9483                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9484                                       nritems) * sizeof(struct btrfs_key_ptr);
9485         }
9486         total_btree_bytes += buf->len;
9487         if (fs_root_objectid(btrfs_header_owner(buf)))
9488                 total_fs_tree_bytes += buf->len;
9489         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9490                 total_extent_tree_bytes += buf->len;
9491 out:
9492         free_extent_buffer(buf);
9493         return ret;
9494 }
9495
9496 static int add_root_to_pending(struct extent_buffer *buf,
9497                                struct cache_tree *extent_cache,
9498                                struct cache_tree *pending,
9499                                struct cache_tree *seen,
9500                                struct cache_tree *nodes,
9501                                u64 objectid)
9502 {
9503         struct extent_record tmpl;
9504         int ret;
9505
9506         if (btrfs_header_level(buf) > 0)
9507                 add_pending(nodes, seen, buf->start, buf->len);
9508         else
9509                 add_pending(pending, seen, buf->start, buf->len);
9510
9511         memset(&tmpl, 0, sizeof(tmpl));
9512         tmpl.start = buf->start;
9513         tmpl.nr = buf->len;
9514         tmpl.is_root = 1;
9515         tmpl.refs = 1;
9516         tmpl.metadata = 1;
9517         tmpl.max_size = buf->len;
9518         add_extent_rec(extent_cache, &tmpl);
9519
9520         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9521             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9522                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9523                                 0, 1);
9524         else
9525                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9526                                 1);
9527         return ret;
9528 }
9529
9530 /* as we fix the tree, we might be deleting blocks that
9531  * we're tracking for repair.  This hook makes sure we
9532  * remove any backrefs for blocks as we are fixing them.
9533  */
9534 static int free_extent_hook(struct btrfs_trans_handle *trans,
9535                             struct btrfs_root *root,
9536                             u64 bytenr, u64 num_bytes, u64 parent,
9537                             u64 root_objectid, u64 owner, u64 offset,
9538                             int refs_to_drop)
9539 {
9540         struct extent_record *rec;
9541         struct cache_extent *cache;
9542         int is_data;
9543         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9544
9545         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9546         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9547         if (!cache)
9548                 return 0;
9549
9550         rec = container_of(cache, struct extent_record, cache);
9551         if (is_data) {
9552                 struct data_backref *back;
9553                 back = find_data_backref(rec, parent, root_objectid, owner,
9554                                          offset, 1, bytenr, num_bytes);
9555                 if (!back)
9556                         goto out;
9557                 if (back->node.found_ref) {
9558                         back->found_ref -= refs_to_drop;
9559                         if (rec->refs)
9560                                 rec->refs -= refs_to_drop;
9561                 }
9562                 if (back->node.found_extent_tree) {
9563                         back->num_refs -= refs_to_drop;
9564                         if (rec->extent_item_refs)
9565                                 rec->extent_item_refs -= refs_to_drop;
9566                 }
9567                 if (back->found_ref == 0)
9568                         back->node.found_ref = 0;
9569                 if (back->num_refs == 0)
9570                         back->node.found_extent_tree = 0;
9571
9572                 if (!back->node.found_extent_tree && back->node.found_ref) {
9573                         rb_erase(&back->node.node, &rec->backref_tree);
9574                         free(back);
9575                 }
9576         } else {
9577                 struct tree_backref *back;
9578                 back = find_tree_backref(rec, parent, root_objectid);
9579                 if (!back)
9580                         goto out;
9581                 if (back->node.found_ref) {
9582                         if (rec->refs)
9583                                 rec->refs--;
9584                         back->node.found_ref = 0;
9585                 }
9586                 if (back->node.found_extent_tree) {
9587                         if (rec->extent_item_refs)
9588                                 rec->extent_item_refs--;
9589                         back->node.found_extent_tree = 0;
9590                 }
9591                 if (!back->node.found_extent_tree && back->node.found_ref) {
9592                         rb_erase(&back->node.node, &rec->backref_tree);
9593                         free(back);
9594                 }
9595         }
9596         maybe_free_extent_rec(extent_cache, rec);
9597 out:
9598         return 0;
9599 }
9600
9601 static int delete_extent_records(struct btrfs_trans_handle *trans,
9602                                  struct btrfs_root *root,
9603                                  struct btrfs_path *path,
9604                                  u64 bytenr)
9605 {
9606         struct btrfs_key key;
9607         struct btrfs_key found_key;
9608         struct extent_buffer *leaf;
9609         int ret;
9610         int slot;
9611
9612
9613         key.objectid = bytenr;
9614         key.type = (u8)-1;
9615         key.offset = (u64)-1;
9616
9617         while(1) {
9618                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9619                                         &key, path, 0, 1);
9620                 if (ret < 0)
9621                         break;
9622
9623                 if (ret > 0) {
9624                         ret = 0;
9625                         if (path->slots[0] == 0)
9626                                 break;
9627                         path->slots[0]--;
9628                 }
9629                 ret = 0;
9630
9631                 leaf = path->nodes[0];
9632                 slot = path->slots[0];
9633
9634                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9635                 if (found_key.objectid != bytenr)
9636                         break;
9637
9638                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9639                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9640                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9641                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9642                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9643                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9644                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9645                         btrfs_release_path(path);
9646                         if (found_key.type == 0) {
9647                                 if (found_key.offset == 0)
9648                                         break;
9649                                 key.offset = found_key.offset - 1;
9650                                 key.type = found_key.type;
9651                         }
9652                         key.type = found_key.type - 1;
9653                         key.offset = (u64)-1;
9654                         continue;
9655                 }
9656
9657                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9658                         found_key.objectid, found_key.type, found_key.offset);
9659
9660                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9661                 if (ret)
9662                         break;
9663                 btrfs_release_path(path);
9664
9665                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9666                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9667                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9668                                 found_key.offset : root->fs_info->nodesize;
9669
9670                         ret = btrfs_update_block_group(root, bytenr,
9671                                                        bytes, 0, 0);
9672                         if (ret)
9673                                 break;
9674                 }
9675         }
9676
9677         btrfs_release_path(path);
9678         return ret;
9679 }
9680
9681 /*
9682  * for a single backref, this will allocate a new extent
9683  * and add the backref to it.
9684  */
9685 static int record_extent(struct btrfs_trans_handle *trans,
9686                          struct btrfs_fs_info *info,
9687                          struct btrfs_path *path,
9688                          struct extent_record *rec,
9689                          struct extent_backref *back,
9690                          int allocated, u64 flags)
9691 {
9692         int ret = 0;
9693         struct btrfs_root *extent_root = info->extent_root;
9694         struct extent_buffer *leaf;
9695         struct btrfs_key ins_key;
9696         struct btrfs_extent_item *ei;
9697         struct data_backref *dback;
9698         struct btrfs_tree_block_info *bi;
9699
9700         if (!back->is_data)
9701                 rec->max_size = max_t(u64, rec->max_size,
9702                                     info->nodesize);
9703
9704         if (!allocated) {
9705                 u32 item_size = sizeof(*ei);
9706
9707                 if (!back->is_data)
9708                         item_size += sizeof(*bi);
9709
9710                 ins_key.objectid = rec->start;
9711                 ins_key.offset = rec->max_size;
9712                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9713
9714                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9715                                         &ins_key, item_size);
9716                 if (ret)
9717                         goto fail;
9718
9719                 leaf = path->nodes[0];
9720                 ei = btrfs_item_ptr(leaf, path->slots[0],
9721                                     struct btrfs_extent_item);
9722
9723                 btrfs_set_extent_refs(leaf, ei, 0);
9724                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9725
9726                 if (back->is_data) {
9727                         btrfs_set_extent_flags(leaf, ei,
9728                                                BTRFS_EXTENT_FLAG_DATA);
9729                 } else {
9730                         struct btrfs_disk_key copy_key;;
9731
9732                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9733                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9734                                              sizeof(*bi));
9735
9736                         btrfs_set_disk_key_objectid(&copy_key,
9737                                                     rec->info_objectid);
9738                         btrfs_set_disk_key_type(&copy_key, 0);
9739                         btrfs_set_disk_key_offset(&copy_key, 0);
9740
9741                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9742                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9743
9744                         btrfs_set_extent_flags(leaf, ei,
9745                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9746                 }
9747
9748                 btrfs_mark_buffer_dirty(leaf);
9749                 ret = btrfs_update_block_group(extent_root, rec->start,
9750                                                rec->max_size, 1, 0);
9751                 if (ret)
9752                         goto fail;
9753                 btrfs_release_path(path);
9754         }
9755
9756         if (back->is_data) {
9757                 u64 parent;
9758                 int i;
9759
9760                 dback = to_data_backref(back);
9761                 if (back->full_backref)
9762                         parent = dback->parent;
9763                 else
9764                         parent = 0;
9765
9766                 for (i = 0; i < dback->found_ref; i++) {
9767                         /* if parent != 0, we're doing a full backref
9768                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9769                          * just makes the backref allocator create a data
9770                          * backref
9771                          */
9772                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9773                                                    rec->start, rec->max_size,
9774                                                    parent,
9775                                                    dback->root,
9776                                                    parent ?
9777                                                    BTRFS_FIRST_FREE_OBJECTID :
9778                                                    dback->owner,
9779                                                    dback->offset);
9780                         if (ret)
9781                                 break;
9782                 }
9783                 fprintf(stderr, "adding new data backref"
9784                                 " on %llu %s %llu owner %llu"
9785                                 " offset %llu found %d\n",
9786                                 (unsigned long long)rec->start,
9787                                 back->full_backref ?
9788                                 "parent" : "root",
9789                                 back->full_backref ?
9790                                 (unsigned long long)parent :
9791                                 (unsigned long long)dback->root,
9792                                 (unsigned long long)dback->owner,
9793                                 (unsigned long long)dback->offset,
9794                                 dback->found_ref);
9795         } else {
9796                 u64 parent;
9797                 struct tree_backref *tback;
9798
9799                 tback = to_tree_backref(back);
9800                 if (back->full_backref)
9801                         parent = tback->parent;
9802                 else
9803                         parent = 0;
9804
9805                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9806                                            rec->start, rec->max_size,
9807                                            parent, tback->root, 0, 0);
9808                 fprintf(stderr, "adding new tree backref on "
9809                         "start %llu len %llu parent %llu root %llu\n",
9810                         rec->start, rec->max_size, parent, tback->root);
9811         }
9812 fail:
9813         btrfs_release_path(path);
9814         return ret;
9815 }
9816
9817 static struct extent_entry *find_entry(struct list_head *entries,
9818                                        u64 bytenr, u64 bytes)
9819 {
9820         struct extent_entry *entry = NULL;
9821
9822         list_for_each_entry(entry, entries, list) {
9823                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9824                         return entry;
9825         }
9826
9827         return NULL;
9828 }
9829
9830 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9831 {
9832         struct extent_entry *entry, *best = NULL, *prev = NULL;
9833
9834         list_for_each_entry(entry, entries, list) {
9835                 /*
9836                  * If there are as many broken entries as entries then we know
9837                  * not to trust this particular entry.
9838                  */
9839                 if (entry->broken == entry->count)
9840                         continue;
9841
9842                 /*
9843                  * Special case, when there are only two entries and 'best' is
9844                  * the first one
9845                  */
9846                 if (!prev) {
9847                         best = entry;
9848                         prev = entry;
9849                         continue;
9850                 }
9851
9852                 /*
9853                  * If our current entry == best then we can't be sure our best
9854                  * is really the best, so we need to keep searching.
9855                  */
9856                 if (best && best->count == entry->count) {
9857                         prev = entry;
9858                         best = NULL;
9859                         continue;
9860                 }
9861
9862                 /* Prev == entry, not good enough, have to keep searching */
9863                 if (!prev->broken && prev->count == entry->count)
9864                         continue;
9865
9866                 if (!best)
9867                         best = (prev->count > entry->count) ? prev : entry;
9868                 else if (best->count < entry->count)
9869                         best = entry;
9870                 prev = entry;
9871         }
9872
9873         return best;
9874 }
9875
9876 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9877                       struct data_backref *dback, struct extent_entry *entry)
9878 {
9879         struct btrfs_trans_handle *trans;
9880         struct btrfs_root *root;
9881         struct btrfs_file_extent_item *fi;
9882         struct extent_buffer *leaf;
9883         struct btrfs_key key;
9884         u64 bytenr, bytes;
9885         int ret, err;
9886
9887         key.objectid = dback->root;
9888         key.type = BTRFS_ROOT_ITEM_KEY;
9889         key.offset = (u64)-1;
9890         root = btrfs_read_fs_root(info, &key);
9891         if (IS_ERR(root)) {
9892                 fprintf(stderr, "Couldn't find root for our ref\n");
9893                 return -EINVAL;
9894         }
9895
9896         /*
9897          * The backref points to the original offset of the extent if it was
9898          * split, so we need to search down to the offset we have and then walk
9899          * forward until we find the backref we're looking for.
9900          */
9901         key.objectid = dback->owner;
9902         key.type = BTRFS_EXTENT_DATA_KEY;
9903         key.offset = dback->offset;
9904         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9905         if (ret < 0) {
9906                 fprintf(stderr, "Error looking up ref %d\n", ret);
9907                 return ret;
9908         }
9909
9910         while (1) {
9911                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9912                         ret = btrfs_next_leaf(root, path);
9913                         if (ret) {
9914                                 fprintf(stderr, "Couldn't find our ref, next\n");
9915                                 return -EINVAL;
9916                         }
9917                 }
9918                 leaf = path->nodes[0];
9919                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9920                 if (key.objectid != dback->owner ||
9921                     key.type != BTRFS_EXTENT_DATA_KEY) {
9922                         fprintf(stderr, "Couldn't find our ref, search\n");
9923                         return -EINVAL;
9924                 }
9925                 fi = btrfs_item_ptr(leaf, path->slots[0],
9926                                     struct btrfs_file_extent_item);
9927                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9928                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9929
9930                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9931                         break;
9932                 path->slots[0]++;
9933         }
9934
9935         btrfs_release_path(path);
9936
9937         trans = btrfs_start_transaction(root, 1);
9938         if (IS_ERR(trans))
9939                 return PTR_ERR(trans);
9940
9941         /*
9942          * Ok we have the key of the file extent we want to fix, now we can cow
9943          * down to the thing and fix it.
9944          */
9945         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9946         if (ret < 0) {
9947                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9948                         key.objectid, key.type, key.offset, ret);
9949                 goto out;
9950         }
9951         if (ret > 0) {
9952                 fprintf(stderr, "Well that's odd, we just found this key "
9953                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9954                         key.offset);
9955                 ret = -EINVAL;
9956                 goto out;
9957         }
9958         leaf = path->nodes[0];
9959         fi = btrfs_item_ptr(leaf, path->slots[0],
9960                             struct btrfs_file_extent_item);
9961
9962         if (btrfs_file_extent_compression(leaf, fi) &&
9963             dback->disk_bytenr != entry->bytenr) {
9964                 fprintf(stderr, "Ref doesn't match the record start and is "
9965                         "compressed, please take a btrfs-image of this file "
9966                         "system and send it to a btrfs developer so they can "
9967                         "complete this functionality for bytenr %Lu\n",
9968                         dback->disk_bytenr);
9969                 ret = -EINVAL;
9970                 goto out;
9971         }
9972
9973         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9974                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9975         } else if (dback->disk_bytenr > entry->bytenr) {
9976                 u64 off_diff, offset;
9977
9978                 off_diff = dback->disk_bytenr - entry->bytenr;
9979                 offset = btrfs_file_extent_offset(leaf, fi);
9980                 if (dback->disk_bytenr + offset +
9981                     btrfs_file_extent_num_bytes(leaf, fi) >
9982                     entry->bytenr + entry->bytes) {
9983                         fprintf(stderr, "Ref is past the entry end, please "
9984                                 "take a btrfs-image of this file system and "
9985                                 "send it to a btrfs developer, ref %Lu\n",
9986                                 dback->disk_bytenr);
9987                         ret = -EINVAL;
9988                         goto out;
9989                 }
9990                 offset += off_diff;
9991                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9992                 btrfs_set_file_extent_offset(leaf, fi, offset);
9993         } else if (dback->disk_bytenr < entry->bytenr) {
9994                 u64 offset;
9995
9996                 offset = btrfs_file_extent_offset(leaf, fi);
9997                 if (dback->disk_bytenr + offset < entry->bytenr) {
9998                         fprintf(stderr, "Ref is before the entry start, please"
9999                                 " take a btrfs-image of this file system and "
10000                                 "send it to a btrfs developer, ref %Lu\n",
10001                                 dback->disk_bytenr);
10002                         ret = -EINVAL;
10003                         goto out;
10004                 }
10005
10006                 offset += dback->disk_bytenr;
10007                 offset -= entry->bytenr;
10008                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
10009                 btrfs_set_file_extent_offset(leaf, fi, offset);
10010         }
10011
10012         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10013
10014         /*
10015          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10016          * only do this if we aren't using compression, otherwise it's a
10017          * trickier case.
10018          */
10019         if (!btrfs_file_extent_compression(leaf, fi))
10020                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10021         else
10022                 printf("ram bytes may be wrong?\n");
10023         btrfs_mark_buffer_dirty(leaf);
10024 out:
10025         err = btrfs_commit_transaction(trans, root);
10026         btrfs_release_path(path);
10027         return ret ? ret : err;
10028 }
10029
10030 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10031                            struct extent_record *rec)
10032 {
10033         struct extent_backref *back, *tmp;
10034         struct data_backref *dback;
10035         struct extent_entry *entry, *best = NULL;
10036         LIST_HEAD(entries);
10037         int nr_entries = 0;
10038         int broken_entries = 0;
10039         int ret = 0;
10040         short mismatch = 0;
10041
10042         /*
10043          * Metadata is easy and the backrefs should always agree on bytenr and
10044          * size, if not we've got bigger issues.
10045          */
10046         if (rec->metadata)
10047                 return 0;
10048
10049         rbtree_postorder_for_each_entry_safe(back, tmp,
10050                                              &rec->backref_tree, node) {
10051                 if (back->full_backref || !back->is_data)
10052                         continue;
10053
10054                 dback = to_data_backref(back);
10055
10056                 /*
10057                  * We only pay attention to backrefs that we found a real
10058                  * backref for.
10059                  */
10060                 if (dback->found_ref == 0)
10061                         continue;
10062
10063                 /*
10064                  * For now we only catch when the bytes don't match, not the
10065                  * bytenr.  We can easily do this at the same time, but I want
10066                  * to have a fs image to test on before we just add repair
10067                  * functionality willy-nilly so we know we won't screw up the
10068                  * repair.
10069                  */
10070
10071                 entry = find_entry(&entries, dback->disk_bytenr,
10072                                    dback->bytes);
10073                 if (!entry) {
10074                         entry = malloc(sizeof(struct extent_entry));
10075                         if (!entry) {
10076                                 ret = -ENOMEM;
10077                                 goto out;
10078                         }
10079                         memset(entry, 0, sizeof(*entry));
10080                         entry->bytenr = dback->disk_bytenr;
10081                         entry->bytes = dback->bytes;
10082                         list_add_tail(&entry->list, &entries);
10083                         nr_entries++;
10084                 }
10085
10086                 /*
10087                  * If we only have on entry we may think the entries agree when
10088                  * in reality they don't so we have to do some extra checking.
10089                  */
10090                 if (dback->disk_bytenr != rec->start ||
10091                     dback->bytes != rec->nr || back->broken)
10092                         mismatch = 1;
10093
10094                 if (back->broken) {
10095                         entry->broken++;
10096                         broken_entries++;
10097                 }
10098
10099                 entry->count++;
10100         }
10101
10102         /* Yay all the backrefs agree, carry on good sir */
10103         if (nr_entries <= 1 && !mismatch)
10104                 goto out;
10105
10106         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10107                 "%Lu\n", rec->start);
10108
10109         /*
10110          * First we want to see if the backrefs can agree amongst themselves who
10111          * is right, so figure out which one of the entries has the highest
10112          * count.
10113          */
10114         best = find_most_right_entry(&entries);
10115
10116         /*
10117          * Ok so we may have an even split between what the backrefs think, so
10118          * this is where we use the extent ref to see what it thinks.
10119          */
10120         if (!best) {
10121                 entry = find_entry(&entries, rec->start, rec->nr);
10122                 if (!entry && (!broken_entries || !rec->found_rec)) {
10123                         fprintf(stderr, "Backrefs don't agree with each other "
10124                                 "and extent record doesn't agree with anybody,"
10125                                 " so we can't fix bytenr %Lu bytes %Lu\n",
10126                                 rec->start, rec->nr);
10127                         ret = -EINVAL;
10128                         goto out;
10129                 } else if (!entry) {
10130                         /*
10131                          * Ok our backrefs were broken, we'll assume this is the
10132                          * correct value and add an entry for this range.
10133                          */
10134                         entry = malloc(sizeof(struct extent_entry));
10135                         if (!entry) {
10136                                 ret = -ENOMEM;
10137                                 goto out;
10138                         }
10139                         memset(entry, 0, sizeof(*entry));
10140                         entry->bytenr = rec->start;
10141                         entry->bytes = rec->nr;
10142                         list_add_tail(&entry->list, &entries);
10143                         nr_entries++;
10144                 }
10145                 entry->count++;
10146                 best = find_most_right_entry(&entries);
10147                 if (!best) {
10148                         fprintf(stderr, "Backrefs and extent record evenly "
10149                                 "split on who is right, this is going to "
10150                                 "require user input to fix bytenr %Lu bytes "
10151                                 "%Lu\n", rec->start, rec->nr);
10152                         ret = -EINVAL;
10153                         goto out;
10154                 }
10155         }
10156
10157         /*
10158          * I don't think this can happen currently as we'll abort() if we catch
10159          * this case higher up, but in case somebody removes that we still can't
10160          * deal with it properly here yet, so just bail out of that's the case.
10161          */
10162         if (best->bytenr != rec->start) {
10163                 fprintf(stderr, "Extent start and backref starts don't match, "
10164                         "please use btrfs-image on this file system and send "
10165                         "it to a btrfs developer so they can make fsck fix "
10166                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
10167                         rec->start, rec->nr);
10168                 ret = -EINVAL;
10169                 goto out;
10170         }
10171
10172         /*
10173          * Ok great we all agreed on an extent record, let's go find the real
10174          * references and fix up the ones that don't match.
10175          */
10176         rbtree_postorder_for_each_entry_safe(back, tmp,
10177                                              &rec->backref_tree, node) {
10178                 if (back->full_backref || !back->is_data)
10179                         continue;
10180
10181                 dback = to_data_backref(back);
10182
10183                 /*
10184                  * Still ignoring backrefs that don't have a real ref attached
10185                  * to them.
10186                  */
10187                 if (dback->found_ref == 0)
10188                         continue;
10189
10190                 if (dback->bytes == best->bytes &&
10191                     dback->disk_bytenr == best->bytenr)
10192                         continue;
10193
10194                 ret = repair_ref(info, path, dback, best);
10195                 if (ret)
10196                         goto out;
10197         }
10198
10199         /*
10200          * Ok we messed with the actual refs, which means we need to drop our
10201          * entire cache and go back and rescan.  I know this is a huge pain and
10202          * adds a lot of extra work, but it's the only way to be safe.  Once all
10203          * the backrefs agree we may not need to do anything to the extent
10204          * record itself.
10205          */
10206         ret = -EAGAIN;
10207 out:
10208         while (!list_empty(&entries)) {
10209                 entry = list_entry(entries.next, struct extent_entry, list);
10210                 list_del_init(&entry->list);
10211                 free(entry);
10212         }
10213         return ret;
10214 }
10215
10216 static int process_duplicates(struct cache_tree *extent_cache,
10217                               struct extent_record *rec)
10218 {
10219         struct extent_record *good, *tmp;
10220         struct cache_extent *cache;
10221         int ret;
10222
10223         /*
10224          * If we found a extent record for this extent then return, or if we
10225          * have more than one duplicate we are likely going to need to delete
10226          * something.
10227          */
10228         if (rec->found_rec || rec->num_duplicates > 1)
10229                 return 0;
10230
10231         /* Shouldn't happen but just in case */
10232         BUG_ON(!rec->num_duplicates);
10233
10234         /*
10235          * So this happens if we end up with a backref that doesn't match the
10236          * actual extent entry.  So either the backref is bad or the extent
10237          * entry is bad.  Either way we want to have the extent_record actually
10238          * reflect what we found in the extent_tree, so we need to take the
10239          * duplicate out and use that as the extent_record since the only way we
10240          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10241          */
10242         remove_cache_extent(extent_cache, &rec->cache);
10243
10244         good = to_extent_record(rec->dups.next);
10245         list_del_init(&good->list);
10246         INIT_LIST_HEAD(&good->backrefs);
10247         INIT_LIST_HEAD(&good->dups);
10248         good->cache.start = good->start;
10249         good->cache.size = good->nr;
10250         good->content_checked = 0;
10251         good->owner_ref_checked = 0;
10252         good->num_duplicates = 0;
10253         good->refs = rec->refs;
10254         list_splice_init(&rec->backrefs, &good->backrefs);
10255         while (1) {
10256                 cache = lookup_cache_extent(extent_cache, good->start,
10257                                             good->nr);
10258                 if (!cache)
10259                         break;
10260                 tmp = container_of(cache, struct extent_record, cache);
10261
10262                 /*
10263                  * If we find another overlapping extent and it's found_rec is
10264                  * set then it's a duplicate and we need to try and delete
10265                  * something.
10266                  */
10267                 if (tmp->found_rec || tmp->num_duplicates > 0) {
10268                         if (list_empty(&good->list))
10269                                 list_add_tail(&good->list,
10270                                               &duplicate_extents);
10271                         good->num_duplicates += tmp->num_duplicates + 1;
10272                         list_splice_init(&tmp->dups, &good->dups);
10273                         list_del_init(&tmp->list);
10274                         list_add_tail(&tmp->list, &good->dups);
10275                         remove_cache_extent(extent_cache, &tmp->cache);
10276                         continue;
10277                 }
10278
10279                 /*
10280                  * Ok we have another non extent item backed extent rec, so lets
10281                  * just add it to this extent and carry on like we did above.
10282                  */
10283                 good->refs += tmp->refs;
10284                 list_splice_init(&tmp->backrefs, &good->backrefs);
10285                 remove_cache_extent(extent_cache, &tmp->cache);
10286                 free(tmp);
10287         }
10288         ret = insert_cache_extent(extent_cache, &good->cache);
10289         BUG_ON(ret);
10290         free(rec);
10291         return good->num_duplicates ? 0 : 1;
10292 }
10293
10294 static int delete_duplicate_records(struct btrfs_root *root,
10295                                     struct extent_record *rec)
10296 {
10297         struct btrfs_trans_handle *trans;
10298         LIST_HEAD(delete_list);
10299         struct btrfs_path path;
10300         struct extent_record *tmp, *good, *n;
10301         int nr_del = 0;
10302         int ret = 0, err;
10303         struct btrfs_key key;
10304
10305         btrfs_init_path(&path);
10306
10307         good = rec;
10308         /* Find the record that covers all of the duplicates. */
10309         list_for_each_entry(tmp, &rec->dups, list) {
10310                 if (good->start < tmp->start)
10311                         continue;
10312                 if (good->nr > tmp->nr)
10313                         continue;
10314
10315                 if (tmp->start + tmp->nr < good->start + good->nr) {
10316                         fprintf(stderr, "Ok we have overlapping extents that "
10317                                 "aren't completely covered by each other, this "
10318                                 "is going to require more careful thought.  "
10319                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10320                                 tmp->start, tmp->nr, good->start, good->nr);
10321                         abort();
10322                 }
10323                 good = tmp;
10324         }
10325
10326         if (good != rec)
10327                 list_add_tail(&rec->list, &delete_list);
10328
10329         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10330                 if (tmp == good)
10331                         continue;
10332                 list_move_tail(&tmp->list, &delete_list);
10333         }
10334
10335         root = root->fs_info->extent_root;
10336         trans = btrfs_start_transaction(root, 1);
10337         if (IS_ERR(trans)) {
10338                 ret = PTR_ERR(trans);
10339                 goto out;
10340         }
10341
10342         list_for_each_entry(tmp, &delete_list, list) {
10343                 if (tmp->found_rec == 0)
10344                         continue;
10345                 key.objectid = tmp->start;
10346                 key.type = BTRFS_EXTENT_ITEM_KEY;
10347                 key.offset = tmp->nr;
10348
10349                 /* Shouldn't happen but just in case */
10350                 if (tmp->metadata) {
10351                         fprintf(stderr, "Well this shouldn't happen, extent "
10352                                 "record overlaps but is metadata? "
10353                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10354                         abort();
10355                 }
10356
10357                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10358                 if (ret) {
10359                         if (ret > 0)
10360                                 ret = -EINVAL;
10361                         break;
10362                 }
10363                 ret = btrfs_del_item(trans, root, &path);
10364                 if (ret)
10365                         break;
10366                 btrfs_release_path(&path);
10367                 nr_del++;
10368         }
10369         err = btrfs_commit_transaction(trans, root);
10370         if (err && !ret)
10371                 ret = err;
10372 out:
10373         while (!list_empty(&delete_list)) {
10374                 tmp = to_extent_record(delete_list.next);
10375                 list_del_init(&tmp->list);
10376                 if (tmp == rec)
10377                         continue;
10378                 free(tmp);
10379         }
10380
10381         while (!list_empty(&rec->dups)) {
10382                 tmp = to_extent_record(rec->dups.next);
10383                 list_del_init(&tmp->list);
10384                 free(tmp);
10385         }
10386
10387         btrfs_release_path(&path);
10388
10389         if (!ret && !nr_del)
10390                 rec->num_duplicates = 0;
10391
10392         return ret ? ret : nr_del;
10393 }
10394
10395 static int find_possible_backrefs(struct btrfs_fs_info *info,
10396                                   struct btrfs_path *path,
10397                                   struct cache_tree *extent_cache,
10398                                   struct extent_record *rec)
10399 {
10400         struct btrfs_root *root;
10401         struct extent_backref *back, *tmp;
10402         struct data_backref *dback;
10403         struct cache_extent *cache;
10404         struct btrfs_file_extent_item *fi;
10405         struct btrfs_key key;
10406         u64 bytenr, bytes;
10407         int ret;
10408
10409         rbtree_postorder_for_each_entry_safe(back, tmp,
10410                                              &rec->backref_tree, node) {
10411                 /* Don't care about full backrefs (poor unloved backrefs) */
10412                 if (back->full_backref || !back->is_data)
10413                         continue;
10414
10415                 dback = to_data_backref(back);
10416
10417                 /* We found this one, we don't need to do a lookup */
10418                 if (dback->found_ref)
10419                         continue;
10420
10421                 key.objectid = dback->root;
10422                 key.type = BTRFS_ROOT_ITEM_KEY;
10423                 key.offset = (u64)-1;
10424
10425                 root = btrfs_read_fs_root(info, &key);
10426
10427                 /* No root, definitely a bad ref, skip */
10428                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10429                         continue;
10430                 /* Other err, exit */
10431                 if (IS_ERR(root))
10432                         return PTR_ERR(root);
10433
10434                 key.objectid = dback->owner;
10435                 key.type = BTRFS_EXTENT_DATA_KEY;
10436                 key.offset = dback->offset;
10437                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10438                 if (ret) {
10439                         btrfs_release_path(path);
10440                         if (ret < 0)
10441                                 return ret;
10442                         /* Didn't find it, we can carry on */
10443                         ret = 0;
10444                         continue;
10445                 }
10446
10447                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10448                                     struct btrfs_file_extent_item);
10449                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10450                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10451                 btrfs_release_path(path);
10452                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10453                 if (cache) {
10454                         struct extent_record *tmp;
10455                         tmp = container_of(cache, struct extent_record, cache);
10456
10457                         /*
10458                          * If we found an extent record for the bytenr for this
10459                          * particular backref then we can't add it to our
10460                          * current extent record.  We only want to add backrefs
10461                          * that don't have a corresponding extent item in the
10462                          * extent tree since they likely belong to this record
10463                          * and we need to fix it if it doesn't match bytenrs.
10464                          */
10465                         if  (tmp->found_rec)
10466                                 continue;
10467                 }
10468
10469                 dback->found_ref += 1;
10470                 dback->disk_bytenr = bytenr;
10471                 dback->bytes = bytes;
10472
10473                 /*
10474                  * Set this so the verify backref code knows not to trust the
10475                  * values in this backref.
10476                  */
10477                 back->broken = 1;
10478         }
10479
10480         return 0;
10481 }
10482
10483 /*
10484  * Record orphan data ref into corresponding root.
10485  *
10486  * Return 0 if the extent item contains data ref and recorded.
10487  * Return 1 if the extent item contains no useful data ref
10488  *   On that case, it may contains only shared_dataref or metadata backref
10489  *   or the file extent exists(this should be handled by the extent bytenr
10490  *   recovery routine)
10491  * Return <0 if something goes wrong.
10492  */
10493 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10494                                       struct extent_record *rec)
10495 {
10496         struct btrfs_key key;
10497         struct btrfs_root *dest_root;
10498         struct extent_backref *back, *tmp;
10499         struct data_backref *dback;
10500         struct orphan_data_extent *orphan;
10501         struct btrfs_path path;
10502         int recorded_data_ref = 0;
10503         int ret = 0;
10504
10505         if (rec->metadata)
10506                 return 1;
10507         btrfs_init_path(&path);
10508         rbtree_postorder_for_each_entry_safe(back, tmp,
10509                                              &rec->backref_tree, node) {
10510                 if (back->full_backref || !back->is_data ||
10511                     !back->found_extent_tree)
10512                         continue;
10513                 dback = to_data_backref(back);
10514                 if (dback->found_ref)
10515                         continue;
10516                 key.objectid = dback->root;
10517                 key.type = BTRFS_ROOT_ITEM_KEY;
10518                 key.offset = (u64)-1;
10519
10520                 dest_root = btrfs_read_fs_root(fs_info, &key);
10521
10522                 /* For non-exist root we just skip it */
10523                 if (IS_ERR(dest_root) || !dest_root)
10524                         continue;
10525
10526                 key.objectid = dback->owner;
10527                 key.type = BTRFS_EXTENT_DATA_KEY;
10528                 key.offset = dback->offset;
10529
10530                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10531                 btrfs_release_path(&path);
10532                 /*
10533                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10534                  * we need to record it for inode/file extent rebuild.
10535                  * For ret > 0, we record it only for file extent rebuild.
10536                  * For ret == 0, the file extent exists but only bytenr
10537                  * mismatch, let the original bytenr fix routine to handle,
10538                  * don't record it.
10539                  */
10540                 if (ret == 0)
10541                         continue;
10542                 ret = 0;
10543                 orphan = malloc(sizeof(*orphan));
10544                 if (!orphan) {
10545                         ret = -ENOMEM;
10546                         goto out;
10547                 }
10548                 INIT_LIST_HEAD(&orphan->list);
10549                 orphan->root = dback->root;
10550                 orphan->objectid = dback->owner;
10551                 orphan->offset = dback->offset;
10552                 orphan->disk_bytenr = rec->cache.start;
10553                 orphan->disk_len = rec->cache.size;
10554                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10555                 recorded_data_ref = 1;
10556         }
10557 out:
10558         btrfs_release_path(&path);
10559         if (!ret)
10560                 return !recorded_data_ref;
10561         else
10562                 return ret;
10563 }
10564
10565 /*
10566  * when an incorrect extent item is found, this will delete
10567  * all of the existing entries for it and recreate them
10568  * based on what the tree scan found.
10569  */
10570 static int fixup_extent_refs(struct btrfs_fs_info *info,
10571                              struct cache_tree *extent_cache,
10572                              struct extent_record *rec)
10573 {
10574         struct btrfs_trans_handle *trans = NULL;
10575         int ret;
10576         struct btrfs_path path;
10577         struct cache_extent *cache;
10578         struct extent_backref *back, *tmp;
10579         int allocated = 0;
10580         u64 flags = 0;
10581
10582         if (rec->flag_block_full_backref)
10583                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10584
10585         btrfs_init_path(&path);
10586         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10587                 /*
10588                  * Sometimes the backrefs themselves are so broken they don't
10589                  * get attached to any meaningful rec, so first go back and
10590                  * check any of our backrefs that we couldn't find and throw
10591                  * them into the list if we find the backref so that
10592                  * verify_backrefs can figure out what to do.
10593                  */
10594                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10595                 if (ret < 0)
10596                         goto out;
10597         }
10598
10599         /* step one, make sure all of the backrefs agree */
10600         ret = verify_backrefs(info, &path, rec);
10601         if (ret < 0)
10602                 goto out;
10603
10604         trans = btrfs_start_transaction(info->extent_root, 1);
10605         if (IS_ERR(trans)) {
10606                 ret = PTR_ERR(trans);
10607                 goto out;
10608         }
10609
10610         /* step two, delete all the existing records */
10611         ret = delete_extent_records(trans, info->extent_root, &path,
10612                                     rec->start);
10613
10614         if (ret < 0)
10615                 goto out;
10616
10617         /* was this block corrupt?  If so, don't add references to it */
10618         cache = lookup_cache_extent(info->corrupt_blocks,
10619                                     rec->start, rec->max_size);
10620         if (cache) {
10621                 ret = 0;
10622                 goto out;
10623         }
10624
10625         /* step three, recreate all the refs we did find */
10626         rbtree_postorder_for_each_entry_safe(back, tmp,
10627                                              &rec->backref_tree, node) {
10628                 /*
10629                  * if we didn't find any references, don't create a
10630                  * new extent record
10631                  */
10632                 if (!back->found_ref)
10633                         continue;
10634
10635                 rec->bad_full_backref = 0;
10636                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10637                 allocated = 1;
10638
10639                 if (ret)
10640                         goto out;
10641         }
10642 out:
10643         if (trans) {
10644                 int err = btrfs_commit_transaction(trans, info->extent_root);
10645                 if (!ret)
10646                         ret = err;
10647         }
10648
10649         if (!ret)
10650                 fprintf(stderr, "Repaired extent references for %llu\n",
10651                                 (unsigned long long)rec->start);
10652
10653         btrfs_release_path(&path);
10654         return ret;
10655 }
10656
10657 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10658                               struct extent_record *rec)
10659 {
10660         struct btrfs_trans_handle *trans;
10661         struct btrfs_root *root = fs_info->extent_root;
10662         struct btrfs_path path;
10663         struct btrfs_extent_item *ei;
10664         struct btrfs_key key;
10665         u64 flags;
10666         int ret = 0;
10667
10668         key.objectid = rec->start;
10669         if (rec->metadata) {
10670                 key.type = BTRFS_METADATA_ITEM_KEY;
10671                 key.offset = rec->info_level;
10672         } else {
10673                 key.type = BTRFS_EXTENT_ITEM_KEY;
10674                 key.offset = rec->max_size;
10675         }
10676
10677         trans = btrfs_start_transaction(root, 0);
10678         if (IS_ERR(trans))
10679                 return PTR_ERR(trans);
10680
10681         btrfs_init_path(&path);
10682         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10683         if (ret < 0) {
10684                 btrfs_release_path(&path);
10685                 btrfs_commit_transaction(trans, root);
10686                 return ret;
10687         } else if (ret) {
10688                 fprintf(stderr, "Didn't find extent for %llu\n",
10689                         (unsigned long long)rec->start);
10690                 btrfs_release_path(&path);
10691                 btrfs_commit_transaction(trans, root);
10692                 return -ENOENT;
10693         }
10694
10695         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10696                             struct btrfs_extent_item);
10697         flags = btrfs_extent_flags(path.nodes[0], ei);
10698         if (rec->flag_block_full_backref) {
10699                 fprintf(stderr, "setting full backref on %llu\n",
10700                         (unsigned long long)key.objectid);
10701                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10702         } else {
10703                 fprintf(stderr, "clearing full backref on %llu\n",
10704                         (unsigned long long)key.objectid);
10705                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10706         }
10707         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10708         btrfs_mark_buffer_dirty(path.nodes[0]);
10709         btrfs_release_path(&path);
10710         ret = btrfs_commit_transaction(trans, root);
10711         if (!ret)
10712                 fprintf(stderr, "Repaired extent flags for %llu\n",
10713                                 (unsigned long long)rec->start);
10714
10715         return ret;
10716 }
10717
10718 /* right now we only prune from the extent allocation tree */
10719 static int prune_one_block(struct btrfs_trans_handle *trans,
10720                            struct btrfs_fs_info *info,
10721                            struct btrfs_corrupt_block *corrupt)
10722 {
10723         int ret;
10724         struct btrfs_path path;
10725         struct extent_buffer *eb;
10726         u64 found;
10727         int slot;
10728         int nritems;
10729         int level = corrupt->level + 1;
10730
10731         btrfs_init_path(&path);
10732 again:
10733         /* we want to stop at the parent to our busted block */
10734         path.lowest_level = level;
10735
10736         ret = btrfs_search_slot(trans, info->extent_root,
10737                                 &corrupt->key, &path, -1, 1);
10738
10739         if (ret < 0)
10740                 goto out;
10741
10742         eb = path.nodes[level];
10743         if (!eb) {
10744                 ret = -ENOENT;
10745                 goto out;
10746         }
10747
10748         /*
10749          * hopefully the search gave us the block we want to prune,
10750          * lets try that first
10751          */
10752         slot = path.slots[level];
10753         found =  btrfs_node_blockptr(eb, slot);
10754         if (found == corrupt->cache.start)
10755                 goto del_ptr;
10756
10757         nritems = btrfs_header_nritems(eb);
10758
10759         /* the search failed, lets scan this node and hope we find it */
10760         for (slot = 0; slot < nritems; slot++) {
10761                 found =  btrfs_node_blockptr(eb, slot);
10762                 if (found == corrupt->cache.start)
10763                         goto del_ptr;
10764         }
10765         /*
10766          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10767          * to this block
10768          */
10769         if (eb == info->extent_root->node) {
10770                 ret = -ENOENT;
10771                 goto out;
10772         } else {
10773                 level++;
10774                 btrfs_release_path(&path);
10775                 goto again;
10776         }
10777
10778 del_ptr:
10779         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10780         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10781
10782 out:
10783         btrfs_release_path(&path);
10784         return ret;
10785 }
10786
10787 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10788 {
10789         struct btrfs_trans_handle *trans = NULL;
10790         struct cache_extent *cache;
10791         struct btrfs_corrupt_block *corrupt;
10792
10793         while (1) {
10794                 cache = search_cache_extent(info->corrupt_blocks, 0);
10795                 if (!cache)
10796                         break;
10797                 if (!trans) {
10798                         trans = btrfs_start_transaction(info->extent_root, 1);
10799                         if (IS_ERR(trans))
10800                                 return PTR_ERR(trans);
10801                 }
10802                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10803                 prune_one_block(trans, info, corrupt);
10804                 remove_cache_extent(info->corrupt_blocks, cache);
10805         }
10806         if (trans)
10807                 return btrfs_commit_transaction(trans, info->extent_root);
10808         return 0;
10809 }
10810
10811 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10812 {
10813         struct btrfs_block_group_cache *cache;
10814         u64 start, end;
10815         int ret;
10816
10817         while (1) {
10818                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10819                                             &start, &end, EXTENT_DIRTY);
10820                 if (ret)
10821                         break;
10822                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10823         }
10824
10825         start = 0;
10826         while (1) {
10827                 cache = btrfs_lookup_first_block_group(fs_info, start);
10828                 if (!cache)
10829                         break;
10830                 if (cache->cached)
10831                         cache->cached = 0;
10832                 start = cache->key.objectid + cache->key.offset;
10833         }
10834 }
10835
10836 static int check_extent_refs(struct btrfs_root *root,
10837                              struct cache_tree *extent_cache)
10838 {
10839         struct extent_record *rec;
10840         struct cache_extent *cache;
10841         int ret = 0;
10842         int had_dups = 0;
10843         int err = 0;
10844
10845         if (repair) {
10846                 /*
10847                  * if we're doing a repair, we have to make sure
10848                  * we don't allocate from the problem extents.
10849                  * In the worst case, this will be all the
10850                  * extents in the FS
10851                  */
10852                 cache = search_cache_extent(extent_cache, 0);
10853                 while(cache) {
10854                         rec = container_of(cache, struct extent_record, cache);
10855                         set_extent_dirty(root->fs_info->excluded_extents,
10856                                          rec->start,
10857                                          rec->start + rec->max_size - 1);
10858                         cache = next_cache_extent(cache);
10859                 }
10860
10861                 /* pin down all the corrupted blocks too */
10862                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10863                 while(cache) {
10864                         set_extent_dirty(root->fs_info->excluded_extents,
10865                                          cache->start,
10866                                          cache->start + cache->size - 1);
10867                         cache = next_cache_extent(cache);
10868                 }
10869                 prune_corrupt_blocks(root->fs_info);
10870                 reset_cached_block_groups(root->fs_info);
10871         }
10872
10873         reset_cached_block_groups(root->fs_info);
10874
10875         /*
10876          * We need to delete any duplicate entries we find first otherwise we
10877          * could mess up the extent tree when we have backrefs that actually
10878          * belong to a different extent item and not the weird duplicate one.
10879          */
10880         while (repair && !list_empty(&duplicate_extents)) {
10881                 rec = to_extent_record(duplicate_extents.next);
10882                 list_del_init(&rec->list);
10883
10884                 /* Sometimes we can find a backref before we find an actual
10885                  * extent, so we need to process it a little bit to see if there
10886                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10887                  * if this is a backref screwup.  If we need to delete stuff
10888                  * process_duplicates() will return 0, otherwise it will return
10889                  * 1 and we
10890                  */
10891                 if (process_duplicates(extent_cache, rec))
10892                         continue;
10893                 ret = delete_duplicate_records(root, rec);
10894                 if (ret < 0)
10895                         return ret;
10896                 /*
10897                  * delete_duplicate_records will return the number of entries
10898                  * deleted, so if it's greater than 0 then we know we actually
10899                  * did something and we need to remove.
10900                  */
10901                 if (ret)
10902                         had_dups = 1;
10903         }
10904
10905         if (had_dups)
10906                 return -EAGAIN;
10907
10908         while(1) {
10909                 int cur_err = 0;
10910                 int fix = 0;
10911
10912                 cache = search_cache_extent(extent_cache, 0);
10913                 if (!cache)
10914                         break;
10915                 rec = container_of(cache, struct extent_record, cache);
10916                 if (rec->num_duplicates) {
10917                         fprintf(stderr, "extent item %llu has multiple extent "
10918                                 "items\n", (unsigned long long)rec->start);
10919                         cur_err = 1;
10920                 }
10921
10922                 if (rec->refs != rec->extent_item_refs) {
10923                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10924                                 (unsigned long long)rec->start,
10925                                 (unsigned long long)rec->nr);
10926                         fprintf(stderr, "extent item %llu, found %llu\n",
10927                                 (unsigned long long)rec->extent_item_refs,
10928                                 (unsigned long long)rec->refs);
10929                         ret = record_orphan_data_extents(root->fs_info, rec);
10930                         if (ret < 0)
10931                                 goto repair_abort;
10932                         fix = ret;
10933                         cur_err = 1;
10934                 }
10935                 if (all_backpointers_checked(rec, 1)) {
10936                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10937                                 (unsigned long long)rec->start,
10938                                 (unsigned long long)rec->nr);
10939                         fix = 1;
10940                         cur_err = 1;
10941                 }
10942                 if (!rec->owner_ref_checked) {
10943                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10944                                 (unsigned long long)rec->start,
10945                                 (unsigned long long)rec->nr);
10946                         fix = 1;
10947                         cur_err = 1;
10948                 }
10949
10950                 if (repair && fix) {
10951                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10952                         if (ret)
10953                                 goto repair_abort;
10954                 }
10955
10956
10957                 if (rec->bad_full_backref) {
10958                         fprintf(stderr, "bad full backref, on [%llu]\n",
10959                                 (unsigned long long)rec->start);
10960                         if (repair) {
10961                                 ret = fixup_extent_flags(root->fs_info, rec);
10962                                 if (ret)
10963                                         goto repair_abort;
10964                                 fix = 1;
10965                         }
10966                         cur_err = 1;
10967                 }
10968                 /*
10969                  * Although it's not a extent ref's problem, we reuse this
10970                  * routine for error reporting.
10971                  * No repair function yet.
10972                  */
10973                 if (rec->crossing_stripes) {
10974                         fprintf(stderr,
10975                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10976                                 rec->start, rec->start + rec->max_size);
10977                         cur_err = 1;
10978                 }
10979
10980                 if (rec->wrong_chunk_type) {
10981                         fprintf(stderr,
10982                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10983                                 rec->start, rec->start + rec->max_size);
10984                         cur_err = 1;
10985                 }
10986
10987                 err = cur_err;
10988                 remove_cache_extent(extent_cache, cache);
10989                 free_all_extent_backrefs(rec);
10990                 if (!init_extent_tree && repair && (!cur_err || fix))
10991                         clear_extent_dirty(root->fs_info->excluded_extents,
10992                                            rec->start,
10993                                            rec->start + rec->max_size - 1);
10994                 free(rec);
10995         }
10996 repair_abort:
10997         if (repair) {
10998                 if (ret && ret != -EAGAIN) {
10999                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
11000                         exit(1);
11001                 } else if (!ret) {
11002                         struct btrfs_trans_handle *trans;
11003
11004                         root = root->fs_info->extent_root;
11005                         trans = btrfs_start_transaction(root, 1);
11006                         if (IS_ERR(trans)) {
11007                                 ret = PTR_ERR(trans);
11008                                 goto repair_abort;
11009                         }
11010
11011                         ret = btrfs_fix_block_accounting(trans, root);
11012                         if (ret)
11013                                 goto repair_abort;
11014                         ret = btrfs_commit_transaction(trans, root);
11015                         if (ret)
11016                                 goto repair_abort;
11017                 }
11018                 return ret;
11019         }
11020
11021         if (err)
11022                 err = -EIO;
11023         return err;
11024 }
11025
11026 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11027 {
11028         u64 stripe_size;
11029
11030         if (type & BTRFS_BLOCK_GROUP_RAID0) {
11031                 stripe_size = length;
11032                 stripe_size /= num_stripes;
11033         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11034                 stripe_size = length * 2;
11035                 stripe_size /= num_stripes;
11036         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11037                 stripe_size = length;
11038                 stripe_size /= (num_stripes - 1);
11039         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11040                 stripe_size = length;
11041                 stripe_size /= (num_stripes - 2);
11042         } else {
11043                 stripe_size = length;
11044         }
11045         return stripe_size;
11046 }
11047
11048 /*
11049  * Check the chunk with its block group/dev list ref:
11050  * Return 0 if all refs seems valid.
11051  * Return 1 if part of refs seems valid, need later check for rebuild ref
11052  * like missing block group and needs to search extent tree to rebuild them.
11053  * Return -1 if essential refs are missing and unable to rebuild.
11054  */
11055 static int check_chunk_refs(struct chunk_record *chunk_rec,
11056                             struct block_group_tree *block_group_cache,
11057                             struct device_extent_tree *dev_extent_cache,
11058                             int silent)
11059 {
11060         struct cache_extent *block_group_item;
11061         struct block_group_record *block_group_rec;
11062         struct cache_extent *dev_extent_item;
11063         struct device_extent_record *dev_extent_rec;
11064         u64 devid;
11065         u64 offset;
11066         u64 length;
11067         int metadump_v2 = 0;
11068         int i;
11069         int ret = 0;
11070
11071         block_group_item = lookup_cache_extent(&block_group_cache->tree,
11072                                                chunk_rec->offset,
11073                                                chunk_rec->length);
11074         if (block_group_item) {
11075                 block_group_rec = container_of(block_group_item,
11076                                                struct block_group_record,
11077                                                cache);
11078                 if (chunk_rec->length != block_group_rec->offset ||
11079                     chunk_rec->offset != block_group_rec->objectid ||
11080                     (!metadump_v2 &&
11081                      chunk_rec->type_flags != block_group_rec->flags)) {
11082                         if (!silent)
11083                                 fprintf(stderr,
11084                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11085                                         chunk_rec->objectid,
11086                                         chunk_rec->type,
11087                                         chunk_rec->offset,
11088                                         chunk_rec->length,
11089                                         chunk_rec->offset,
11090                                         chunk_rec->type_flags,
11091                                         block_group_rec->objectid,
11092                                         block_group_rec->type,
11093                                         block_group_rec->offset,
11094                                         block_group_rec->offset,
11095                                         block_group_rec->objectid,
11096                                         block_group_rec->flags);
11097                         ret = -1;
11098                 } else {
11099                         list_del_init(&block_group_rec->list);
11100                         chunk_rec->bg_rec = block_group_rec;
11101                 }
11102         } else {
11103                 if (!silent)
11104                         fprintf(stderr,
11105                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11106                                 chunk_rec->objectid,
11107                                 chunk_rec->type,
11108                                 chunk_rec->offset,
11109                                 chunk_rec->length,
11110                                 chunk_rec->offset,
11111                                 chunk_rec->type_flags);
11112                 ret = 1;
11113         }
11114
11115         if (metadump_v2)
11116                 return ret;
11117
11118         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11119                                     chunk_rec->num_stripes);
11120         for (i = 0; i < chunk_rec->num_stripes; ++i) {
11121                 devid = chunk_rec->stripes[i].devid;
11122                 offset = chunk_rec->stripes[i].offset;
11123                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11124                                                        devid, offset, length);
11125                 if (dev_extent_item) {
11126                         dev_extent_rec = container_of(dev_extent_item,
11127                                                 struct device_extent_record,
11128                                                 cache);
11129                         if (dev_extent_rec->objectid != devid ||
11130                             dev_extent_rec->offset != offset ||
11131                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
11132                             dev_extent_rec->length != length) {
11133                                 if (!silent)
11134                                         fprintf(stderr,
11135                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11136                                                 chunk_rec->objectid,
11137                                                 chunk_rec->type,
11138                                                 chunk_rec->offset,
11139                                                 chunk_rec->stripes[i].devid,
11140                                                 chunk_rec->stripes[i].offset,
11141                                                 dev_extent_rec->objectid,
11142                                                 dev_extent_rec->offset,
11143                                                 dev_extent_rec->length);
11144                                 ret = -1;
11145                         } else {
11146                                 list_move(&dev_extent_rec->chunk_list,
11147                                           &chunk_rec->dextents);
11148                         }
11149                 } else {
11150                         if (!silent)
11151                                 fprintf(stderr,
11152                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11153                                         chunk_rec->objectid,
11154                                         chunk_rec->type,
11155                                         chunk_rec->offset,
11156                                         chunk_rec->stripes[i].devid,
11157                                         chunk_rec->stripes[i].offset);
11158                         ret = -1;
11159                 }
11160         }
11161         return ret;
11162 }
11163
11164 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
11165 int check_chunks(struct cache_tree *chunk_cache,
11166                  struct block_group_tree *block_group_cache,
11167                  struct device_extent_tree *dev_extent_cache,
11168                  struct list_head *good, struct list_head *bad,
11169                  struct list_head *rebuild, int silent)
11170 {
11171         struct cache_extent *chunk_item;
11172         struct chunk_record *chunk_rec;
11173         struct block_group_record *bg_rec;
11174         struct device_extent_record *dext_rec;
11175         int err;
11176         int ret = 0;
11177
11178         chunk_item = first_cache_extent(chunk_cache);
11179         while (chunk_item) {
11180                 chunk_rec = container_of(chunk_item, struct chunk_record,
11181                                          cache);
11182                 err = check_chunk_refs(chunk_rec, block_group_cache,
11183                                        dev_extent_cache, silent);
11184                 if (err < 0)
11185                         ret = err;
11186                 if (err == 0 && good)
11187                         list_add_tail(&chunk_rec->list, good);
11188                 if (err > 0 && rebuild)
11189                         list_add_tail(&chunk_rec->list, rebuild);
11190                 if (err < 0 && bad)
11191                         list_add_tail(&chunk_rec->list, bad);
11192                 chunk_item = next_cache_extent(chunk_item);
11193         }
11194
11195         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11196                 if (!silent)
11197                         fprintf(stderr,
11198                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11199                                 bg_rec->objectid,
11200                                 bg_rec->offset,
11201                                 bg_rec->flags);
11202                 if (!ret)
11203                         ret = 1;
11204         }
11205
11206         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11207                             chunk_list) {
11208                 if (!silent)
11209                         fprintf(stderr,
11210                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11211                                 dext_rec->objectid,
11212                                 dext_rec->offset,
11213                                 dext_rec->length);
11214                 if (!ret)
11215                         ret = 1;
11216         }
11217         return ret;
11218 }
11219
11220
11221 static int check_device_used(struct device_record *dev_rec,
11222                              struct device_extent_tree *dext_cache)
11223 {
11224         struct cache_extent *cache;
11225         struct device_extent_record *dev_extent_rec;
11226         u64 total_byte = 0;
11227
11228         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11229         while (cache) {
11230                 dev_extent_rec = container_of(cache,
11231                                               struct device_extent_record,
11232                                               cache);
11233                 if (dev_extent_rec->objectid != dev_rec->devid)
11234                         break;
11235
11236                 list_del_init(&dev_extent_rec->device_list);
11237                 total_byte += dev_extent_rec->length;
11238                 cache = next_cache_extent(cache);
11239         }
11240
11241         if (total_byte != dev_rec->byte_used) {
11242                 fprintf(stderr,
11243                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11244                         total_byte, dev_rec->byte_used, dev_rec->objectid,
11245                         dev_rec->type, dev_rec->offset);
11246                 return -1;
11247         } else {
11248                 return 0;
11249         }
11250 }
11251
11252 /*
11253  * Extra (optional) check for dev_item size to report possbile problem on a new
11254  * kernel.
11255  */
11256 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
11257 {
11258         if (!IS_ALIGNED(total_bytes, sectorsize)) {
11259                 warning(
11260 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
11261                         devid, total_bytes, sectorsize);
11262                 warning(
11263 "this is OK for older kernel, but may cause kernel warning for newer kernels");
11264                 warning("this can be fixed by 'btrfs rescue fix-device-size'");
11265         }
11266 }
11267
11268 /*
11269  * Unlike device size alignment check above, some super total_bytes check
11270  * failure can lead to mount failure for newer kernel.
11271  *
11272  * So this function will return the error for a fatal super total_bytes problem.
11273  */
11274 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
11275 {
11276         struct btrfs_device *dev;
11277         struct list_head *dev_list = &fs_info->fs_devices->devices;
11278         u64 total_bytes = 0;
11279         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
11280
11281         list_for_each_entry(dev, dev_list, dev_list)
11282                 total_bytes += dev->total_bytes;
11283
11284         /* Important check, which can cause unmountable fs */
11285         if (super_bytes < total_bytes) {
11286                 error("super total bytes %llu smaller than real device(s) size %llu",
11287                         super_bytes, total_bytes);
11288                 error("mounting this fs may fail for newer kernels");
11289                 error("this can be fixed by 'btrfs rescue fix-device-size'");
11290                 return false;
11291         }
11292
11293         /*
11294          * Optional check, just to make everything aligned and match with each
11295          * other.
11296          *
11297          * For a btrfs-image restored fs, we don't need to check it anyway.
11298          */
11299         if (btrfs_super_flags(fs_info->super_copy) &
11300             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
11301                 return true;
11302         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
11303             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
11304             super_bytes != total_bytes) {
11305                 warning("minor unaligned/mismatch device size detected");
11306                 warning(
11307                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
11308         }
11309         return true;
11310 }
11311
11312 /* check btrfs_dev_item -> btrfs_dev_extent */
11313 static int check_devices(struct rb_root *dev_cache,
11314                          struct device_extent_tree *dev_extent_cache)
11315 {
11316         struct rb_node *dev_node;
11317         struct device_record *dev_rec;
11318         struct device_extent_record *dext_rec;
11319         int err;
11320         int ret = 0;
11321
11322         dev_node = rb_first(dev_cache);
11323         while (dev_node) {
11324                 dev_rec = container_of(dev_node, struct device_record, node);
11325                 err = check_device_used(dev_rec, dev_extent_cache);
11326                 if (err)
11327                         ret = err;
11328
11329                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11330                                          global_info->sectorsize);
11331                 dev_node = rb_next(dev_node);
11332         }
11333         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11334                             device_list) {
11335                 fprintf(stderr,
11336                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11337                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
11338                 if (!ret)
11339                         ret = 1;
11340         }
11341         return ret;
11342 }
11343
11344 static int add_root_item_to_list(struct list_head *head,
11345                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11346                                   u8 level, u8 drop_level,
11347                                   struct btrfs_key *drop_key)
11348 {
11349
11350         struct root_item_record *ri_rec;
11351         ri_rec = malloc(sizeof(*ri_rec));
11352         if (!ri_rec)
11353                 return -ENOMEM;
11354         ri_rec->bytenr = bytenr;
11355         ri_rec->objectid = objectid;
11356         ri_rec->level = level;
11357         ri_rec->drop_level = drop_level;
11358         ri_rec->last_snapshot = last_snapshot;
11359         if (drop_key)
11360                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11361         list_add_tail(&ri_rec->list, head);
11362
11363         return 0;
11364 }
11365
11366 static void free_root_item_list(struct list_head *list)
11367 {
11368         struct root_item_record *ri_rec;
11369
11370         while (!list_empty(list)) {
11371                 ri_rec = list_first_entry(list, struct root_item_record,
11372                                           list);
11373                 list_del_init(&ri_rec->list);
11374                 free(ri_rec);
11375         }
11376 }
11377
11378 static int deal_root_from_list(struct list_head *list,
11379                                struct btrfs_root *root,
11380                                struct block_info *bits,
11381                                int bits_nr,
11382                                struct cache_tree *pending,
11383                                struct cache_tree *seen,
11384                                struct cache_tree *reada,
11385                                struct cache_tree *nodes,
11386                                struct cache_tree *extent_cache,
11387                                struct cache_tree *chunk_cache,
11388                                struct rb_root *dev_cache,
11389                                struct block_group_tree *block_group_cache,
11390                                struct device_extent_tree *dev_extent_cache)
11391 {
11392         int ret = 0;
11393         u64 last;
11394
11395         while (!list_empty(list)) {
11396                 struct root_item_record *rec;
11397                 struct extent_buffer *buf;
11398                 rec = list_entry(list->next,
11399                                  struct root_item_record, list);
11400                 last = 0;
11401                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11402                 if (!extent_buffer_uptodate(buf)) {
11403                         free_extent_buffer(buf);
11404                         ret = -EIO;
11405                         break;
11406                 }
11407                 ret = add_root_to_pending(buf, extent_cache, pending,
11408                                     seen, nodes, rec->objectid);
11409                 if (ret < 0)
11410                         break;
11411                 /*
11412                  * To rebuild extent tree, we need deal with snapshot
11413                  * one by one, otherwise we deal with node firstly which
11414                  * can maximize readahead.
11415                  */
11416                 while (1) {
11417                         ret = run_next_block(root, bits, bits_nr, &last,
11418                                              pending, seen, reada, nodes,
11419                                              extent_cache, chunk_cache,
11420                                              dev_cache, block_group_cache,
11421                                              dev_extent_cache, rec);
11422                         if (ret != 0)
11423                                 break;
11424                 }
11425                 free_extent_buffer(buf);
11426                 list_del(&rec->list);
11427                 free(rec);
11428                 if (ret < 0)
11429                         break;
11430         }
11431         while (ret >= 0) {
11432                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11433                                      reada, nodes, extent_cache, chunk_cache,
11434                                      dev_cache, block_group_cache,
11435                                      dev_extent_cache, NULL);
11436                 if (ret != 0) {
11437                         if (ret > 0)
11438                                 ret = 0;
11439                         break;
11440                 }
11441         }
11442         return ret;
11443 }
11444
11445 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11446 {
11447         struct rb_root dev_cache;
11448         struct cache_tree chunk_cache;
11449         struct block_group_tree block_group_cache;
11450         struct device_extent_tree dev_extent_cache;
11451         struct cache_tree extent_cache;
11452         struct cache_tree seen;
11453         struct cache_tree pending;
11454         struct cache_tree reada;
11455         struct cache_tree nodes;
11456         struct extent_io_tree excluded_extents;
11457         struct cache_tree corrupt_blocks;
11458         struct btrfs_path path;
11459         struct btrfs_key key;
11460         struct btrfs_key found_key;
11461         int ret, err = 0;
11462         struct block_info *bits;
11463         int bits_nr;
11464         struct extent_buffer *leaf;
11465         int slot;
11466         struct btrfs_root_item ri;
11467         struct list_head dropping_trees;
11468         struct list_head normal_trees;
11469         struct btrfs_root *root1;
11470         struct btrfs_root *root;
11471         u64 objectid;
11472         u8 level;
11473
11474         root = fs_info->fs_root;
11475         dev_cache = RB_ROOT;
11476         cache_tree_init(&chunk_cache);
11477         block_group_tree_init(&block_group_cache);
11478         device_extent_tree_init(&dev_extent_cache);
11479
11480         cache_tree_init(&extent_cache);
11481         cache_tree_init(&seen);
11482         cache_tree_init(&pending);
11483         cache_tree_init(&nodes);
11484         cache_tree_init(&reada);
11485         cache_tree_init(&corrupt_blocks);
11486         extent_io_tree_init(&excluded_extents);
11487         INIT_LIST_HEAD(&dropping_trees);
11488         INIT_LIST_HEAD(&normal_trees);
11489
11490         if (repair) {
11491                 fs_info->excluded_extents = &excluded_extents;
11492                 fs_info->fsck_extent_cache = &extent_cache;
11493                 fs_info->free_extent_hook = free_extent_hook;
11494                 fs_info->corrupt_blocks = &corrupt_blocks;
11495         }
11496
11497         bits_nr = 1024;
11498         bits = malloc(bits_nr * sizeof(struct block_info));
11499         if (!bits) {
11500                 perror("malloc");
11501                 exit(1);
11502         }
11503
11504         if (ctx.progress_enabled) {
11505                 ctx.tp = TASK_EXTENTS;
11506                 task_start(ctx.info);
11507         }
11508
11509 again:
11510         root1 = fs_info->tree_root;
11511         level = btrfs_header_level(root1->node);
11512         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11513                                     root1->node->start, 0, level, 0, NULL);
11514         if (ret < 0)
11515                 goto out;
11516         root1 = fs_info->chunk_root;
11517         level = btrfs_header_level(root1->node);
11518         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11519                                     root1->node->start, 0, level, 0, NULL);
11520         if (ret < 0)
11521                 goto out;
11522         btrfs_init_path(&path);
11523         key.offset = 0;
11524         key.objectid = 0;
11525         key.type = BTRFS_ROOT_ITEM_KEY;
11526         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11527         if (ret < 0)
11528                 goto out;
11529         while(1) {
11530                 leaf = path.nodes[0];
11531                 slot = path.slots[0];
11532                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11533                         ret = btrfs_next_leaf(root, &path);
11534                         if (ret != 0)
11535                                 break;
11536                         leaf = path.nodes[0];
11537                         slot = path.slots[0];
11538                 }
11539                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11540                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11541                         unsigned long offset;
11542                         u64 last_snapshot;
11543
11544                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11545                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11546                         last_snapshot = btrfs_root_last_snapshot(&ri);
11547                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11548                                 level = btrfs_root_level(&ri);
11549                                 ret = add_root_item_to_list(&normal_trees,
11550                                                 found_key.objectid,
11551                                                 btrfs_root_bytenr(&ri),
11552                                                 last_snapshot, level,
11553                                                 0, NULL);
11554                                 if (ret < 0)
11555                                         goto out;
11556                         } else {
11557                                 level = btrfs_root_level(&ri);
11558                                 objectid = found_key.objectid;
11559                                 btrfs_disk_key_to_cpu(&found_key,
11560                                                       &ri.drop_progress);
11561                                 ret = add_root_item_to_list(&dropping_trees,
11562                                                 objectid,
11563                                                 btrfs_root_bytenr(&ri),
11564                                                 last_snapshot, level,
11565                                                 ri.drop_level, &found_key);
11566                                 if (ret < 0)
11567                                         goto out;
11568                         }
11569                 }
11570                 path.slots[0]++;
11571         }
11572         btrfs_release_path(&path);
11573
11574         /*
11575          * check_block can return -EAGAIN if it fixes something, please keep
11576          * this in mind when dealing with return values from these functions, if
11577          * we get -EAGAIN we want to fall through and restart the loop.
11578          */
11579         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11580                                   &seen, &reada, &nodes, &extent_cache,
11581                                   &chunk_cache, &dev_cache, &block_group_cache,
11582                                   &dev_extent_cache);
11583         if (ret < 0) {
11584                 if (ret == -EAGAIN)
11585                         goto loop;
11586                 goto out;
11587         }
11588         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11589                                   &pending, &seen, &reada, &nodes,
11590                                   &extent_cache, &chunk_cache, &dev_cache,
11591                                   &block_group_cache, &dev_extent_cache);
11592         if (ret < 0) {
11593                 if (ret == -EAGAIN)
11594                         goto loop;
11595                 goto out;
11596         }
11597
11598         ret = check_chunks(&chunk_cache, &block_group_cache,
11599                            &dev_extent_cache, NULL, NULL, NULL, 0);
11600         if (ret) {
11601                 if (ret == -EAGAIN)
11602                         goto loop;
11603                 err = ret;
11604         }
11605
11606         ret = check_extent_refs(root, &extent_cache);
11607         if (ret < 0) {
11608                 if (ret == -EAGAIN)
11609                         goto loop;
11610                 goto out;
11611         }
11612
11613         ret = check_devices(&dev_cache, &dev_extent_cache);
11614         if (ret && err)
11615                 ret = err;
11616
11617 out:
11618         task_stop(ctx.info);
11619         if (repair) {
11620                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11621                 extent_io_tree_cleanup(&excluded_extents);
11622                 fs_info->fsck_extent_cache = NULL;
11623                 fs_info->free_extent_hook = NULL;
11624                 fs_info->corrupt_blocks = NULL;
11625                 fs_info->excluded_extents = NULL;
11626         }
11627         free(bits);
11628         free_chunk_cache_tree(&chunk_cache);
11629         free_device_cache_tree(&dev_cache);
11630         free_block_group_tree(&block_group_cache);
11631         free_device_extent_tree(&dev_extent_cache);
11632         free_extent_cache_tree(&seen);
11633         free_extent_cache_tree(&pending);
11634         free_extent_cache_tree(&reada);
11635         free_extent_cache_tree(&nodes);
11636         free_root_item_list(&normal_trees);
11637         free_root_item_list(&dropping_trees);
11638         return ret;
11639 loop:
11640         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11641         free_extent_cache_tree(&seen);
11642         free_extent_cache_tree(&pending);
11643         free_extent_cache_tree(&reada);
11644         free_extent_cache_tree(&nodes);
11645         free_chunk_cache_tree(&chunk_cache);
11646         free_block_group_tree(&block_group_cache);
11647         free_device_cache_tree(&dev_cache);
11648         free_device_extent_tree(&dev_extent_cache);
11649         free_extent_record_cache(&extent_cache);
11650         free_root_item_list(&normal_trees);
11651         free_root_item_list(&dropping_trees);
11652         extent_io_tree_cleanup(&excluded_extents);
11653         goto again;
11654 }
11655
11656 static int check_extent_inline_ref(struct extent_buffer *eb,
11657                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11658 {
11659         int ret;
11660         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11661
11662         switch (type) {
11663         case BTRFS_TREE_BLOCK_REF_KEY:
11664         case BTRFS_EXTENT_DATA_REF_KEY:
11665         case BTRFS_SHARED_BLOCK_REF_KEY:
11666         case BTRFS_SHARED_DATA_REF_KEY:
11667                 ret = 0;
11668                 break;
11669         default:
11670                 error("extent[%llu %u %llu] has unknown ref type: %d",
11671                       key->objectid, key->type, key->offset, type);
11672                 ret = UNKNOWN_TYPE;
11673                 break;
11674         }
11675
11676         return ret;
11677 }
11678
11679 /*
11680  * Check backrefs of a tree block given by @bytenr or @eb.
11681  *
11682  * @root:       the root containing the @bytenr or @eb
11683  * @eb:         tree block extent buffer, can be NULL
11684  * @bytenr:     bytenr of the tree block to search
11685  * @level:      tree level of the tree block
11686  * @owner:      owner of the tree block
11687  *
11688  * Return >0 for any error found and output error message
11689  * Return 0 for no error found
11690  */
11691 static int check_tree_block_ref(struct btrfs_root *root,
11692                                 struct extent_buffer *eb, u64 bytenr,
11693                                 int level, u64 owner, struct node_refs *nrefs)
11694 {
11695         struct btrfs_key key;
11696         struct btrfs_root *extent_root = root->fs_info->extent_root;
11697         struct btrfs_path path;
11698         struct btrfs_extent_item *ei;
11699         struct btrfs_extent_inline_ref *iref;
11700         struct extent_buffer *leaf;
11701         unsigned long end;
11702         unsigned long ptr;
11703         int slot;
11704         int skinny_level;
11705         int root_level = btrfs_header_level(root->node);
11706         int type;
11707         u32 nodesize = root->fs_info->nodesize;
11708         u32 item_size;
11709         u64 offset;
11710         int found_ref = 0;
11711         int err = 0;
11712         int ret;
11713         int strict = 1;
11714         int parent = 0;
11715
11716         btrfs_init_path(&path);
11717         key.objectid = bytenr;
11718         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11719                 key.type = BTRFS_METADATA_ITEM_KEY;
11720         else
11721                 key.type = BTRFS_EXTENT_ITEM_KEY;
11722         key.offset = (u64)-1;
11723
11724         /* Search for the backref in extent tree */
11725         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11726         if (ret < 0) {
11727                 err |= BACKREF_MISSING;
11728                 goto out;
11729         }
11730         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11731         if (ret) {
11732                 err |= BACKREF_MISSING;
11733                 goto out;
11734         }
11735
11736         leaf = path.nodes[0];
11737         slot = path.slots[0];
11738         btrfs_item_key_to_cpu(leaf, &key, slot);
11739
11740         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11741
11742         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11743                 skinny_level = (int)key.offset;
11744                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11745         } else {
11746                 struct btrfs_tree_block_info *info;
11747
11748                 info = (struct btrfs_tree_block_info *)(ei + 1);
11749                 skinny_level = btrfs_tree_block_level(leaf, info);
11750                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11751         }
11752
11753
11754         if (eb) {
11755                 u64 header_gen;
11756                 u64 extent_gen;
11757
11758                 /*
11759                  * Due to the feature of shared tree blocks, if the upper node
11760                  * is a fs root or shared node, the extent of checked node may
11761                  * not be updated until the next CoW.
11762                  */
11763                 if (nrefs)
11764                         strict = should_check_extent_strictly(root, nrefs,
11765                                         level);
11766                 if (!(btrfs_extent_flags(leaf, ei) &
11767                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11768                         error(
11769                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11770                                 key.objectid, nodesize,
11771                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11772                         err = BACKREF_MISMATCH;
11773                 }
11774                 header_gen = btrfs_header_generation(eb);
11775                 extent_gen = btrfs_extent_generation(leaf, ei);
11776                 if (header_gen != extent_gen) {
11777                         error(
11778         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11779                                 key.objectid, nodesize, header_gen,
11780                                 extent_gen);
11781                         err = BACKREF_MISMATCH;
11782                 }
11783                 if (level != skinny_level) {
11784                         error(
11785                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11786                                 key.objectid, nodesize, level, skinny_level);
11787                         err = BACKREF_MISMATCH;
11788                 }
11789                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11790                         error(
11791                         "extent[%llu %u] is referred by other roots than %llu",
11792                                 key.objectid, nodesize, root->objectid);
11793                         err = BACKREF_MISMATCH;
11794                 }
11795         }
11796
11797         /*
11798          * Iterate the extent/metadata item to find the exact backref
11799          */
11800         item_size = btrfs_item_size_nr(leaf, slot);
11801         ptr = (unsigned long)iref;
11802         end = (unsigned long)ei + item_size;
11803
11804         while (ptr < end) {
11805                 iref = (struct btrfs_extent_inline_ref *)ptr;
11806                 type = btrfs_extent_inline_ref_type(leaf, iref);
11807                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11808
11809                 ret = check_extent_inline_ref(leaf, &key, iref);
11810                 if (ret) {
11811                         err |= ret;
11812                         break;
11813                 }
11814                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11815                         if (offset == root->objectid)
11816                                 found_ref = 1;
11817                         if (!strict && owner == offset)
11818                                 found_ref = 1;
11819                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11820                         /*
11821                          * Backref of tree reloc root points to itself, no need
11822                          * to check backref any more.
11823                          *
11824                          * This may be an error of loop backref, but extent tree
11825                          * checker should have already handled it.
11826                          * Here we only need to avoid infinite iteration.
11827                          */
11828                         if (offset == bytenr) {
11829                                 found_ref = 1;
11830                         } else {
11831                                 /*
11832                                  * Check if the backref points to valid
11833                                  * referencer
11834                                  */
11835                                 found_ref = !check_tree_block_ref( root, NULL,
11836                                                 offset, level + 1, owner,
11837                                                 NULL);
11838                         }
11839                 }
11840
11841                 if (found_ref)
11842                         break;
11843                 ptr += btrfs_extent_inline_ref_size(type);
11844         }
11845
11846         /*
11847          * Inlined extent item doesn't have what we need, check
11848          * TREE_BLOCK_REF_KEY
11849          */
11850         if (!found_ref) {
11851                 btrfs_release_path(&path);
11852                 key.objectid = bytenr;
11853                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11854                 key.offset = root->objectid;
11855
11856                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11857                 if (!ret)
11858                         found_ref = 1;
11859         }
11860         /*
11861          * Finally check SHARED BLOCK REF, any found will be good
11862          * Here we're not doing comprehensive extent backref checking,
11863          * only need to ensure there is some extent referring to this
11864          * tree block.
11865          */
11866         if (!found_ref) {
11867                 btrfs_release_path(&path);
11868                 key.objectid = bytenr;
11869                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11870                 key.offset = (u64)-1;
11871
11872                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11873                 if (ret < 0) {
11874                         err |= BACKREF_MISSING;
11875                         goto out;
11876                 }
11877                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11878                 if (ret) {
11879                         err |= BACKREF_MISSING;
11880                         goto out;
11881                 }
11882                 found_ref = 1;
11883         }
11884         if (!found_ref)
11885                 err |= BACKREF_MISSING;
11886 out:
11887         btrfs_release_path(&path);
11888         if (nrefs && strict &&
11889             level < root_level && nrefs->full_backref[level + 1])
11890                 parent = nrefs->bytenr[level + 1];
11891         if (eb && (err & BACKREF_MISSING))
11892                 error(
11893         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11894                       bytenr, nodesize, owner, level,
11895                       parent ? "parent" : "root",
11896                       parent ? parent : root->objectid);
11897         return err;
11898 }
11899
11900 /*
11901  * If @err contains BACKREF_MISSING then add extent of the
11902  * file_extent_data_item.
11903  *
11904  * Returns error bits after reapir.
11905  */
11906 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11907                                    struct btrfs_root *root,
11908                                    struct btrfs_path *pathp,
11909                                    struct node_refs *nrefs,
11910                                    int err)
11911 {
11912         struct btrfs_file_extent_item *fi;
11913         struct btrfs_key fi_key;
11914         struct btrfs_key key;
11915         struct btrfs_extent_item *ei;
11916         struct btrfs_path path;
11917         struct btrfs_root *extent_root = root->fs_info->extent_root;
11918         struct extent_buffer *eb;
11919         u64 size;
11920         u64 disk_bytenr;
11921         u64 num_bytes;
11922         u64 parent;
11923         u64 offset;
11924         u64 extent_offset;
11925         u64 file_offset;
11926         int generation;
11927         int slot;
11928         int ret = 0;
11929
11930         eb = pathp->nodes[0];
11931         slot = pathp->slots[0];
11932         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11933         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11934
11935         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11936             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11937                 return err;
11938
11939         file_offset = fi_key.offset;
11940         generation = btrfs_file_extent_generation(eb, fi);
11941         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11942         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11943         extent_offset = btrfs_file_extent_offset(eb, fi);
11944         offset = file_offset - extent_offset;
11945
11946         /* now repair only adds backref */
11947         if ((err & BACKREF_MISSING) == 0)
11948                 return err;
11949
11950         /* search extent item */
11951         key.objectid = disk_bytenr;
11952         key.type = BTRFS_EXTENT_ITEM_KEY;
11953         key.offset = num_bytes;
11954
11955         btrfs_init_path(&path);
11956         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11957         if (ret < 0) {
11958                 ret = -EIO;
11959                 goto out;
11960         }
11961
11962         /* insert an extent item */
11963         if (ret > 0) {
11964                 key.objectid = disk_bytenr;
11965                 key.type = BTRFS_EXTENT_ITEM_KEY;
11966                 key.offset = num_bytes;
11967                 size = sizeof(*ei);
11968
11969                 btrfs_release_path(&path);
11970                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11971                                               size);
11972                 if (ret)
11973                         goto out;
11974                 eb = path.nodes[0];
11975                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11976
11977                 btrfs_set_extent_refs(eb, ei, 0);
11978                 btrfs_set_extent_generation(eb, ei, generation);
11979                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11980
11981                 btrfs_mark_buffer_dirty(eb);
11982                 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11983                                                num_bytes, 1, 0);
11984                 btrfs_release_path(&path);
11985         }
11986
11987         if (nrefs->full_backref[0])
11988                 parent = btrfs_header_bytenr(eb);
11989         else
11990                 parent = 0;
11991
11992         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11993                                    root->objectid,
11994                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11995                                    offset);
11996         if (ret) {
11997                 error(
11998                 "failed to increase extent data backref[%llu %llu] root %llu",
11999                       disk_bytenr, num_bytes, root->objectid);
12000                 goto out;
12001         } else {
12002                 printf("Add one extent data backref [%llu %llu]\n",
12003                        disk_bytenr, num_bytes);
12004         }
12005
12006         err &= ~BACKREF_MISSING;
12007 out:
12008         if (ret)
12009                 error("can't repair root %llu extent data item[%llu %llu]",
12010                       root->objectid, disk_bytenr, num_bytes);
12011         return err;
12012 }
12013
12014 /*
12015  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
12016  *
12017  * Return >0 if any error is found, and output an error message
12018  * Return 0 for no error found
12019  */
12020 static int check_extent_data_item(struct btrfs_root *root,
12021                                   struct btrfs_path *pathp,
12022                                   struct node_refs *nrefs,  int account_bytes)
12023 {
12024         struct btrfs_file_extent_item *fi;
12025         struct extent_buffer *eb = pathp->nodes[0];
12026         struct btrfs_path path;
12027         struct btrfs_root *extent_root = root->fs_info->extent_root;
12028         struct btrfs_key fi_key;
12029         struct btrfs_key dbref_key;
12030         struct extent_buffer *leaf;
12031         struct btrfs_extent_item *ei;
12032         struct btrfs_extent_inline_ref *iref;
12033         struct btrfs_extent_data_ref *dref;
12034         u64 owner;
12035         u64 disk_bytenr;
12036         u64 disk_num_bytes;
12037         u64 extent_num_bytes;
12038         u64 extent_flags;
12039         u64 offset;
12040         u32 item_size;
12041         unsigned long end;
12042         unsigned long ptr;
12043         int type;
12044         int found_dbackref = 0;
12045         int slot = pathp->slots[0];
12046         int err = 0;
12047         int ret;
12048         int strict;
12049
12050         btrfs_item_key_to_cpu(eb, &fi_key, slot);
12051         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
12052
12053         /* Nothing to check for hole and inline data extents */
12054         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
12055             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
12056                 return 0;
12057
12058         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
12059         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
12060         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
12061         offset = btrfs_file_extent_offset(eb, fi);
12062
12063         /* Check unaligned disk_num_bytes and num_bytes */
12064         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
12065                 error(
12066 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
12067                         fi_key.objectid, fi_key.offset, disk_num_bytes,
12068                         root->fs_info->sectorsize);
12069                 err |= BYTES_UNALIGNED;
12070         } else if (account_bytes) {
12071                 data_bytes_allocated += disk_num_bytes;
12072         }
12073         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
12074                 error(
12075 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
12076                         fi_key.objectid, fi_key.offset, extent_num_bytes,
12077                         root->fs_info->sectorsize);
12078                 err |= BYTES_UNALIGNED;
12079         } else if (account_bytes) {
12080                 data_bytes_referenced += extent_num_bytes;
12081         }
12082         owner = btrfs_header_owner(eb);
12083
12084         /* Check the extent item of the file extent in extent tree */
12085         btrfs_init_path(&path);
12086         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12087         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
12088         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
12089
12090         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
12091         if (ret)
12092                 goto out;
12093
12094         leaf = path.nodes[0];
12095         slot = path.slots[0];
12096         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12097
12098         extent_flags = btrfs_extent_flags(leaf, ei);
12099
12100         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12101                 error(
12102                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12103                     disk_bytenr, disk_num_bytes,
12104                     BTRFS_EXTENT_FLAG_DATA);
12105                 err |= BACKREF_MISMATCH;
12106         }
12107
12108         /* Check data backref inside that extent item */
12109         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
12110         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12111         ptr = (unsigned long)iref;
12112         end = (unsigned long)ei + item_size;
12113         strict = should_check_extent_strictly(root, nrefs, -1);
12114
12115         while (ptr < end) {
12116                 u64 ref_root;
12117                 u64 ref_objectid;
12118                 u64 ref_offset;
12119                 bool match = false;
12120
12121                 iref = (struct btrfs_extent_inline_ref *)ptr;
12122                 type = btrfs_extent_inline_ref_type(leaf, iref);
12123                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12124
12125                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
12126                 if (ret) {
12127                         err |= ret;
12128                         break;
12129                 }
12130                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12131                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
12132                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
12133                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
12134
12135                         if (ref_objectid == fi_key.objectid &&
12136                             ref_offset == fi_key.offset - offset)
12137                                 match = true;
12138                         if (ref_root == root->objectid && match)
12139                                 found_dbackref = 1;
12140                         else if (!strict && owner == ref_root && match)
12141                                 found_dbackref = 1;
12142                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
12143                         found_dbackref = !check_tree_block_ref(root, NULL,
12144                                 btrfs_extent_inline_ref_offset(leaf, iref),
12145                                 0, owner, NULL);
12146                 }
12147
12148                 if (found_dbackref)
12149                         break;
12150                 ptr += btrfs_extent_inline_ref_size(type);
12151         }
12152
12153         if (!found_dbackref) {
12154                 btrfs_release_path(&path);
12155
12156                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12157                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12158                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12159                 dbref_key.offset = hash_extent_data_ref(root->objectid,
12160                                 fi_key.objectid, fi_key.offset - offset);
12161
12162                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12163                                         &dbref_key, &path, 0, 0);
12164                 if (!ret) {
12165                         found_dbackref = 1;
12166                         goto out;
12167                 }
12168
12169                 btrfs_release_path(&path);
12170
12171                 /*
12172                  * Neither inlined nor EXTENT_DATA_REF found, try
12173                  * SHARED_DATA_REF as last chance.
12174                  */
12175                 dbref_key.objectid = disk_bytenr;
12176                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12177                 dbref_key.offset = eb->start;
12178
12179                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12180                                         &dbref_key, &path, 0, 0);
12181                 if (!ret) {
12182                         found_dbackref = 1;
12183                         goto out;
12184                 }
12185         }
12186
12187 out:
12188         if (!found_dbackref)
12189                 err |= BACKREF_MISSING;
12190         btrfs_release_path(&path);
12191         if (err & BACKREF_MISSING) {
12192                 error("data extent[%llu %llu] backref lost",
12193                       disk_bytenr, disk_num_bytes);
12194         }
12195         return err;
12196 }
12197
12198 /*
12199  * Get real tree block level for the case like shared block
12200  * Return >= 0 as tree level
12201  * Return <0 for error
12202  */
12203 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12204 {
12205         struct extent_buffer *eb;
12206         struct btrfs_path path;
12207         struct btrfs_key key;
12208         struct btrfs_extent_item *ei;
12209         u64 flags;
12210         u64 transid;
12211         u8 backref_level;
12212         u8 header_level;
12213         int ret;
12214
12215         /* Search extent tree for extent generation and level */
12216         key.objectid = bytenr;
12217         key.type = BTRFS_METADATA_ITEM_KEY;
12218         key.offset = (u64)-1;
12219
12220         btrfs_init_path(&path);
12221         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12222         if (ret < 0)
12223                 goto release_out;
12224         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12225         if (ret < 0)
12226                 goto release_out;
12227         if (ret > 0) {
12228                 ret = -ENOENT;
12229                 goto release_out;
12230         }
12231
12232         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12233         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12234                             struct btrfs_extent_item);
12235         flags = btrfs_extent_flags(path.nodes[0], ei);
12236         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12237                 ret = -ENOENT;
12238                 goto release_out;
12239         }
12240
12241         /* Get transid for later read_tree_block() check */
12242         transid = btrfs_extent_generation(path.nodes[0], ei);
12243
12244         /* Get backref level as one source */
12245         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12246                 backref_level = key.offset;
12247         } else {
12248                 struct btrfs_tree_block_info *info;
12249
12250                 info = (struct btrfs_tree_block_info *)(ei + 1);
12251                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12252         }
12253         btrfs_release_path(&path);
12254
12255         /* Get level from tree block as an alternative source */
12256         eb = read_tree_block(fs_info, bytenr, transid);
12257         if (!extent_buffer_uptodate(eb)) {
12258                 free_extent_buffer(eb);
12259                 return -EIO;
12260         }
12261         header_level = btrfs_header_level(eb);
12262         free_extent_buffer(eb);
12263
12264         if (header_level != backref_level)
12265                 return -EIO;
12266         return header_level;
12267
12268 release_out:
12269         btrfs_release_path(&path);
12270         return ret;
12271 }
12272
12273 /*
12274  * Check if a tree block backref is valid (points to a valid tree block)
12275  * if level == -1, level will be resolved
12276  * Return >0 for any error found and print error message
12277  */
12278 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12279                                     u64 bytenr, int level)
12280 {
12281         struct btrfs_root *root;
12282         struct btrfs_key key;
12283         struct btrfs_path path;
12284         struct extent_buffer *eb;
12285         struct extent_buffer *node;
12286         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12287         int err = 0;
12288         int ret;
12289
12290         /* Query level for level == -1 special case */
12291         if (level == -1)
12292                 level = query_tree_block_level(fs_info, bytenr);
12293         if (level < 0) {
12294                 err |= REFERENCER_MISSING;
12295                 goto out;
12296         }
12297
12298         key.objectid = root_id;
12299         key.type = BTRFS_ROOT_ITEM_KEY;
12300         key.offset = (u64)-1;
12301
12302         root = btrfs_read_fs_root(fs_info, &key);
12303         if (IS_ERR(root)) {
12304                 err |= REFERENCER_MISSING;
12305                 goto out;
12306         }
12307
12308         /* Read out the tree block to get item/node key */
12309         eb = read_tree_block(fs_info, bytenr, 0);
12310         if (!extent_buffer_uptodate(eb)) {
12311                 err |= REFERENCER_MISSING;
12312                 free_extent_buffer(eb);
12313                 goto out;
12314         }
12315
12316         /* Empty tree, no need to check key */
12317         if (!btrfs_header_nritems(eb) && !level) {
12318                 free_extent_buffer(eb);
12319                 goto out;
12320         }
12321
12322         if (level)
12323                 btrfs_node_key_to_cpu(eb, &key, 0);
12324         else
12325                 btrfs_item_key_to_cpu(eb, &key, 0);
12326
12327         free_extent_buffer(eb);
12328
12329         btrfs_init_path(&path);
12330         path.lowest_level = level;
12331         /* Search with the first key, to ensure we can reach it */
12332         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12333         if (ret < 0) {
12334                 err |= REFERENCER_MISSING;
12335                 goto release_out;
12336         }
12337
12338         node = path.nodes[level];
12339         if (btrfs_header_bytenr(node) != bytenr) {
12340                 error(
12341         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12342                         bytenr, nodesize, bytenr,
12343                         btrfs_header_bytenr(node));
12344                 err |= REFERENCER_MISMATCH;
12345         }
12346         if (btrfs_header_level(node) != level) {
12347                 error(
12348         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12349                         bytenr, nodesize, level,
12350                         btrfs_header_level(node));
12351                 err |= REFERENCER_MISMATCH;
12352         }
12353
12354 release_out:
12355         btrfs_release_path(&path);
12356 out:
12357         if (err & REFERENCER_MISSING) {
12358                 if (level < 0)
12359                         error("extent [%llu %d] lost referencer (owner: %llu)",
12360                                 bytenr, nodesize, root_id);
12361                 else
12362                         error(
12363                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12364                                 bytenr, nodesize, root_id, level);
12365         }
12366
12367         return err;
12368 }
12369
12370 /*
12371  * Check if tree block @eb is tree reloc root.
12372  * Return 0 if it's not or any problem happens
12373  * Return 1 if it's a tree reloc root
12374  */
12375 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12376                                  struct extent_buffer *eb)
12377 {
12378         struct btrfs_root *tree_reloc_root;
12379         struct btrfs_key key;
12380         u64 bytenr = btrfs_header_bytenr(eb);
12381         u64 owner = btrfs_header_owner(eb);
12382         int ret = 0;
12383
12384         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12385         key.offset = owner;
12386         key.type = BTRFS_ROOT_ITEM_KEY;
12387
12388         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12389         if (IS_ERR(tree_reloc_root))
12390                 return 0;
12391
12392         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12393                 ret = 1;
12394         btrfs_free_fs_root(tree_reloc_root);
12395         return ret;
12396 }
12397
12398 /*
12399  * Check referencer for shared block backref
12400  * If level == -1, this function will resolve the level.
12401  */
12402 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12403                                      u64 parent, u64 bytenr, int level)
12404 {
12405         struct extent_buffer *eb;
12406         u32 nr;
12407         int found_parent = 0;
12408         int i;
12409
12410         eb = read_tree_block(fs_info, parent, 0);
12411         if (!extent_buffer_uptodate(eb))
12412                 goto out;
12413
12414         if (level == -1)
12415                 level = query_tree_block_level(fs_info, bytenr);
12416         if (level < 0)
12417                 goto out;
12418
12419         /* It's possible it's a tree reloc root */
12420         if (parent == bytenr) {
12421                 if (is_tree_reloc_root(fs_info, eb))
12422                         found_parent = 1;
12423                 goto out;
12424         }
12425
12426         if (level + 1 != btrfs_header_level(eb))
12427                 goto out;
12428
12429         nr = btrfs_header_nritems(eb);
12430         for (i = 0; i < nr; i++) {
12431                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12432                         found_parent = 1;
12433                         break;
12434                 }
12435         }
12436 out:
12437         free_extent_buffer(eb);
12438         if (!found_parent) {
12439                 error(
12440         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12441                         bytenr, fs_info->nodesize, parent, level);
12442                 return REFERENCER_MISSING;
12443         }
12444         return 0;
12445 }
12446
12447 /*
12448  * Check referencer for normal (inlined) data ref
12449  * If len == 0, it will be resolved by searching in extent tree
12450  */
12451 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12452                                      u64 root_id, u64 objectid, u64 offset,
12453                                      u64 bytenr, u64 len, u32 count)
12454 {
12455         struct btrfs_root *root;
12456         struct btrfs_root *extent_root = fs_info->extent_root;
12457         struct btrfs_key key;
12458         struct btrfs_path path;
12459         struct extent_buffer *leaf;
12460         struct btrfs_file_extent_item *fi;
12461         u32 found_count = 0;
12462         int slot;
12463         int ret = 0;
12464
12465         if (!len) {
12466                 key.objectid = bytenr;
12467                 key.type = BTRFS_EXTENT_ITEM_KEY;
12468                 key.offset = (u64)-1;
12469
12470                 btrfs_init_path(&path);
12471                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12472                 if (ret < 0)
12473                         goto out;
12474                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12475                 if (ret)
12476                         goto out;
12477                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12478                 if (key.objectid != bytenr ||
12479                     key.type != BTRFS_EXTENT_ITEM_KEY)
12480                         goto out;
12481                 len = key.offset;
12482                 btrfs_release_path(&path);
12483         }
12484         key.objectid = root_id;
12485         key.type = BTRFS_ROOT_ITEM_KEY;
12486         key.offset = (u64)-1;
12487         btrfs_init_path(&path);
12488
12489         root = btrfs_read_fs_root(fs_info, &key);
12490         if (IS_ERR(root))
12491                 goto out;
12492
12493         key.objectid = objectid;
12494         key.type = BTRFS_EXTENT_DATA_KEY;
12495         /*
12496          * It can be nasty as data backref offset is
12497          * file offset - file extent offset, which is smaller or
12498          * equal to original backref offset.  The only special case is
12499          * overflow.  So we need to special check and do further search.
12500          */
12501         key.offset = offset & (1ULL << 63) ? 0 : offset;
12502
12503         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12504         if (ret < 0)
12505                 goto out;
12506
12507         /*
12508          * Search afterwards to get correct one
12509          * NOTE: As we must do a comprehensive check on the data backref to
12510          * make sure the dref count also matches, we must iterate all file
12511          * extents for that inode.
12512          */
12513         while (1) {
12514                 leaf = path.nodes[0];
12515                 slot = path.slots[0];
12516
12517                 if (slot >= btrfs_header_nritems(leaf) ||
12518                     btrfs_header_owner(leaf) != root_id)
12519                         goto next;
12520                 btrfs_item_key_to_cpu(leaf, &key, slot);
12521                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12522                         break;
12523                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12524                 /*
12525                  * Except normal disk bytenr and disk num bytes, we still
12526                  * need to do extra check on dbackref offset as
12527                  * dbackref offset = file_offset - file_extent_offset
12528                  *
12529                  * Also, we must check the leaf owner.
12530                  * In case of shared tree blocks (snapshots) we can inherit
12531                  * leaves from source snapshot.
12532                  * In that case, reference from source snapshot should not
12533                  * count.
12534                  */
12535                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12536                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12537                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12538                     offset && btrfs_header_owner(leaf) == root_id)
12539                         found_count++;
12540
12541 next:
12542                 ret = btrfs_next_item(root, &path);
12543                 if (ret)
12544                         break;
12545         }
12546 out:
12547         btrfs_release_path(&path);
12548         if (found_count != count) {
12549                 error(
12550 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12551                         bytenr, len, root_id, objectid, offset, count, found_count);
12552                 return REFERENCER_MISSING;
12553         }
12554         return 0;
12555 }
12556
12557 /*
12558  * Check if the referencer of a shared data backref exists
12559  */
12560 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12561                                      u64 parent, u64 bytenr)
12562 {
12563         struct extent_buffer *eb;
12564         struct btrfs_key key;
12565         struct btrfs_file_extent_item *fi;
12566         u32 nr;
12567         int found_parent = 0;
12568         int i;
12569
12570         eb = read_tree_block(fs_info, parent, 0);
12571         if (!extent_buffer_uptodate(eb))
12572                 goto out;
12573
12574         nr = btrfs_header_nritems(eb);
12575         for (i = 0; i < nr; i++) {
12576                 btrfs_item_key_to_cpu(eb, &key, i);
12577                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12578                         continue;
12579
12580                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12581                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12582                         continue;
12583
12584                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12585                         found_parent = 1;
12586                         break;
12587                 }
12588         }
12589
12590 out:
12591         free_extent_buffer(eb);
12592         if (!found_parent) {
12593                 error("shared extent %llu referencer lost (parent: %llu)",
12594                         bytenr, parent);
12595                 return REFERENCER_MISSING;
12596         }
12597         return 0;
12598 }
12599
12600 /*
12601  * Only delete backref if REFERENCER_MISSING now
12602  *
12603  * Returns <0   the extent was deleted
12604  * Returns >0   the backref was deleted but extent still exists, returned value
12605  *               means error after repair
12606  * Returns  0   nothing happened
12607  */
12608 static int repair_extent_item(struct btrfs_trans_handle *trans,
12609                       struct btrfs_root *root, struct btrfs_path *path,
12610                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12611                       u64 owner, u64 offset, int err)
12612 {
12613         struct btrfs_key old_key;
12614         int freed = 0;
12615         int ret;
12616
12617         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12618
12619         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12620                 /* delete the backref */
12621                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12622                           num_bytes, parent, root_objectid, owner, offset);
12623                 if (!ret) {
12624                         freed = 1;
12625                         err &= ~REFERENCER_MISSING;
12626                         printf("Delete backref in extent [%llu %llu]\n",
12627                                bytenr, num_bytes);
12628                 } else {
12629                         error("fail to delete backref in extent [%llu %llu]",
12630                                bytenr, num_bytes);
12631                 }
12632         }
12633
12634         /* btrfs_free_extent may delete the extent */
12635         btrfs_release_path(path);
12636         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12637
12638         if (ret)
12639                 ret = -ENOENT;
12640         else if (freed)
12641                 ret = err;
12642         return ret;
12643 }
12644
12645 /*
12646  * This function will check a given extent item, including its backref and
12647  * itself (like crossing stripe boundary and type)
12648  *
12649  * Since we don't use extent_record anymore, introduce new error bit
12650  */
12651 static int check_extent_item(struct btrfs_trans_handle *trans,
12652                              struct btrfs_fs_info *fs_info,
12653                              struct btrfs_path *path)
12654 {
12655         struct btrfs_extent_item *ei;
12656         struct btrfs_extent_inline_ref *iref;
12657         struct btrfs_extent_data_ref *dref;
12658         struct extent_buffer *eb = path->nodes[0];
12659         unsigned long end;
12660         unsigned long ptr;
12661         int slot = path->slots[0];
12662         int type;
12663         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12664         u32 item_size = btrfs_item_size_nr(eb, slot);
12665         u64 flags;
12666         u64 offset;
12667         u64 parent;
12668         u64 num_bytes;
12669         u64 root_objectid;
12670         u64 owner;
12671         u64 owner_offset;
12672         int metadata = 0;
12673         int level;
12674         struct btrfs_key key;
12675         int ret;
12676         int err = 0;
12677
12678         btrfs_item_key_to_cpu(eb, &key, slot);
12679         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12680                 bytes_used += key.offset;
12681                 num_bytes = key.offset;
12682         } else {
12683                 bytes_used += nodesize;
12684                 num_bytes = nodesize;
12685         }
12686
12687         if (item_size < sizeof(*ei)) {
12688                 /*
12689                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12690                  * old thing when on disk format is still un-determined.
12691                  * No need to care about it anymore
12692                  */
12693                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12694                 return -ENOTTY;
12695         }
12696
12697         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12698         flags = btrfs_extent_flags(eb, ei);
12699
12700         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12701                 metadata = 1;
12702         if (metadata && check_crossing_stripes(global_info, key.objectid,
12703                                                eb->len)) {
12704                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12705                       key.objectid, key.objectid + nodesize);
12706                 err |= CROSSING_STRIPE_BOUNDARY;
12707         }
12708
12709         ptr = (unsigned long)(ei + 1);
12710
12711         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12712                 /* Old EXTENT_ITEM metadata */
12713                 struct btrfs_tree_block_info *info;
12714
12715                 info = (struct btrfs_tree_block_info *)ptr;
12716                 level = btrfs_tree_block_level(eb, info);
12717                 ptr += sizeof(struct btrfs_tree_block_info);
12718         } else {
12719                 /* New METADATA_ITEM */
12720                 level = key.offset;
12721         }
12722         end = (unsigned long)ei + item_size;
12723
12724 next:
12725         /* Reached extent item end normally */
12726         if (ptr == end)
12727                 goto out;
12728
12729         /* Beyond extent item end, wrong item size */
12730         if (ptr > end) {
12731                 err |= ITEM_SIZE_MISMATCH;
12732                 error("extent item at bytenr %llu slot %d has wrong size",
12733                         eb->start, slot);
12734                 goto out;
12735         }
12736
12737         parent = 0;
12738         root_objectid = 0;
12739         owner = 0;
12740         owner_offset = 0;
12741         /* Now check every backref in this extent item */
12742         iref = (struct btrfs_extent_inline_ref *)ptr;
12743         type = btrfs_extent_inline_ref_type(eb, iref);
12744         offset = btrfs_extent_inline_ref_offset(eb, iref);
12745         switch (type) {
12746         case BTRFS_TREE_BLOCK_REF_KEY:
12747                 root_objectid = offset;
12748                 owner = level;
12749                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12750                                                level);
12751                 err |= ret;
12752                 break;
12753         case BTRFS_SHARED_BLOCK_REF_KEY:
12754                 parent = offset;
12755                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12756                                                  level);
12757                 err |= ret;
12758                 break;
12759         case BTRFS_EXTENT_DATA_REF_KEY:
12760                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12761                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12762                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12763                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12764                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12765                                         owner_offset, key.objectid, key.offset,
12766                                         btrfs_extent_data_ref_count(eb, dref));
12767                 err |= ret;
12768                 break;
12769         case BTRFS_SHARED_DATA_REF_KEY:
12770                 parent = offset;
12771                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12772                 err |= ret;
12773                 break;
12774         default:
12775                 error("extent[%llu %d %llu] has unknown ref type: %d",
12776                         key.objectid, key.type, key.offset, type);
12777                 ret = UNKNOWN_TYPE;
12778                 err |= ret;
12779                 goto out;
12780         }
12781
12782         if (err && repair) {
12783                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12784                          key.objectid, num_bytes, parent, root_objectid,
12785                          owner, owner_offset, ret);
12786                 if (ret < 0)
12787                         goto out;
12788                 if (ret) {
12789                         goto next;
12790                         err = ret;
12791                 }
12792         }
12793
12794         ptr += btrfs_extent_inline_ref_size(type);
12795         goto next;
12796
12797 out:
12798         return err;
12799 }
12800
12801 /*
12802  * Check if a dev extent item is referred correctly by its chunk
12803  */
12804 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12805                                  struct extent_buffer *eb, int slot)
12806 {
12807         struct btrfs_root *chunk_root = fs_info->chunk_root;
12808         struct btrfs_dev_extent *ptr;
12809         struct btrfs_path path;
12810         struct btrfs_key chunk_key;
12811         struct btrfs_key devext_key;
12812         struct btrfs_chunk *chunk;
12813         struct extent_buffer *l;
12814         int num_stripes;
12815         u64 length;
12816         int i;
12817         int found_chunk = 0;
12818         int ret;
12819
12820         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12821         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12822         length = btrfs_dev_extent_length(eb, ptr);
12823
12824         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12825         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12826         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12827
12828         btrfs_init_path(&path);
12829         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12830         if (ret)
12831                 goto out;
12832
12833         l = path.nodes[0];
12834         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12835         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12836                                       chunk_key.offset);
12837         if (ret < 0)
12838                 goto out;
12839
12840         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12841                 goto out;
12842
12843         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12844         for (i = 0; i < num_stripes; i++) {
12845                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12846                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12847
12848                 if (devid == devext_key.objectid &&
12849                     offset == devext_key.offset) {
12850                         found_chunk = 1;
12851                         break;
12852                 }
12853         }
12854 out:
12855         btrfs_release_path(&path);
12856         if (!found_chunk) {
12857                 error(
12858                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12859                         devext_key.objectid, devext_key.offset, length);
12860                 return REFERENCER_MISSING;
12861         }
12862         return 0;
12863 }
12864
12865 /*
12866  * Check if the used space is correct with the dev item
12867  */
12868 static int check_dev_item(struct btrfs_fs_info *fs_info,
12869                           struct extent_buffer *eb, int slot)
12870 {
12871         struct btrfs_root *dev_root = fs_info->dev_root;
12872         struct btrfs_dev_item *dev_item;
12873         struct btrfs_path path;
12874         struct btrfs_key key;
12875         struct btrfs_dev_extent *ptr;
12876         u64 total_bytes;
12877         u64 dev_id;
12878         u64 used;
12879         u64 total = 0;
12880         int ret;
12881
12882         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12883         dev_id = btrfs_device_id(eb, dev_item);
12884         used = btrfs_device_bytes_used(eb, dev_item);
12885         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12886
12887         key.objectid = dev_id;
12888         key.type = BTRFS_DEV_EXTENT_KEY;
12889         key.offset = 0;
12890
12891         btrfs_init_path(&path);
12892         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12893         if (ret < 0) {
12894                 btrfs_item_key_to_cpu(eb, &key, slot);
12895                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12896                         key.objectid, key.type, key.offset);
12897                 btrfs_release_path(&path);
12898                 return REFERENCER_MISSING;
12899         }
12900
12901         /* Iterate dev_extents to calculate the used space of a device */
12902         while (1) {
12903                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12904                         goto next;
12905
12906                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12907                 if (key.objectid > dev_id)
12908                         break;
12909                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12910                         goto next;
12911
12912                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12913                                      struct btrfs_dev_extent);
12914                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12915 next:
12916                 ret = btrfs_next_item(dev_root, &path);
12917                 if (ret)
12918                         break;
12919         }
12920         btrfs_release_path(&path);
12921
12922         if (used != total) {
12923                 btrfs_item_key_to_cpu(eb, &key, slot);
12924                 error(
12925 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12926                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12927                         BTRFS_DEV_EXTENT_KEY, dev_id);
12928                 return ACCOUNTING_MISMATCH;
12929         }
12930         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12931
12932         return 0;
12933 }
12934
12935 /*
12936  * Check a block group item with its referener (chunk) and its used space
12937  * with extent/metadata item
12938  */
12939 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12940                                   struct extent_buffer *eb, int slot)
12941 {
12942         struct btrfs_root *extent_root = fs_info->extent_root;
12943         struct btrfs_root *chunk_root = fs_info->chunk_root;
12944         struct btrfs_block_group_item *bi;
12945         struct btrfs_block_group_item bg_item;
12946         struct btrfs_path path;
12947         struct btrfs_key bg_key;
12948         struct btrfs_key chunk_key;
12949         struct btrfs_key extent_key;
12950         struct btrfs_chunk *chunk;
12951         struct extent_buffer *leaf;
12952         struct btrfs_extent_item *ei;
12953         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12954         u64 flags;
12955         u64 bg_flags;
12956         u64 used;
12957         u64 total = 0;
12958         int ret;
12959         int err = 0;
12960
12961         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12962         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12963         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12964         used = btrfs_block_group_used(&bg_item);
12965         bg_flags = btrfs_block_group_flags(&bg_item);
12966
12967         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12968         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12969         chunk_key.offset = bg_key.objectid;
12970
12971         btrfs_init_path(&path);
12972         /* Search for the referencer chunk */
12973         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12974         if (ret) {
12975                 error(
12976                 "block group[%llu %llu] did not find the related chunk item",
12977                         bg_key.objectid, bg_key.offset);
12978                 err |= REFERENCER_MISSING;
12979         } else {
12980                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12981                                         struct btrfs_chunk);
12982                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12983                                                 bg_key.offset) {
12984                         error(
12985         "block group[%llu %llu] related chunk item length does not match",
12986                                 bg_key.objectid, bg_key.offset);
12987                         err |= REFERENCER_MISMATCH;
12988                 }
12989         }
12990         btrfs_release_path(&path);
12991
12992         /* Search from the block group bytenr */
12993         extent_key.objectid = bg_key.objectid;
12994         extent_key.type = 0;
12995         extent_key.offset = 0;
12996
12997         btrfs_init_path(&path);
12998         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12999         if (ret < 0)
13000                 goto out;
13001
13002         /* Iterate extent tree to account used space */
13003         while (1) {
13004                 leaf = path.nodes[0];
13005
13006                 /* Search slot can point to the last item beyond leaf nritems */
13007                 if (path.slots[0] >= btrfs_header_nritems(leaf))
13008                         goto next;
13009
13010                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
13011                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
13012                         break;
13013
13014                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
13015                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
13016                         goto next;
13017                 if (extent_key.objectid < bg_key.objectid)
13018                         goto next;
13019
13020                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
13021                         total += nodesize;
13022                 else
13023                         total += extent_key.offset;
13024
13025                 ei = btrfs_item_ptr(leaf, path.slots[0],
13026                                     struct btrfs_extent_item);
13027                 flags = btrfs_extent_flags(leaf, ei);
13028                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
13029                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
13030                                 error(
13031                         "bad extent[%llu, %llu) type mismatch with chunk",
13032                                         extent_key.objectid,
13033                                         extent_key.objectid + extent_key.offset);
13034                                 err |= CHUNK_TYPE_MISMATCH;
13035                         }
13036                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
13037                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
13038                                     BTRFS_BLOCK_GROUP_METADATA))) {
13039                                 error(
13040                         "bad extent[%llu, %llu) type mismatch with chunk",
13041                                         extent_key.objectid,
13042                                         extent_key.objectid + nodesize);
13043                                 err |= CHUNK_TYPE_MISMATCH;
13044                         }
13045                 }
13046 next:
13047                 ret = btrfs_next_item(extent_root, &path);
13048                 if (ret)
13049                         break;
13050         }
13051
13052 out:
13053         btrfs_release_path(&path);
13054
13055         if (total != used) {
13056                 error(
13057                 "block group[%llu %llu] used %llu but extent items used %llu",
13058                         bg_key.objectid, bg_key.offset, used, total);
13059                 err |= BG_ACCOUNTING_ERROR;
13060         }
13061         return err;
13062 }
13063
13064 /*
13065  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
13066  * FIXME: We still need to repair error of dev_item.
13067  *
13068  * Returns error after repair.
13069  */
13070 static int repair_chunk_item(struct btrfs_trans_handle *trans,
13071                              struct btrfs_root *chunk_root,
13072                              struct btrfs_path *path, int err)
13073 {
13074         struct btrfs_chunk *chunk;
13075         struct btrfs_key chunk_key;
13076         struct extent_buffer *eb = path->nodes[0];
13077         u64 length;
13078         int slot = path->slots[0];
13079         u64 type;
13080         int ret = 0;
13081
13082         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13083         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
13084                 return err;
13085         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13086         type = btrfs_chunk_type(path->nodes[0], chunk);
13087         length = btrfs_chunk_length(eb, chunk);
13088
13089         if (err & REFERENCER_MISSING) {
13090                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
13091                                              type, chunk_key.offset, length);
13092                 if (ret) {
13093                         error("fail to add block group item[%llu %llu]",
13094                               chunk_key.offset, length);
13095                         goto out;
13096                 } else {
13097                         err &= ~REFERENCER_MISSING;
13098                         printf("Added block group item[%llu %llu]\n",
13099                                chunk_key.offset, length);
13100                 }
13101         }
13102
13103 out:
13104         return err;
13105 }
13106
13107 /*
13108  * Check a chunk item.
13109  * Including checking all referred dev_extents and block group
13110  */
13111 static int check_chunk_item(struct btrfs_fs_info *fs_info,
13112                             struct extent_buffer *eb, int slot)
13113 {
13114         struct btrfs_root *extent_root = fs_info->extent_root;
13115         struct btrfs_root *dev_root = fs_info->dev_root;
13116         struct btrfs_path path;
13117         struct btrfs_key chunk_key;
13118         struct btrfs_key bg_key;
13119         struct btrfs_key devext_key;
13120         struct btrfs_chunk *chunk;
13121         struct extent_buffer *leaf;
13122         struct btrfs_block_group_item *bi;
13123         struct btrfs_block_group_item bg_item;
13124         struct btrfs_dev_extent *ptr;
13125         u64 length;
13126         u64 chunk_end;
13127         u64 stripe_len;
13128         u64 type;
13129         int num_stripes;
13130         u64 offset;
13131         u64 objectid;
13132         int i;
13133         int ret;
13134         int err = 0;
13135
13136         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13137         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13138         length = btrfs_chunk_length(eb, chunk);
13139         chunk_end = chunk_key.offset + length;
13140         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13141                                       chunk_key.offset);
13142         if (ret < 0) {
13143                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13144                         chunk_end);
13145                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13146                 goto out;
13147         }
13148         type = btrfs_chunk_type(eb, chunk);
13149
13150         bg_key.objectid = chunk_key.offset;
13151         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13152         bg_key.offset = length;
13153
13154         btrfs_init_path(&path);
13155         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13156         if (ret) {
13157                 error(
13158                 "chunk[%llu %llu) did not find the related block group item",
13159                         chunk_key.offset, chunk_end);
13160                 err |= REFERENCER_MISSING;
13161         } else{
13162                 leaf = path.nodes[0];
13163                 bi = btrfs_item_ptr(leaf, path.slots[0],
13164                                     struct btrfs_block_group_item);
13165                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13166                                    sizeof(bg_item));
13167                 if (btrfs_block_group_flags(&bg_item) != type) {
13168                         error(
13169 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13170                                 chunk_key.offset, chunk_end, type,
13171                                 btrfs_block_group_flags(&bg_item));
13172                         err |= REFERENCER_MISSING;
13173                 }
13174         }
13175
13176         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13177         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13178         for (i = 0; i < num_stripes; i++) {
13179                 btrfs_release_path(&path);
13180                 btrfs_init_path(&path);
13181                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13182                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13183                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13184
13185                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13186                                         0, 0);
13187                 if (ret)
13188                         goto not_match_dev;
13189
13190                 leaf = path.nodes[0];
13191                 ptr = btrfs_item_ptr(leaf, path.slots[0],
13192                                      struct btrfs_dev_extent);
13193                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13194                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13195                 if (objectid != chunk_key.objectid ||
13196                     offset != chunk_key.offset ||
13197                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13198                         goto not_match_dev;
13199                 continue;
13200 not_match_dev:
13201                 err |= BACKREF_MISSING;
13202                 error(
13203                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13204                         chunk_key.objectid, chunk_end, i);
13205                 continue;
13206         }
13207         btrfs_release_path(&path);
13208 out:
13209         return err;
13210 }
13211
13212 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13213                                    struct btrfs_root *root,
13214                                    struct btrfs_path *path)
13215 {
13216         struct btrfs_key key;
13217         int ret = 0;
13218
13219         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13220         btrfs_release_path(path);
13221         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13222         if (ret) {
13223                 ret = -ENOENT;
13224                 goto out;
13225         }
13226
13227         ret = btrfs_del_item(trans, root, path);
13228         if (ret)
13229                 goto out;
13230
13231         if (path->slots[0] == 0)
13232                 btrfs_prev_leaf(root, path);
13233         else
13234                 path->slots[0]--;
13235 out:
13236         if (ret)
13237                 error("failed to delete root %llu item[%llu, %u, %llu]",
13238                       root->objectid, key.objectid, key.type, key.offset);
13239         else
13240                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13241                        root->objectid, key.objectid, key.type, key.offset);
13242         return ret;
13243 }
13244
13245 /*
13246  * Main entry function to check known items and update related accounting info
13247  */
13248 static int check_leaf_items(struct btrfs_trans_handle *trans,
13249                             struct btrfs_root *root, struct btrfs_path *path,
13250                             struct node_refs *nrefs, int account_bytes)
13251 {
13252         struct btrfs_fs_info *fs_info = root->fs_info;
13253         struct btrfs_key key;
13254         struct extent_buffer *eb;
13255         int slot;
13256         int type;
13257         struct btrfs_extent_data_ref *dref;
13258         int ret = 0;
13259         int err = 0;
13260
13261 again:
13262         eb = path->nodes[0];
13263         slot = path->slots[0];
13264         if (slot >= btrfs_header_nritems(eb)) {
13265                 if (slot == 0) {
13266                         error("empty leaf [%llu %u] root %llu", eb->start,
13267                                 root->fs_info->nodesize, root->objectid);
13268                         err |= EIO;
13269                 }
13270                 goto out;
13271         }
13272
13273         btrfs_item_key_to_cpu(eb, &key, slot);
13274         type = key.type;
13275
13276         switch (type) {
13277         case BTRFS_EXTENT_DATA_KEY:
13278                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13279                 if (repair && ret)
13280                         ret = repair_extent_data_item(trans, root, path, nrefs,
13281                                                       ret);
13282                 err |= ret;
13283                 break;
13284         case BTRFS_BLOCK_GROUP_ITEM_KEY:
13285                 ret = check_block_group_item(fs_info, eb, slot);
13286                 if (repair &&
13287                     ret & REFERENCER_MISSING)
13288                         ret = delete_extent_tree_item(trans, root, path);
13289                 err |= ret;
13290                 break;
13291         case BTRFS_DEV_ITEM_KEY:
13292                 ret = check_dev_item(fs_info, eb, slot);
13293                 err |= ret;
13294                 break;
13295         case BTRFS_CHUNK_ITEM_KEY:
13296                 ret = check_chunk_item(fs_info, eb, slot);
13297                 if (repair && ret)
13298                         ret = repair_chunk_item(trans, root, path, ret);
13299                 err |= ret;
13300                 break;
13301         case BTRFS_DEV_EXTENT_KEY:
13302                 ret = check_dev_extent_item(fs_info, eb, slot);
13303                 err |= ret;
13304                 break;
13305         case BTRFS_EXTENT_ITEM_KEY:
13306         case BTRFS_METADATA_ITEM_KEY:
13307                 ret = check_extent_item(trans, fs_info, path);
13308                 err |= ret;
13309                 break;
13310         case BTRFS_EXTENT_CSUM_KEY:
13311                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
13312                 err |= ret;
13313                 break;
13314         case BTRFS_TREE_BLOCK_REF_KEY:
13315                 ret = check_tree_block_backref(fs_info, key.offset,
13316                                                key.objectid, -1);
13317                 if (repair &&
13318                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13319                         ret = delete_extent_tree_item(trans, root, path);
13320                 err |= ret;
13321                 break;
13322         case BTRFS_EXTENT_DATA_REF_KEY:
13323                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13324                 ret = check_extent_data_backref(fs_info,
13325                                 btrfs_extent_data_ref_root(eb, dref),
13326                                 btrfs_extent_data_ref_objectid(eb, dref),
13327                                 btrfs_extent_data_ref_offset(eb, dref),
13328                                 key.objectid, 0,
13329                                 btrfs_extent_data_ref_count(eb, dref));
13330                 if (repair &&
13331                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13332                         ret = delete_extent_tree_item(trans, root, path);
13333                 err |= ret;
13334                 break;
13335         case BTRFS_SHARED_BLOCK_REF_KEY:
13336                 ret = check_shared_block_backref(fs_info, key.offset,
13337                                                  key.objectid, -1);
13338                 if (repair &&
13339                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13340                         ret = delete_extent_tree_item(trans, root, path);
13341                 err |= ret;
13342                 break;
13343         case BTRFS_SHARED_DATA_REF_KEY:
13344                 ret = check_shared_data_backref(fs_info, key.offset,
13345                                                 key.objectid);
13346                 if (repair &&
13347                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13348                         ret = delete_extent_tree_item(trans, root, path);
13349                 err |= ret;
13350                 break;
13351         default:
13352                 break;
13353         }
13354
13355         ++path->slots[0];
13356         goto again;
13357 out:
13358         return err;
13359 }
13360
13361 /*
13362  * Low memory usage version check_chunks_and_extents.
13363  */
13364 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13365 {
13366         struct btrfs_trans_handle *trans = NULL;
13367         struct btrfs_path path;
13368         struct btrfs_key old_key;
13369         struct btrfs_key key;
13370         struct btrfs_root *root1;
13371         struct btrfs_root *root;
13372         struct btrfs_root *cur_root;
13373         int err = 0;
13374         int ret;
13375
13376         root = fs_info->fs_root;
13377
13378         if (repair) {
13379                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13380                 if (IS_ERR(trans)) {
13381                         error("failed to start transaction before check");
13382                         return PTR_ERR(trans);
13383                 }
13384         }
13385
13386         root1 = root->fs_info->chunk_root;
13387         ret = check_btrfs_root(trans, root1, 0, 1);
13388         err |= ret;
13389
13390         root1 = root->fs_info->tree_root;
13391         ret = check_btrfs_root(trans, root1, 0, 1);
13392         err |= ret;
13393
13394         btrfs_init_path(&path);
13395         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13396         key.offset = 0;
13397         key.type = BTRFS_ROOT_ITEM_KEY;
13398
13399         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13400         if (ret) {
13401                 error("cannot find extent tree in tree_root");
13402                 goto out;
13403         }
13404
13405         while (1) {
13406                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13407                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13408                         goto next;
13409                 old_key = key;
13410                 key.offset = (u64)-1;
13411
13412                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13413                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13414                                         &key);
13415                 else
13416                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13417                 if (IS_ERR(cur_root) || !cur_root) {
13418                         error("failed to read tree: %lld", key.objectid);
13419                         goto next;
13420                 }
13421
13422                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13423                 err |= ret;
13424
13425                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13426                         btrfs_free_fs_root(cur_root);
13427
13428                 btrfs_release_path(&path);
13429                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13430                                         &old_key, &path, 0, 0);
13431                 if (ret)
13432                         goto out;
13433 next:
13434                 ret = btrfs_next_item(root1, &path);
13435                 if (ret)
13436                         goto out;
13437         }
13438 out:
13439
13440         /* if repair, update block accounting */
13441         if (repair) {
13442                 ret = btrfs_fix_block_accounting(trans, root);
13443                 if (ret)
13444                         err |= ret;
13445                 else
13446                         err &= ~BG_ACCOUNTING_ERROR;
13447         }
13448
13449         if (trans)
13450                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13451
13452         btrfs_release_path(&path);
13453
13454         return err;
13455 }
13456
13457 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13458 {
13459         int ret;
13460
13461         if (!ctx.progress_enabled)
13462                 fprintf(stderr, "checking extents\n");
13463         if (check_mode == CHECK_MODE_LOWMEM)
13464                 ret = check_chunks_and_extents_v2(fs_info);
13465         else
13466                 ret = check_chunks_and_extents(fs_info);
13467
13468         /* Also repair device size related problems */
13469         if (repair && !ret) {
13470                 ret = btrfs_fix_device_and_super_size(fs_info);
13471                 if (ret > 0)
13472                         ret = 0;
13473         }
13474         return ret;
13475 }
13476
13477 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13478                            struct btrfs_root *root, int overwrite)
13479 {
13480         struct extent_buffer *c;
13481         struct extent_buffer *old = root->node;
13482         int level;
13483         int ret;
13484         struct btrfs_disk_key disk_key = {0,0,0};
13485
13486         level = 0;
13487
13488         if (overwrite) {
13489                 c = old;
13490                 extent_buffer_get(c);
13491                 goto init;
13492         }
13493         c = btrfs_alloc_free_block(trans, root,
13494                                    root->fs_info->nodesize,
13495                                    root->root_key.objectid,
13496                                    &disk_key, level, 0, 0);
13497         if (IS_ERR(c)) {
13498                 c = old;
13499                 extent_buffer_get(c);
13500                 overwrite = 1;
13501         }
13502 init:
13503         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13504         btrfs_set_header_level(c, level);
13505         btrfs_set_header_bytenr(c, c->start);
13506         btrfs_set_header_generation(c, trans->transid);
13507         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13508         btrfs_set_header_owner(c, root->root_key.objectid);
13509
13510         write_extent_buffer(c, root->fs_info->fsid,
13511                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13512
13513         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13514                             btrfs_header_chunk_tree_uuid(c),
13515                             BTRFS_UUID_SIZE);
13516
13517         btrfs_mark_buffer_dirty(c);
13518         /*
13519          * this case can happen in the following case:
13520          *
13521          * 1.overwrite previous root.
13522          *
13523          * 2.reinit reloc data root, this is because we skip pin
13524          * down reloc data tree before which means we can allocate
13525          * same block bytenr here.
13526          */
13527         if (old->start == c->start) {
13528                 btrfs_set_root_generation(&root->root_item,
13529                                           trans->transid);
13530                 root->root_item.level = btrfs_header_level(root->node);
13531                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13532                                         &root->root_key, &root->root_item);
13533                 if (ret) {
13534                         free_extent_buffer(c);
13535                         return ret;
13536                 }
13537         }
13538         free_extent_buffer(old);
13539         root->node = c;
13540         add_root_to_dirty_list(root);
13541         return 0;
13542 }
13543
13544 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13545                                 struct extent_buffer *eb, int tree_root)
13546 {
13547         struct extent_buffer *tmp;
13548         struct btrfs_root_item *ri;
13549         struct btrfs_key key;
13550         u64 bytenr;
13551         int level = btrfs_header_level(eb);
13552         int nritems;
13553         int ret;
13554         int i;
13555
13556         /*
13557          * If we have pinned this block before, don't pin it again.
13558          * This can not only avoid forever loop with broken filesystem
13559          * but also give us some speedups.
13560          */
13561         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13562                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13563                 return 0;
13564
13565         btrfs_pin_extent(fs_info, eb->start, eb->len);
13566
13567         nritems = btrfs_header_nritems(eb);
13568         for (i = 0; i < nritems; i++) {
13569                 if (level == 0) {
13570                         btrfs_item_key_to_cpu(eb, &key, i);
13571                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13572                                 continue;
13573                         /* Skip the extent root and reloc roots */
13574                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13575                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13576                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13577                                 continue;
13578                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13579                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13580
13581                         /*
13582                          * If at any point we start needing the real root we
13583                          * will have to build a stump root for the root we are
13584                          * in, but for now this doesn't actually use the root so
13585                          * just pass in extent_root.
13586                          */
13587                         tmp = read_tree_block(fs_info, bytenr, 0);
13588                         if (!extent_buffer_uptodate(tmp)) {
13589                                 fprintf(stderr, "Error reading root block\n");
13590                                 return -EIO;
13591                         }
13592                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13593                         free_extent_buffer(tmp);
13594                         if (ret)
13595                                 return ret;
13596                 } else {
13597                         bytenr = btrfs_node_blockptr(eb, i);
13598
13599                         /* If we aren't the tree root don't read the block */
13600                         if (level == 1 && !tree_root) {
13601                                 btrfs_pin_extent(fs_info, bytenr,
13602                                                 fs_info->nodesize);
13603                                 continue;
13604                         }
13605
13606                         tmp = read_tree_block(fs_info, bytenr, 0);
13607                         if (!extent_buffer_uptodate(tmp)) {
13608                                 fprintf(stderr, "Error reading tree block\n");
13609                                 return -EIO;
13610                         }
13611                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13612                         free_extent_buffer(tmp);
13613                         if (ret)
13614                                 return ret;
13615                 }
13616         }
13617
13618         return 0;
13619 }
13620
13621 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13622 {
13623         int ret;
13624
13625         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13626         if (ret)
13627                 return ret;
13628
13629         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13630 }
13631
13632 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13633 {
13634         struct btrfs_block_group_cache *cache;
13635         struct btrfs_path path;
13636         struct extent_buffer *leaf;
13637         struct btrfs_chunk *chunk;
13638         struct btrfs_key key;
13639         int ret;
13640         u64 start;
13641
13642         btrfs_init_path(&path);
13643         key.objectid = 0;
13644         key.type = BTRFS_CHUNK_ITEM_KEY;
13645         key.offset = 0;
13646         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13647         if (ret < 0) {
13648                 btrfs_release_path(&path);
13649                 return ret;
13650         }
13651
13652         /*
13653          * We do this in case the block groups were screwed up and had alloc
13654          * bits that aren't actually set on the chunks.  This happens with
13655          * restored images every time and could happen in real life I guess.
13656          */
13657         fs_info->avail_data_alloc_bits = 0;
13658         fs_info->avail_metadata_alloc_bits = 0;
13659         fs_info->avail_system_alloc_bits = 0;
13660
13661         /* First we need to create the in-memory block groups */
13662         while (1) {
13663                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13664                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13665                         if (ret < 0) {
13666                                 btrfs_release_path(&path);
13667                                 return ret;
13668                         }
13669                         if (ret) {
13670                                 ret = 0;
13671                                 break;
13672                         }
13673                 }
13674                 leaf = path.nodes[0];
13675                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13676                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13677                         path.slots[0]++;
13678                         continue;
13679                 }
13680
13681                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13682                 btrfs_add_block_group(fs_info, 0,
13683                                       btrfs_chunk_type(leaf, chunk), key.offset,
13684                                       btrfs_chunk_length(leaf, chunk));
13685                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13686                                  key.offset + btrfs_chunk_length(leaf, chunk));
13687                 path.slots[0]++;
13688         }
13689         start = 0;
13690         while (1) {
13691                 cache = btrfs_lookup_first_block_group(fs_info, start);
13692                 if (!cache)
13693                         break;
13694                 cache->cached = 1;
13695                 start = cache->key.objectid + cache->key.offset;
13696         }
13697
13698         btrfs_release_path(&path);
13699         return 0;
13700 }
13701
13702 static int reset_balance(struct btrfs_trans_handle *trans,
13703                          struct btrfs_fs_info *fs_info)
13704 {
13705         struct btrfs_root *root = fs_info->tree_root;
13706         struct btrfs_path path;
13707         struct extent_buffer *leaf;
13708         struct btrfs_key key;
13709         int del_slot, del_nr = 0;
13710         int ret;
13711         int found = 0;
13712
13713         btrfs_init_path(&path);
13714         key.objectid = BTRFS_BALANCE_OBJECTID;
13715         key.type = BTRFS_BALANCE_ITEM_KEY;
13716         key.offset = 0;
13717         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13718         if (ret) {
13719                 if (ret > 0)
13720                         ret = 0;
13721                 if (!ret)
13722                         goto reinit_data_reloc;
13723                 else
13724                         goto out;
13725         }
13726
13727         ret = btrfs_del_item(trans, root, &path);
13728         if (ret)
13729                 goto out;
13730         btrfs_release_path(&path);
13731
13732         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13733         key.type = BTRFS_ROOT_ITEM_KEY;
13734         key.offset = 0;
13735         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13736         if (ret < 0)
13737                 goto out;
13738         while (1) {
13739                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13740                         if (!found)
13741                                 break;
13742
13743                         if (del_nr) {
13744                                 ret = btrfs_del_items(trans, root, &path,
13745                                                       del_slot, del_nr);
13746                                 del_nr = 0;
13747                                 if (ret)
13748                                         goto out;
13749                         }
13750                         key.offset++;
13751                         btrfs_release_path(&path);
13752
13753                         found = 0;
13754                         ret = btrfs_search_slot(trans, root, &key, &path,
13755                                                 -1, 1);
13756                         if (ret < 0)
13757                                 goto out;
13758                         continue;
13759                 }
13760                 found = 1;
13761                 leaf = path.nodes[0];
13762                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13763                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13764                         break;
13765                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13766                         path.slots[0]++;
13767                         continue;
13768                 }
13769                 if (!del_nr) {
13770                         del_slot = path.slots[0];
13771                         del_nr = 1;
13772                 } else {
13773                         del_nr++;
13774                 }
13775                 path.slots[0]++;
13776         }
13777
13778         if (del_nr) {
13779                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13780                 if (ret)
13781                         goto out;
13782         }
13783         btrfs_release_path(&path);
13784
13785 reinit_data_reloc:
13786         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13787         key.type = BTRFS_ROOT_ITEM_KEY;
13788         key.offset = (u64)-1;
13789         root = btrfs_read_fs_root(fs_info, &key);
13790         if (IS_ERR(root)) {
13791                 fprintf(stderr, "Error reading data reloc tree\n");
13792                 ret = PTR_ERR(root);
13793                 goto out;
13794         }
13795         record_root_in_trans(trans, root);
13796         ret = btrfs_fsck_reinit_root(trans, root, 0);
13797         if (ret)
13798                 goto out;
13799         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13800 out:
13801         btrfs_release_path(&path);
13802         return ret;
13803 }
13804
13805 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13806                               struct btrfs_fs_info *fs_info)
13807 {
13808         u64 start = 0;
13809         int ret;
13810
13811         /*
13812          * The only reason we don't do this is because right now we're just
13813          * walking the trees we find and pinning down their bytes, we don't look
13814          * at any of the leaves.  In order to do mixed groups we'd have to check
13815          * the leaves of any fs roots and pin down the bytes for any file
13816          * extents we find.  Not hard but why do it if we don't have to?
13817          */
13818         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13819                 fprintf(stderr, "We don't support re-initing the extent tree "
13820                         "for mixed block groups yet, please notify a btrfs "
13821                         "developer you want to do this so they can add this "
13822                         "functionality.\n");
13823                 return -EINVAL;
13824         }
13825
13826         /*
13827          * first we need to walk all of the trees except the extent tree and pin
13828          * down the bytes that are in use so we don't overwrite any existing
13829          * metadata.
13830          */
13831         ret = pin_metadata_blocks(fs_info);
13832         if (ret) {
13833                 fprintf(stderr, "error pinning down used bytes\n");
13834                 return ret;
13835         }
13836
13837         /*
13838          * Need to drop all the block groups since we're going to recreate all
13839          * of them again.
13840          */
13841         btrfs_free_block_groups(fs_info);
13842         ret = reset_block_groups(fs_info);
13843         if (ret) {
13844                 fprintf(stderr, "error resetting the block groups\n");
13845                 return ret;
13846         }
13847
13848         /* Ok we can allocate now, reinit the extent root */
13849         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13850         if (ret) {
13851                 fprintf(stderr, "extent root initialization failed\n");
13852                 /*
13853                  * When the transaction code is updated we should end the
13854                  * transaction, but for now progs only knows about commit so
13855                  * just return an error.
13856                  */
13857                 return ret;
13858         }
13859
13860         /*
13861          * Now we have all the in-memory block groups setup so we can make
13862          * allocations properly, and the metadata we care about is safe since we
13863          * pinned all of it above.
13864          */
13865         while (1) {
13866                 struct btrfs_block_group_cache *cache;
13867
13868                 cache = btrfs_lookup_first_block_group(fs_info, start);
13869                 if (!cache)
13870                         break;
13871                 start = cache->key.objectid + cache->key.offset;
13872                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13873                                         &cache->key, &cache->item,
13874                                         sizeof(cache->item));
13875                 if (ret) {
13876                         fprintf(stderr, "Error adding block group\n");
13877                         return ret;
13878                 }
13879                 btrfs_extent_post_op(trans, fs_info->extent_root);
13880         }
13881
13882         ret = reset_balance(trans, fs_info);
13883         if (ret)
13884                 fprintf(stderr, "error resetting the pending balance\n");
13885
13886         return ret;
13887 }
13888
13889 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13890 {
13891         struct btrfs_path path;
13892         struct btrfs_trans_handle *trans;
13893         struct btrfs_key key;
13894         int ret;
13895
13896         printf("Recowing metadata block %llu\n", eb->start);
13897         key.objectid = btrfs_header_owner(eb);
13898         key.type = BTRFS_ROOT_ITEM_KEY;
13899         key.offset = (u64)-1;
13900
13901         root = btrfs_read_fs_root(root->fs_info, &key);
13902         if (IS_ERR(root)) {
13903                 fprintf(stderr, "Couldn't find owner root %llu\n",
13904                         key.objectid);
13905                 return PTR_ERR(root);
13906         }
13907
13908         trans = btrfs_start_transaction(root, 1);
13909         if (IS_ERR(trans))
13910                 return PTR_ERR(trans);
13911
13912         btrfs_init_path(&path);
13913         path.lowest_level = btrfs_header_level(eb);
13914         if (path.lowest_level)
13915                 btrfs_node_key_to_cpu(eb, &key, 0);
13916         else
13917                 btrfs_item_key_to_cpu(eb, &key, 0);
13918
13919         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13920         btrfs_commit_transaction(trans, root);
13921         btrfs_release_path(&path);
13922         return ret;
13923 }
13924
13925 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13926 {
13927         struct btrfs_path path;
13928         struct btrfs_trans_handle *trans;
13929         struct btrfs_key key;
13930         int ret;
13931
13932         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13933                bad->key.type, bad->key.offset);
13934         key.objectid = bad->root_id;
13935         key.type = BTRFS_ROOT_ITEM_KEY;
13936         key.offset = (u64)-1;
13937
13938         root = btrfs_read_fs_root(root->fs_info, &key);
13939         if (IS_ERR(root)) {
13940                 fprintf(stderr, "Couldn't find owner root %llu\n",
13941                         key.objectid);
13942                 return PTR_ERR(root);
13943         }
13944
13945         trans = btrfs_start_transaction(root, 1);
13946         if (IS_ERR(trans))
13947                 return PTR_ERR(trans);
13948
13949         btrfs_init_path(&path);
13950         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13951         if (ret) {
13952                 if (ret > 0)
13953                         ret = 0;
13954                 goto out;
13955         }
13956         ret = btrfs_del_item(trans, root, &path);
13957 out:
13958         btrfs_commit_transaction(trans, root);
13959         btrfs_release_path(&path);
13960         return ret;
13961 }
13962
13963 static int zero_log_tree(struct btrfs_root *root)
13964 {
13965         struct btrfs_trans_handle *trans;
13966         int ret;
13967
13968         trans = btrfs_start_transaction(root, 1);
13969         if (IS_ERR(trans)) {
13970                 ret = PTR_ERR(trans);
13971                 return ret;
13972         }
13973         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13974         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13975         ret = btrfs_commit_transaction(trans, root);
13976         return ret;
13977 }
13978
13979 static int populate_csum(struct btrfs_trans_handle *trans,
13980                          struct btrfs_root *csum_root, char *buf, u64 start,
13981                          u64 len)
13982 {
13983         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13984         u64 offset = 0;
13985         u64 sectorsize;
13986         int ret = 0;
13987
13988         while (offset < len) {
13989                 sectorsize = fs_info->sectorsize;
13990                 ret = read_extent_data(fs_info, buf, start + offset,
13991                                        &sectorsize, 0);
13992                 if (ret)
13993                         break;
13994                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13995                                             start + offset, buf, sectorsize);
13996                 if (ret)
13997                         break;
13998                 offset += sectorsize;
13999         }
14000         return ret;
14001 }
14002
14003 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
14004                                       struct btrfs_root *csum_root,
14005                                       struct btrfs_root *cur_root)
14006 {
14007         struct btrfs_path path;
14008         struct btrfs_key key;
14009         struct extent_buffer *node;
14010         struct btrfs_file_extent_item *fi;
14011         char *buf = NULL;
14012         u64 start = 0;
14013         u64 len = 0;
14014         int slot = 0;
14015         int ret = 0;
14016
14017         buf = malloc(cur_root->fs_info->sectorsize);
14018         if (!buf)
14019                 return -ENOMEM;
14020
14021         btrfs_init_path(&path);
14022         key.objectid = 0;
14023         key.offset = 0;
14024         key.type = 0;
14025         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
14026         if (ret < 0)
14027                 goto out;
14028         /* Iterate all regular file extents and fill its csum */
14029         while (1) {
14030                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
14031
14032                 if (key.type != BTRFS_EXTENT_DATA_KEY)
14033                         goto next;
14034                 node = path.nodes[0];
14035                 slot = path.slots[0];
14036                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
14037                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
14038                         goto next;
14039                 start = btrfs_file_extent_disk_bytenr(node, fi);
14040                 len = btrfs_file_extent_disk_num_bytes(node, fi);
14041
14042                 ret = populate_csum(trans, csum_root, buf, start, len);
14043                 if (ret == -EEXIST)
14044                         ret = 0;
14045                 if (ret < 0)
14046                         goto out;
14047 next:
14048                 /*
14049                  * TODO: if next leaf is corrupted, jump to nearest next valid
14050                  * leaf.
14051                  */
14052                 ret = btrfs_next_item(cur_root, &path);
14053                 if (ret < 0)
14054                         goto out;
14055                 if (ret > 0) {
14056                         ret = 0;
14057                         goto out;
14058                 }
14059         }
14060
14061 out:
14062         btrfs_release_path(&path);
14063         free(buf);
14064         return ret;
14065 }
14066
14067 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
14068                                   struct btrfs_root *csum_root)
14069 {
14070         struct btrfs_fs_info *fs_info = csum_root->fs_info;
14071         struct btrfs_path path;
14072         struct btrfs_root *tree_root = fs_info->tree_root;
14073         struct btrfs_root *cur_root;
14074         struct extent_buffer *node;
14075         struct btrfs_key key;
14076         int slot = 0;
14077         int ret = 0;
14078
14079         btrfs_init_path(&path);
14080         key.objectid = BTRFS_FS_TREE_OBJECTID;
14081         key.offset = 0;
14082         key.type = BTRFS_ROOT_ITEM_KEY;
14083         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
14084         if (ret < 0)
14085                 goto out;
14086         if (ret > 0) {
14087                 ret = -ENOENT;
14088                 goto out;
14089         }
14090
14091         while (1) {
14092                 node = path.nodes[0];
14093                 slot = path.slots[0];
14094                 btrfs_item_key_to_cpu(node, &key, slot);
14095                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
14096                         goto out;
14097                 if (key.type != BTRFS_ROOT_ITEM_KEY)
14098                         goto next;
14099                 if (!is_fstree(key.objectid))
14100                         goto next;
14101                 key.offset = (u64)-1;
14102
14103                 cur_root = btrfs_read_fs_root(fs_info, &key);
14104                 if (IS_ERR(cur_root) || !cur_root) {
14105                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
14106                                 key.objectid);
14107                         goto out;
14108                 }
14109                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
14110                                 cur_root);
14111                 if (ret < 0)
14112                         goto out;
14113 next:
14114                 ret = btrfs_next_item(tree_root, &path);
14115                 if (ret > 0) {
14116                         ret = 0;
14117                         goto out;
14118                 }
14119                 if (ret < 0)
14120                         goto out;
14121         }
14122
14123 out:
14124         btrfs_release_path(&path);
14125         return ret;
14126 }
14127
14128 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14129                                       struct btrfs_root *csum_root)
14130 {
14131         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14132         struct btrfs_path path;
14133         struct btrfs_extent_item *ei;
14134         struct extent_buffer *leaf;
14135         char *buf;
14136         struct btrfs_key key;
14137         int ret;
14138
14139         btrfs_init_path(&path);
14140         key.objectid = 0;
14141         key.type = BTRFS_EXTENT_ITEM_KEY;
14142         key.offset = 0;
14143         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14144         if (ret < 0) {
14145                 btrfs_release_path(&path);
14146                 return ret;
14147         }
14148
14149         buf = malloc(csum_root->fs_info->sectorsize);
14150         if (!buf) {
14151                 btrfs_release_path(&path);
14152                 return -ENOMEM;
14153         }
14154
14155         while (1) {
14156                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14157                         ret = btrfs_next_leaf(extent_root, &path);
14158                         if (ret < 0)
14159                                 break;
14160                         if (ret) {
14161                                 ret = 0;
14162                                 break;
14163                         }
14164                 }
14165                 leaf = path.nodes[0];
14166
14167                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14168                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14169                         path.slots[0]++;
14170                         continue;
14171                 }
14172
14173                 ei = btrfs_item_ptr(leaf, path.slots[0],
14174                                     struct btrfs_extent_item);
14175                 if (!(btrfs_extent_flags(leaf, ei) &
14176                       BTRFS_EXTENT_FLAG_DATA)) {
14177                         path.slots[0]++;
14178                         continue;
14179                 }
14180
14181                 ret = populate_csum(trans, csum_root, buf, key.objectid,
14182                                     key.offset);
14183                 if (ret)
14184                         break;
14185                 path.slots[0]++;
14186         }
14187
14188         btrfs_release_path(&path);
14189         free(buf);
14190         return ret;
14191 }
14192
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * After an extent tree init the extent info has been wiped, so the extent
 * tree cannot serve as the source.  A non-zero @search_fs_tree therefore
 * selects the fs/subvolume trees as the source; zero selects the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ?
                fill_csum_tree_from_fs(trans, csum_root) :
                fill_csum_tree_from_extent(trans, csum_root);
}
14209
14210 static void free_roots_info_cache(void)
14211 {
14212         if (!roots_info_cache)
14213                 return;
14214
14215         while (!cache_tree_empty(roots_info_cache)) {
14216                 struct cache_extent *entry;
14217                 struct root_item_info *rii;
14218
14219                 entry = first_cache_extent(roots_info_cache);
14220                 if (!entry)
14221                         break;
14222                 remove_cache_extent(roots_info_cache, entry);
14223                 rii = container_of(entry, struct root_item_info, cache_extent);
14224                 free(rii);
14225         }
14226
14227         free(roots_info_cache);
14228         roots_info_cache = NULL;
14229 }
14230
14231 static int build_roots_info_cache(struct btrfs_fs_info *info)
14232 {
14233         int ret = 0;
14234         struct btrfs_key key;
14235         struct extent_buffer *leaf;
14236         struct btrfs_path path;
14237
14238         if (!roots_info_cache) {
14239                 roots_info_cache = malloc(sizeof(*roots_info_cache));
14240                 if (!roots_info_cache)
14241                         return -ENOMEM;
14242                 cache_tree_init(roots_info_cache);
14243         }
14244
14245         btrfs_init_path(&path);
14246         key.objectid = 0;
14247         key.type = BTRFS_EXTENT_ITEM_KEY;
14248         key.offset = 0;
14249         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14250         if (ret < 0)
14251                 goto out;
14252         leaf = path.nodes[0];
14253
14254         while (1) {
14255                 struct btrfs_key found_key;
14256                 struct btrfs_extent_item *ei;
14257                 struct btrfs_extent_inline_ref *iref;
14258                 int slot = path.slots[0];
14259                 int type;
14260                 u64 flags;
14261                 u64 root_id;
14262                 u8 level;
14263                 struct cache_extent *entry;
14264                 struct root_item_info *rii;
14265
14266                 if (slot >= btrfs_header_nritems(leaf)) {
14267                         ret = btrfs_next_leaf(info->extent_root, &path);
14268                         if (ret < 0) {
14269                                 break;
14270                         } else if (ret) {
14271                                 ret = 0;
14272                                 break;
14273                         }
14274                         leaf = path.nodes[0];
14275                         slot = path.slots[0];
14276                 }
14277
14278                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14279
14280                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14281                     found_key.type != BTRFS_METADATA_ITEM_KEY)
14282                         goto next;
14283
14284                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14285                 flags = btrfs_extent_flags(leaf, ei);
14286
14287                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14288                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
14289                         goto next;
14290
14291                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14292                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14293                         level = found_key.offset;
14294                 } else {
14295                         struct btrfs_tree_block_info *binfo;
14296
14297                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
14298                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14299                         level = btrfs_tree_block_level(leaf, binfo);
14300                 }
14301
14302                 /*
14303                  * For a root extent, it must be of the following type and the
14304                  * first (and only one) iref in the item.
14305                  */
14306                 type = btrfs_extent_inline_ref_type(leaf, iref);
14307                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14308                         goto next;
14309
14310                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14311                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14312                 if (!entry) {
14313                         rii = malloc(sizeof(struct root_item_info));
14314                         if (!rii) {
14315                                 ret = -ENOMEM;
14316                                 goto out;
14317                         }
14318                         rii->cache_extent.start = root_id;
14319                         rii->cache_extent.size = 1;
14320                         rii->level = (u8)-1;
14321                         entry = &rii->cache_extent;
14322                         ret = insert_cache_extent(roots_info_cache, entry);
14323                         ASSERT(ret == 0);
14324                 } else {
14325                         rii = container_of(entry, struct root_item_info,
14326                                            cache_extent);
14327                 }
14328
14329                 ASSERT(rii->cache_extent.start == root_id);
14330                 ASSERT(rii->cache_extent.size == 1);
14331
14332                 if (level > rii->level || rii->level == (u8)-1) {
14333                         rii->level = level;
14334                         rii->bytenr = found_key.objectid;
14335                         rii->gen = btrfs_extent_generation(leaf, ei);
14336                         rii->node_count = 1;
14337                 } else if (level == rii->level) {
14338                         rii->node_count++;
14339                 }
14340 next:
14341                 path.slots[0]++;
14342         }
14343
14344 out:
14345         btrfs_release_path(&path);
14346
14347         return ret;
14348 }
14349
14350 static int maybe_repair_root_item(struct btrfs_path *path,
14351                                   const struct btrfs_key *root_key,
14352                                   const int read_only_mode)
14353 {
14354         const u64 root_id = root_key->objectid;
14355         struct cache_extent *entry;
14356         struct root_item_info *rii;
14357         struct btrfs_root_item ri;
14358         unsigned long offset;
14359
14360         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14361         if (!entry) {
14362                 fprintf(stderr,
14363                         "Error: could not find extent items for root %llu\n",
14364                         root_key->objectid);
14365                 return -ENOENT;
14366         }
14367
14368         rii = container_of(entry, struct root_item_info, cache_extent);
14369         ASSERT(rii->cache_extent.start == root_id);
14370         ASSERT(rii->cache_extent.size == 1);
14371
14372         if (rii->node_count != 1) {
14373                 fprintf(stderr,
14374                         "Error: could not find btree root extent for root %llu\n",
14375                         root_id);
14376                 return -ENOENT;
14377         }
14378
14379         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14380         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14381
14382         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14383             btrfs_root_level(&ri) != rii->level ||
14384             btrfs_root_generation(&ri) != rii->gen) {
14385
14386                 /*
14387                  * If we're in repair mode but our caller told us to not update
14388                  * the root item, i.e. just check if it needs to be updated, don't
14389                  * print this message, since the caller will call us again shortly
14390                  * for the same root item without read only mode (the caller will
14391                  * open a transaction first).
14392                  */
14393                 if (!(read_only_mode && repair))
14394                         fprintf(stderr,
14395                                 "%sroot item for root %llu,"
14396                                 " current bytenr %llu, current gen %llu, current level %u,"
14397                                 " new bytenr %llu, new gen %llu, new level %u\n",
14398                                 (read_only_mode ? "" : "fixing "),
14399                                 root_id,
14400                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14401                                 btrfs_root_level(&ri),
14402                                 rii->bytenr, rii->gen, rii->level);
14403
14404                 if (btrfs_root_generation(&ri) > rii->gen) {
14405                         fprintf(stderr,
14406                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14407                                 root_id, btrfs_root_generation(&ri), rii->gen);
14408                         return -EINVAL;
14409                 }
14410
14411                 if (!read_only_mode) {
14412                         btrfs_set_root_bytenr(&ri, rii->bytenr);
14413                         btrfs_set_root_level(&ri, rii->level);
14414                         btrfs_set_root_generation(&ri, rii->gen);
14415                         write_extent_buffer(path->nodes[0], &ri,
14416                                             offset, sizeof(ri));
14417                 }
14418
14419                 return 1;
14420         }
14421
14422         return 0;
14423 }
14424
14425 /*
14426  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14427  * caused read-only snapshots to be corrupted if they were created at a moment
14428  * when the source subvolume/snapshot had orphan items. The issue was that the
14429  * on-disk root items became incorrect, referring to the pre orphan cleanup root
14430  * node instead of the post orphan cleanup root node.
14431  * So this function, and its callees, just detects and fixes those cases. Even
14432  * though the regression was for read-only snapshots, this function applies to
14433  * any snapshot/subvolume root.
14434  * This must be run before any other repair code - not doing it so, makes other
14435  * repair code delete or modify backrefs in the extent tree for example, which
14436  * will result in an inconsistent fs after repairing the root items.
14437  */
14438 static int repair_root_items(struct btrfs_fs_info *info)
14439 {
14440         struct btrfs_path path;
14441         struct btrfs_key key;
14442         struct extent_buffer *leaf;
14443         struct btrfs_trans_handle *trans = NULL;
14444         int ret = 0;
14445         int bad_roots = 0;
14446         int need_trans = 0;
14447
14448         btrfs_init_path(&path);
14449
14450         ret = build_roots_info_cache(info);
14451         if (ret)
14452                 goto out;
14453
14454         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14455         key.type = BTRFS_ROOT_ITEM_KEY;
14456         key.offset = 0;
14457
14458 again:
14459         /*
14460          * Avoid opening and committing transactions if a leaf doesn't have
14461          * any root items that need to be fixed, so that we avoid rotating
14462          * backup roots unnecessarily.
14463          */
14464         if (need_trans) {
14465                 trans = btrfs_start_transaction(info->tree_root, 1);
14466                 if (IS_ERR(trans)) {
14467                         ret = PTR_ERR(trans);
14468                         goto out;
14469                 }
14470         }
14471
14472         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14473                                 0, trans ? 1 : 0);
14474         if (ret < 0)
14475                 goto out;
14476         leaf = path.nodes[0];
14477
14478         while (1) {
14479                 struct btrfs_key found_key;
14480
14481                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14482                         int no_more_keys = find_next_key(&path, &key);
14483
14484                         btrfs_release_path(&path);
14485                         if (trans) {
14486                                 ret = btrfs_commit_transaction(trans,
14487                                                                info->tree_root);
14488                                 trans = NULL;
14489                                 if (ret < 0)
14490                                         goto out;
14491                         }
14492                         need_trans = 0;
14493                         if (no_more_keys)
14494                                 break;
14495                         goto again;
14496                 }
14497
14498                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14499
14500                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14501                         goto next;
14502                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14503                         goto next;
14504
14505                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14506                 if (ret < 0)
14507                         goto out;
14508                 if (ret) {
14509                         if (!trans && repair) {
14510                                 need_trans = 1;
14511                                 key = found_key;
14512                                 btrfs_release_path(&path);
14513                                 goto again;
14514                         }
14515                         bad_roots++;
14516                 }
14517 next:
14518                 path.slots[0]++;
14519         }
14520         ret = 0;
14521 out:
14522         free_roots_info_cache();
14523         btrfs_release_path(&path);
14524         if (trans)
14525                 btrfs_commit_transaction(trans, info->tree_root);
14526         if (ret < 0)
14527                 return ret;
14528
14529         return bad_roots;
14530 }
14531
14532 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14533 {
14534         struct btrfs_trans_handle *trans;
14535         struct btrfs_block_group_cache *bg_cache;
14536         u64 current = 0;
14537         int ret = 0;
14538
14539         /* Clear all free space cache inodes and its extent data */
14540         while (1) {
14541                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14542                 if (!bg_cache)
14543                         break;
14544                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14545                 if (ret < 0)
14546                         return ret;
14547                 current = bg_cache->key.objectid + bg_cache->key.offset;
14548         }
14549
14550         /* Don't forget to set cache_generation to -1 */
14551         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14552         if (IS_ERR(trans)) {
14553                 error("failed to update super block cache generation");
14554                 return PTR_ERR(trans);
14555         }
14556         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14557         btrfs_commit_transaction(trans, fs_info->tree_root);
14558
14559         return ret;
14560 }
14561
14562 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14563                 int clear_version)
14564 {
14565         int ret = 0;
14566
14567         if (clear_version == 1) {
14568                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14569                         error(
14570                 "free space cache v2 detected, use --clear-space-cache v2");
14571                         ret = 1;
14572                         goto close_out;
14573                 }
14574                 printf("Clearing free space cache\n");
14575                 ret = clear_free_space_cache(fs_info);
14576                 if (ret) {
14577                         error("failed to clear free space cache");
14578                         ret = 1;
14579                 } else {
14580                         printf("Free space cache cleared\n");
14581                 }
14582         } else if (clear_version == 2) {
14583                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14584                         printf("no free space cache v2 to clear\n");
14585                         ret = 0;
14586                         goto close_out;
14587                 }
14588                 printf("Clear free space cache v2\n");
14589                 ret = btrfs_clear_free_space_tree(fs_info);
14590                 if (ret) {
14591                         error("failed to clear free space cache v2: %d", ret);
14592                         ret = 1;
14593                 } else {
14594                         printf("free space cache v2 cleared\n");
14595                 }
14596         }
14597 close_out:
14598         return ret;
14599 }
14600
/*
 * Help text for "btrfs check"; cmd_check() passes this array to usage().
 *
 * Layout: the first string is the command synopsis and the second a one-line
 * summary. The strings up to the empty entry form the longer description, and
 * the entries after it describe the options, with continuation lines indented
 * to the description column. The array is NULL-terminated.
 * NOTE(review): usage() presumably depends on this ordering - confirm against
 * its implementation before reordering entries.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
14632
14633 int cmd_check(int argc, char **argv)
14634 {
14635         struct cache_tree root_cache;
14636         struct btrfs_root *root;
14637         struct btrfs_fs_info *info;
14638         u64 bytenr = 0;
14639         u64 subvolid = 0;
14640         u64 tree_root_bytenr = 0;
14641         u64 chunk_root_bytenr = 0;
14642         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14643         int ret = 0;
14644         int err = 0;
14645         u64 num;
14646         int init_csum_tree = 0;
14647         int readonly = 0;
14648         int clear_space_cache = 0;
14649         int qgroup_report = 0;
14650         int qgroups_repaired = 0;
14651         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14652         int force = 0;
14653
14654         while(1) {
14655                 int c;
14656                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14657                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14658                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14659                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14660                         GETOPT_VAL_FORCE };
14661                 static const struct option long_options[] = {
14662                         { "super", required_argument, NULL, 's' },
14663                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14664                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14665                         { "init-csum-tree", no_argument, NULL,
14666                                 GETOPT_VAL_INIT_CSUM },
14667                         { "init-extent-tree", no_argument, NULL,
14668                                 GETOPT_VAL_INIT_EXTENT },
14669                         { "check-data-csum", no_argument, NULL,
14670                                 GETOPT_VAL_CHECK_CSUM },
14671                         { "backup", no_argument, NULL, 'b' },
14672                         { "subvol-extents", required_argument, NULL, 'E' },
14673                         { "qgroup-report", no_argument, NULL, 'Q' },
14674                         { "tree-root", required_argument, NULL, 'r' },
14675                         { "chunk-root", required_argument, NULL,
14676                                 GETOPT_VAL_CHUNK_TREE },
14677                         { "progress", no_argument, NULL, 'p' },
14678                         { "mode", required_argument, NULL,
14679                                 GETOPT_VAL_MODE },
14680                         { "clear-space-cache", required_argument, NULL,
14681                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14682                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14683                         { NULL, 0, NULL, 0}
14684                 };
14685
14686                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14687                 if (c < 0)
14688                         break;
14689                 switch(c) {
14690                         case 'a': /* ignored */ break;
14691                         case 'b':
14692                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14693                                 break;
14694                         case 's':
14695                                 num = arg_strtou64(optarg);
14696                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14697                                         error(
14698                                         "super mirror should be less than %d",
14699                                                 BTRFS_SUPER_MIRROR_MAX);
14700                                         exit(1);
14701                                 }
14702                                 bytenr = btrfs_sb_offset(((int)num));
14703                                 printf("using SB copy %llu, bytenr %llu\n", num,
14704                                        (unsigned long long)bytenr);
14705                                 break;
14706                         case 'Q':
14707                                 qgroup_report = 1;
14708                                 break;
14709                         case 'E':
14710                                 subvolid = arg_strtou64(optarg);
14711                                 break;
14712                         case 'r':
14713                                 tree_root_bytenr = arg_strtou64(optarg);
14714                                 break;
14715                         case GETOPT_VAL_CHUNK_TREE:
14716                                 chunk_root_bytenr = arg_strtou64(optarg);
14717                                 break;
14718                         case 'p':
14719                                 ctx.progress_enabled = true;
14720                                 break;
14721                         case '?':
14722                         case 'h':
14723                                 usage(cmd_check_usage);
14724                         case GETOPT_VAL_REPAIR:
14725                                 printf("enabling repair mode\n");
14726                                 repair = 1;
14727                                 ctree_flags |= OPEN_CTREE_WRITES;
14728                                 break;
14729                         case GETOPT_VAL_READONLY:
14730                                 readonly = 1;
14731                                 break;
14732                         case GETOPT_VAL_INIT_CSUM:
14733                                 printf("Creating a new CRC tree\n");
14734                                 init_csum_tree = 1;
14735                                 repair = 1;
14736                                 ctree_flags |= OPEN_CTREE_WRITES;
14737                                 break;
14738                         case GETOPT_VAL_INIT_EXTENT:
14739                                 init_extent_tree = 1;
14740                                 ctree_flags |= (OPEN_CTREE_WRITES |
14741                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14742                                 repair = 1;
14743                                 break;
14744                         case GETOPT_VAL_CHECK_CSUM:
14745                                 check_data_csum = 1;
14746                                 break;
14747                         case GETOPT_VAL_MODE:
14748                                 check_mode = parse_check_mode(optarg);
14749                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14750                                         error("unknown mode: %s", optarg);
14751                                         exit(1);
14752                                 }
14753                                 break;
14754                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14755                                 if (strcmp(optarg, "v1") == 0) {
14756                                         clear_space_cache = 1;
14757                                 } else if (strcmp(optarg, "v2") == 0) {
14758                                         clear_space_cache = 2;
14759                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14760                                 } else {
14761                                         error(
14762                 "invalid argument to --clear-space-cache, must be v1 or v2");
14763                                         exit(1);
14764                                 }
14765                                 ctree_flags |= OPEN_CTREE_WRITES;
14766                                 break;
14767                         case GETOPT_VAL_FORCE:
14768                                 force = 1;
14769                                 break;
14770                 }
14771         }
14772
14773         if (check_argc_exact(argc - optind, 1))
14774                 usage(cmd_check_usage);
14775
14776         if (ctx.progress_enabled) {
14777                 ctx.tp = TASK_NOTHING;
14778                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14779         }
14780
14781         /* This check is the only reason for --readonly to exist */
14782         if (readonly && repair) {
14783                 error("repair options are not compatible with --readonly");
14784                 exit(1);
14785         }
14786
14787         /*
14788          * experimental and dangerous
14789          */
14790         if (repair && check_mode == CHECK_MODE_LOWMEM)
14791                 warning("low-memory mode repair support is only partial");
14792
14793         radix_tree_init();
14794         cache_tree_init(&root_cache);
14795
14796         ret = check_mounted(argv[optind]);
14797         if (!force) {
14798                 if (ret < 0) {
14799                         error("could not check mount status: %s",
14800                                         strerror(-ret));
14801                         err |= !!ret;
14802                         goto err_out;
14803                 } else if (ret) {
14804                         error(
14805 "%s is currently mounted, use --force if you really intend to check the filesystem",
14806                                 argv[optind]);
14807                         ret = -EBUSY;
14808                         err |= !!ret;
14809                         goto err_out;
14810                 }
14811         } else {
14812                 if (repair) {
14813                         error("repair and --force is not yet supported");
14814                         ret = 1;
14815                         err |= !!ret;
14816                         goto err_out;
14817                 }
14818                 if (ret < 0) {
14819                         warning(
14820 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14821                                 argv[optind]);
14822                 } else if (ret) {
14823                         warning(
14824                         "filesystem mounted, continuing because of --force");
14825                 }
14826                 /* A block device is mounted in exclusive mode by kernel */
14827                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14828         }
14829
14830         /* only allow partial opening under repair mode */
14831         if (repair)
14832                 ctree_flags |= OPEN_CTREE_PARTIAL;
14833
14834         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14835                                   chunk_root_bytenr, ctree_flags);
14836         if (!info) {
14837                 error("cannot open file system");
14838                 ret = -EIO;
14839                 err |= !!ret;
14840                 goto err_out;
14841         }
14842
14843         global_info = info;
14844         root = info->fs_root;
14845         uuid_unparse(info->super_copy->fsid, uuidbuf);
14846
14847         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14848
14849         /*
14850          * Check the bare minimum before starting anything else that could rely
14851          * on it, namely the tree roots, any local consistency checks
14852          */
14853         if (!extent_buffer_uptodate(info->tree_root->node) ||
14854             !extent_buffer_uptodate(info->dev_root->node) ||
14855             !extent_buffer_uptodate(info->chunk_root->node)) {
14856                 error("critical roots corrupted, unable to check the filesystem");
14857                 err |= !!ret;
14858                 ret = -EIO;
14859                 goto close_out;
14860         }
14861
14862         if (clear_space_cache) {
14863                 ret = do_clear_free_space_cache(info, clear_space_cache);
14864                 err |= !!ret;
14865                 goto close_out;
14866         }
14867
14868         /*
14869          * repair mode will force us to commit transaction which
14870          * will make us fail to load log tree when mounting.
14871          */
14872         if (repair && btrfs_super_log_root(info->super_copy)) {
14873                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14874                 if (!ret) {
14875                         ret = 1;
14876                         err |= !!ret;
14877                         goto close_out;
14878                 }
14879                 ret = zero_log_tree(root);
14880                 err |= !!ret;
14881                 if (ret) {
14882                         error("failed to zero log tree: %d", ret);
14883                         goto close_out;
14884                 }
14885         }
14886
14887         if (qgroup_report) {
14888                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14889                        uuidbuf);
14890                 ret = qgroup_verify_all(info);
14891                 err |= !!ret;
14892                 if (ret == 0)
14893                         report_qgroups(1);
14894                 goto close_out;
14895         }
14896         if (subvolid) {
14897                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14898                        subvolid, argv[optind], uuidbuf);
14899                 ret = print_extent_state(info, subvolid);
14900                 err |= !!ret;
14901                 goto close_out;
14902         }
14903
14904         if (init_extent_tree || init_csum_tree) {
14905                 struct btrfs_trans_handle *trans;
14906
14907                 trans = btrfs_start_transaction(info->extent_root, 0);
14908                 if (IS_ERR(trans)) {
14909                         error("error starting transaction");
14910                         ret = PTR_ERR(trans);
14911                         err |= !!ret;
14912                         goto close_out;
14913                 }
14914
14915                 if (init_extent_tree) {
14916                         printf("Creating a new extent tree\n");
14917                         ret = reinit_extent_tree(trans, info);
14918                         err |= !!ret;
14919                         if (ret)
14920                                 goto close_out;
14921                 }
14922
14923                 if (init_csum_tree) {
14924                         printf("Reinitialize checksum tree\n");
14925                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14926                         if (ret) {
14927                                 error("checksum tree initialization failed: %d",
14928                                                 ret);
14929                                 ret = -EIO;
14930                                 err |= !!ret;
14931                                 goto close_out;
14932                         }
14933
14934                         ret = fill_csum_tree(trans, info->csum_root,
14935                                              init_extent_tree);
14936                         err |= !!ret;
14937                         if (ret) {
14938                                 error("checksum tree refilling failed: %d", ret);
14939                                 return -EIO;
14940                         }
14941                 }
14942                 /*
14943                  * Ok now we commit and run the normal fsck, which will add
14944                  * extent entries for all of the items it finds.
14945                  */
14946                 ret = btrfs_commit_transaction(trans, info->extent_root);
14947                 err |= !!ret;
14948                 if (ret)
14949                         goto close_out;
14950         }
14951         if (!extent_buffer_uptodate(info->extent_root->node)) {
14952                 error("critical: extent_root, unable to check the filesystem");
14953                 ret = -EIO;
14954                 err |= !!ret;
14955                 goto close_out;
14956         }
14957         if (!extent_buffer_uptodate(info->csum_root->node)) {
14958                 error("critical: csum_root, unable to check the filesystem");
14959                 ret = -EIO;
14960                 err |= !!ret;
14961                 goto close_out;
14962         }
14963
14964         if (!init_extent_tree) {
14965                 ret = repair_root_items(info);
14966                 if (ret < 0) {
14967                         err = !!ret;
14968                         error("failed to repair root items: %s", strerror(-ret));
14969                         goto close_out;
14970                 }
14971                 if (repair) {
14972                         fprintf(stderr, "Fixed %d roots.\n", ret);
14973                         ret = 0;
14974                 } else if (ret > 0) {
14975                         fprintf(stderr,
14976                                 "Found %d roots with an outdated root item.\n",
14977                                 ret);
14978                         fprintf(stderr,
14979         "Please run a filesystem check with the option --repair to fix them.\n");
14980                         ret = 1;
14981                         err |= ret;
14982                         goto close_out;
14983                 }
14984         }
14985
14986         ret = do_check_chunks_and_extents(info);
14987         err |= !!ret;
14988         if (ret)
14989                 error(
14990                 "errors found in extent allocation tree or chunk allocation");
14991
14992         /* Only re-check super size after we checked and repaired the fs */
14993         err |= !is_super_size_valid(info);
14994
14995         if (!ctx.progress_enabled) {
14996                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14997                         fprintf(stderr, "checking free space tree\n");
14998                 else
14999                         fprintf(stderr, "checking free space cache\n");
15000         }
15001         ret = check_space_cache(root);
15002         err |= !!ret;
15003         if (ret) {
15004                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
15005                         error("errors found in free space tree");
15006                 else
15007                         error("errors found in free space cache");
15008                 goto out;
15009         }
15010
15011         /*
15012          * We used to have to have these hole extents in between our real
15013          * extents so if we don't have this flag set we need to make sure there
15014          * are no gaps in the file extents for inodes, otherwise we can just
15015          * ignore it when this happens.
15016          */
15017         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
15018         ret = do_check_fs_roots(info, &root_cache);
15019         err |= !!ret;
15020         if (ret) {
15021                 error("errors found in fs roots");
15022                 goto out;
15023         }
15024
15025         fprintf(stderr, "checking csums\n");
15026         ret = check_csums(root);
15027         err |= !!ret;
15028         if (ret) {
15029                 error("errors found in csum tree");
15030                 goto out;
15031         }
15032
15033         fprintf(stderr, "checking root refs\n");
15034         /* For low memory mode, check_fs_roots_v2 handles root refs */
15035         if (check_mode != CHECK_MODE_LOWMEM) {
15036                 ret = check_root_refs(root, &root_cache);
15037                 err |= !!ret;
15038                 if (ret) {
15039                         error("errors found in root refs");
15040                         goto out;
15041                 }
15042         }
15043
15044         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
15045                 struct extent_buffer *eb;
15046
15047                 eb = list_first_entry(&root->fs_info->recow_ebs,
15048                                       struct extent_buffer, recow);
15049                 list_del_init(&eb->recow);
15050                 ret = recow_extent_buffer(root, eb);
15051                 err |= !!ret;
15052                 if (ret) {
15053                         error("fails to fix transid errors");
15054                         break;
15055                 }
15056         }
15057
15058         while (!list_empty(&delete_items)) {
15059                 struct bad_item *bad;
15060
15061                 bad = list_first_entry(&delete_items, struct bad_item, list);
15062                 list_del_init(&bad->list);
15063                 if (repair) {
15064                         ret = delete_bad_item(root, bad);
15065                         err |= !!ret;
15066                 }
15067                 free(bad);
15068         }
15069
15070         if (info->quota_enabled) {
15071                 fprintf(stderr, "checking quota groups\n");
15072                 ret = qgroup_verify_all(info);
15073                 err |= !!ret;
15074                 if (ret) {
15075                         error("failed to check quota groups");
15076                         goto out;
15077                 }
15078                 report_qgroups(0);
15079                 ret = repair_qgroups(info, &qgroups_repaired);
15080                 err |= !!ret;
15081                 if (err) {
15082                         error("failed to repair quota groups");
15083                         goto out;
15084                 }
15085                 ret = 0;
15086         }
15087
15088         if (!list_empty(&root->fs_info->recow_ebs)) {
15089                 error("transid errors in file system");
15090                 ret = 1;
15091                 err |= !!ret;
15092         }
15093 out:
15094         printf("found %llu bytes used, ",
15095                (unsigned long long)bytes_used);
15096         if (err)
15097                 printf("error(s) found\n");
15098         else
15099                 printf("no error found\n");
15100         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
15101         printf("total tree bytes: %llu\n",
15102                (unsigned long long)total_btree_bytes);
15103         printf("total fs tree bytes: %llu\n",
15104                (unsigned long long)total_fs_tree_bytes);
15105         printf("total extent tree bytes: %llu\n",
15106                (unsigned long long)total_extent_tree_bytes);
15107         printf("btree space waste bytes: %llu\n",
15108                (unsigned long long)btree_space_waste);
15109         printf("file data blocks allocated: %llu\n referenced %llu\n",
15110                 (unsigned long long)data_bytes_allocated,
15111                 (unsigned long long)data_bytes_referenced);
15112
15113         free_qgroup_counts();
15114         free_root_recs_tree(&root_cache);
15115 close_out:
15116         close_ctree(root);
15117 err_out:
15118         if (ctx.progress_enabled)
15119                 task_deinit(ctx.info);
15120
15121         return err;
15122 }