btrfs-progs: fsck-test: Add new image with shared block ref only metadata backref
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases of the check that the progress indicator can report.
 *
 * The values are used as indexes into task_position_string[] in
 * print_status_check(), so the order here has to match that table.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/* State handed to the progress indicator callbacks (print_status_check()). */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output is wanted - confirm */
        int progress_enabled;
        /* Phase currently reported; TASK_NOTHING makes print_status_check() return */
        enum task_position tp;

        /* Handle passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
60
/* Statistics counters, all starting at zero */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
/* Work lists, initially empty */
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
/* Flags, all off by default */
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
/* Progress indicator state, zero-initialized */
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementations that can be selected by name, see parse_check_mode().
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a name it does
 * not recognize; CHECK_MODE_DEFAULT is an alias of CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect, starts out as the default (original) mode */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header embedded as the first member of both data_backref and
 * tree_backref.  compare_extent_backref() orders backrefs by is_data, then
 * by full_backref, then by the fields of the containing structure.
 */
struct extent_backref {
        /* rb-tree linkage, converted back by rb_node_to_extent_backref() */
        struct rb_node node;
        /* 1 when embedded in a data_backref, 0 when in a tree_backref */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        /* NOTE(review): looks like it selects parent vs root in the container's union - confirm */
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Back reference of a data extent.  The embedded header comes first so the
 * structure can be recovered with to_data_backref().
 */
struct data_backref {
        struct extent_backref node;
        /* parent and root share storage, comparing one compares both */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /*
         * compare_data_backref() only looks at disk_bytenr and bytes when
         * this is non-zero on both sides
         */
        u32 found_ref;
};
115
/*
 * Error bits for problems found in tree items.  Each flag owns one bit so
 * they can be OR-ed into a single mask; numbering starts at bit 1, bit 0 is
 * not assigned.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
#define DIR_INDEX_MISSING       (1<<18) /* INODE_INDEX not found */
#define DIR_INDEX_MISMATCH      (1<<19) /* INODE_INDEX found but not match */
#define DIR_COUNT_AGAIN         (1<<20) /* DIR isize should be recalculated */
#define BG_ACCOUNTING_ERROR     (1<<21) /* Block group accounting error */
137
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
139 {
140         return container_of(back, struct data_backref, node);
141 }
142
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
144 {
145         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147         struct data_backref *back1 = to_data_backref(ext1);
148         struct data_backref *back2 = to_data_backref(ext2);
149
150         WARN_ON(!ext1->is_data);
151         WARN_ON(!ext2->is_data);
152
153         /* parent and root are a union, so this covers both */
154         if (back1->parent > back2->parent)
155                 return 1;
156         if (back1->parent < back2->parent)
157                 return -1;
158
159         /* This is a full backref and the parents match. */
160         if (back1->node.full_backref)
161                 return 0;
162
163         if (back1->owner > back2->owner)
164                 return 1;
165         if (back1->owner < back2->owner)
166                 return -1;
167
168         if (back1->offset > back2->offset)
169                 return 1;
170         if (back1->offset < back2->offset)
171                 return -1;
172
173         if (back1->found_ref && back2->found_ref) {
174                 if (back1->disk_bytenr > back2->disk_bytenr)
175                         return 1;
176                 if (back1->disk_bytenr < back2->disk_bytenr)
177                         return -1;
178
179                 if (back1->bytes > back2->bytes)
180                         return 1;
181                 if (back1->bytes < back2->bytes)
182                         return -1;
183         }
184
185         return 0;
186 }
187
/*
 * Much like data_backref, but with the undetermined members removed and
 * linked through a list_head instead.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
202
/*
 * Back reference of a tree block.  The embedded header comes first so the
 * structure can be recovered with to_tree_backref().
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share storage, comparing one compares both */
        union {
                u64 parent;
                u64 root;
        };
};
210
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
212 {
213         return container_of(back, struct tree_backref, node);
214 }
215
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
217 {
218         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220         struct tree_backref *back1 = to_tree_backref(ext1);
221         struct tree_backref *back2 = to_tree_backref(ext2);
222
223         WARN_ON(ext1->is_data);
224         WARN_ON(ext2->is_data);
225
226         /* parent and root are a union, so this covers both */
227         if (back1->parent > back2->parent)
228                 return 1;
229         if (back1->parent < back2->parent)
230                 return -1;
231
232         return 0;
233 }
234
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
239
240         if (ext1->is_data > ext2->is_data)
241                 return 1;
242
243         if (ext1->is_data < ext2->is_data)
244                 return -1;
245
246         if (ext1->full_backref > ext2->full_backref)
247                 return 1;
248         if (ext1->full_backref < ext2->full_backref)
249                 return -1;
250
251         if (ext1->is_data)
252                 return compare_data_backref(node1, node2);
253         else
254                 return compare_tree_backref(node1, node2);
255 }
256
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a 2-bit field, so the value 2 fits next to 0 and 1.
 */
enum { FLAG_UNSET = 2 };
259
/*
 * Record kept for one extent.  to_extent_record() recovers the record from
 * its 'list' member.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        /* NOTE(review): presumably holds extent_backref nodes ordered by compare_extent_backref() - confirm */
        struct rb_root backref_tree;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* Two bits wide so it can also hold FLAG_UNSET */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
287
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
289 {
290         return container_of(entry, struct extent_record, list);
291 }
292
293 struct inode_backref {
294         struct list_head list;
295         unsigned int found_dir_item:1;
296         unsigned int found_dir_index:1;
297         unsigned int found_inode_ref:1;
298         u8 filetype;
299         u8 ref_type;
300         int errors;
301         u64 dir;
302         u64 index;
303         u16 namelen;
304         char name[0];
305 };
306
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
308 {
309         return list_entry(entry, struct inode_backref, list);
310 }
311
/*
 * List-linked record of one root item.
 * NOTE(review): the fields look like copies of the matching root item
 * members - confirm against the code that fills them in.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        struct btrfs_key drop_key;
};
321
/*
 * Error bits for a single name reference; print_ref_error() turns a mask of
 * them into text.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
335
/*
 * One hole in a file, covering [start, start + len).  Holes are kept in an
 * rb-tree ordered by compare_hole().
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
341
/* Everything collected about one inode while checking a tree. */
struct inode_record {
        /* List of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* Mask of I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* List of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* Reference count; clone_inode_rec() starts a copy at 1 */
        u32 refs;
};
369
/*
 * Error bits stored in inode_record::errors; print_inode_error() turns a
 * mask of them into text.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
385
386 struct root_backref {
387         struct list_head list;
388         unsigned int found_dir_item:1;
389         unsigned int found_dir_index:1;
390         unsigned int found_back_ref:1;
391         unsigned int found_forward_ref:1;
392         unsigned int reachable:1;
393         int errors;
394         u64 ref_root;
395         u64 dir;
396         u64 index;
397         u16 namelen;
398         char name[0];
399 };
400
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
402 {
403         return list_entry(entry, struct root_backref, list);
404 }
405
/* Per-root record, stored in a cache tree through its 'cache' member. */
struct root_record {
        /* NOTE(review): presumably a list of struct root_backref - confirm */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
413
/* Cache tree entry that carries an untyped pointer as its payload. */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
418
/*
 * Cache tree entry with its own root and inode caches.
 * NOTE(review): walk_control keeps one pointer to these per tree level, so
 * it looks like state for a tree node reached during the walk - confirm.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
426
/* Location of one block: where it starts and how large it is. */
struct block_info {
        u64 start;
        u32 size;
};
431
/* State of a tree walk, with one shared_node slot per possible tree level. */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        /* NOTE(review): presumably an index into nodes[] - confirm */
        int active_node;
        int root_level;
};
438
/* List-linked note of an item, identified by its key and the id of its root. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
444
/*
 * List-linked candidate description of an extent.
 * NOTE(review): 'count' presumably tallies how often this bytenr/bytes pair
 * was seen - confirm against the users of this structure.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
452
/* Information gathered about a root, stored through its 'cache_extent' member. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
462
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag has to own a distinct bit, otherwise two different errors
 * cannot be told apart once they are OR-ed into one mask.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
/* Used to share bit 4 with REFERENCER_MISMATCH; moved to its own bit */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
479
480 static void *print_status_check(void *p)
481 {
482         struct task_ctx *priv = p;
483         const char work_indicator[] = { '.', 'o', 'O', 'o' };
484         uint32_t count = 0;
485         static char *task_position_string[] = {
486                 "checking extents",
487                 "checking free space cache",
488                 "checking fs roots",
489         };
490
491         task_period_start(priv->info, 1000 /* 1s */);
492
493         if (priv->tp == TASK_NOTHING)
494                 return NULL;
495
496         while (1) {
497                 printf("%s [%c]\r", task_position_string[priv->tp],
498                                 work_indicator[count % 4]);
499                 count++;
500                 fflush(stdout);
501                 task_period_wait(priv->info);
502         }
503         return NULL;
504 }
505
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 * @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
513
514 static enum btrfs_check_mode parse_check_mode(const char *str)
515 {
516         if (strcmp(str, "lowmem") == 0)
517                 return CHECK_MODE_LOWMEM;
518         if (strcmp(str, "orig") == 0)
519                 return CHECK_MODE_ORIGINAL;
520         if (strcmp(str, "original") == 0)
521                 return CHECK_MODE_ORIGINAL;
522
523         return CHECK_MODE_UNKNOWN;
524 }
525
526 /* Compatible function to allow reuse of old codes */
527 static u64 first_extent_gap(struct rb_root *holes)
528 {
529         struct file_extent_hole *hole;
530
531         if (RB_EMPTY_ROOT(holes))
532                 return (u64)-1;
533
534         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
535         return hole->start;
536 }
537
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
539 {
540         struct file_extent_hole *hole1;
541         struct file_extent_hole *hole2;
542
543         hole1 = rb_entry(node1, struct file_extent_hole, node);
544         hole2 = rb_entry(node2, struct file_extent_hole, node);
545
546         if (hole1->start > hole2->start)
547                 return -1;
548         if (hole1->start < hole2->start)
549                 return 1;
550         /* Now hole1->start == hole2->start */
551         if (hole1->len >= hole2->len)
552                 /*
553                  * Hole 1 will be merge center
554                  * Same hole will be merged later
555                  */
556                 return -1;
557         /* Hole 2 will be merge center */
558         return 1;
559 }
560
561 /*
562  * Add a hole to the record
563  *
564  * This will do hole merge for copy_file_extent_holes(),
565  * which will ensure there won't be continuous holes.
566  */
567 static int add_file_extent_hole(struct rb_root *holes,
568                                 u64 start, u64 len)
569 {
570         struct file_extent_hole *hole;
571         struct file_extent_hole *prev = NULL;
572         struct file_extent_hole *next = NULL;
573
574         hole = malloc(sizeof(*hole));
575         if (!hole)
576                 return -ENOMEM;
577         hole->start = start;
578         hole->len = len;
579         /* Since compare will not return 0, no -EEXIST will happen */
580         rb_insert(holes, &hole->node, compare_hole);
581
582         /* simple merge with previous hole */
583         if (rb_prev(&hole->node))
584                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
585                                 node);
586         if (prev && prev->start + prev->len >= hole->start) {
587                 hole->len = hole->start + hole->len - prev->start;
588                 hole->start = prev->start;
589                 rb_erase(&prev->node, holes);
590                 free(prev);
591                 prev = NULL;
592         }
593
594         /* iterate merge with next holes */
595         while (1) {
596                 if (!rb_next(&hole->node))
597                         break;
598                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
599                                         node);
600                 if (hole->start + hole->len >= next->start) {
601                         if (hole->start + hole->len <= next->start + next->len)
602                                 hole->len = next->start + next->len -
603                                             hole->start;
604                         rb_erase(&next->node, holes);
605                         free(next);
606                         next = NULL;
607                 } else
608                         break;
609         }
610         return 0;
611 }
612
613 static int compare_hole_range(struct rb_node *node, void *data)
614 {
615         struct file_extent_hole *hole;
616         u64 start;
617
618         hole = (struct file_extent_hole *)data;
619         start = hole->start;
620
621         hole = rb_entry(node, struct file_extent_hole, node);
622         if (start < hole->start)
623                 return -1;
624         if (start >= hole->start && start < hole->start + hole->len)
625                 return 0;
626         return 1;
627 }
628
629 /*
630  * Delete a hole in the record
631  *
632  * This will do the hole split and is much restrict than add.
633  */
634 static int del_file_extent_hole(struct rb_root *holes,
635                                 u64 start, u64 len)
636 {
637         struct file_extent_hole *hole;
638         struct file_extent_hole tmp;
639         u64 prev_start = 0;
640         u64 prev_len = 0;
641         u64 next_start = 0;
642         u64 next_len = 0;
643         struct rb_node *node;
644         int have_prev = 0;
645         int have_next = 0;
646         int ret = 0;
647
648         tmp.start = start;
649         tmp.len = len;
650         node = rb_search(holes, &tmp, compare_hole_range, NULL);
651         if (!node)
652                 return -EEXIST;
653         hole = rb_entry(node, struct file_extent_hole, node);
654         if (start + len > hole->start + hole->len)
655                 return -EEXIST;
656
657         /*
658          * Now there will be no overlap, delete the hole and re-add the
659          * split(s) if they exists.
660          */
661         if (start > hole->start) {
662                 prev_start = hole->start;
663                 prev_len = start - hole->start;
664                 have_prev = 1;
665         }
666         if (hole->start + hole->len > start + len) {
667                 next_start = start + len;
668                 next_len = hole->start + hole->len - start - len;
669                 have_next = 1;
670         }
671         rb_erase(node, holes);
672         free(hole);
673         if (have_prev) {
674                 ret = add_file_extent_hole(holes, prev_start, prev_len);
675                 if (ret < 0)
676                         return ret;
677         }
678         if (have_next) {
679                 ret = add_file_extent_hole(holes, next_start, next_len);
680                 if (ret < 0)
681                         return ret;
682         }
683         return 0;
684 }
685
686 static int copy_file_extent_holes(struct rb_root *dst,
687                                   struct rb_root *src)
688 {
689         struct file_extent_hole *hole;
690         struct rb_node *node;
691         int ret = 0;
692
693         node = rb_first(src);
694         while (node) {
695                 hole = rb_entry(node, struct file_extent_hole, node);
696                 ret = add_file_extent_hole(dst, hole->start, hole->len);
697                 if (ret)
698                         break;
699                 node = rb_next(node);
700         }
701         return ret;
702 }
703
704 static void free_file_extent_holes(struct rb_root *holes)
705 {
706         struct rb_node *node;
707         struct file_extent_hole *hole;
708
709         node = rb_first(holes);
710         while (node) {
711                 hole = rb_entry(node, struct file_extent_hole, node);
712                 rb_erase(node, holes);
713                 free(hole);
714                 node = rb_first(holes);
715         }
716 }
717
/* Forward declaration; being static, the function is defined in this file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
719
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721                                  struct btrfs_root *root)
722 {
723         if (root->last_trans != trans->transid) {
724                 root->track_dirty = 1;
725                 root->last_trans = trans->transid;
726                 root->commit_root = root->node;
727                 extent_buffer_get(root->node);
728         }
729 }
730
731 static u8 imode_to_type(u32 imode)
732 {
733 #define S_SHIFT 12
734         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
736                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
737                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
738                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
739                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
740                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
741                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
742         };
743
744         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
745 #undef S_SHIFT
746 }
747
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
749 {
750         struct device_record *rec1;
751         struct device_record *rec2;
752
753         rec1 = rb_entry(node1, struct device_record, node);
754         rec2 = rb_entry(node2, struct device_record, node);
755         if (rec1->devid > rec2->devid)
756                 return -1;
757         else if (rec1->devid < rec2->devid)
758                 return 1;
759         else
760                 return 0;
761 }
762
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
764 {
765         struct inode_record *rec;
766         struct inode_backref *backref;
767         struct inode_backref *orig;
768         struct inode_backref *tmp;
769         struct orphan_data_extent *src_orphan;
770         struct orphan_data_extent *dst_orphan;
771         struct rb_node *rb;
772         size_t size;
773         int ret;
774
775         rec = malloc(sizeof(*rec));
776         if (!rec)
777                 return ERR_PTR(-ENOMEM);
778         memcpy(rec, orig_rec, sizeof(*rec));
779         rec->refs = 1;
780         INIT_LIST_HEAD(&rec->backrefs);
781         INIT_LIST_HEAD(&rec->orphan_extents);
782         rec->holes = RB_ROOT;
783
784         list_for_each_entry(orig, &orig_rec->backrefs, list) {
785                 size = sizeof(*orig) + orig->namelen + 1;
786                 backref = malloc(size);
787                 if (!backref) {
788                         ret = -ENOMEM;
789                         goto cleanup;
790                 }
791                 memcpy(backref, orig, size);
792                 list_add_tail(&backref->list, &rec->backrefs);
793         }
794         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795                 dst_orphan = malloc(sizeof(*dst_orphan));
796                 if (!dst_orphan) {
797                         ret = -ENOMEM;
798                         goto cleanup;
799                 }
800                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
802         }
803         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
804         if (ret < 0)
805                 goto cleanup_rb;
806
807         return rec;
808
809 cleanup_rb:
810         rb = rb_first(&rec->holes);
811         while (rb) {
812                 struct file_extent_hole *hole;
813
814                 hole = rb_entry(rb, struct file_extent_hole, node);
815                 rb = rb_next(rb);
816                 free(hole);
817         }
818
819 cleanup:
820         if (!list_empty(&rec->backrefs))
821                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822                         list_del(&orig->list);
823                         free(orig);
824                 }
825
826         if (!list_empty(&rec->orphan_extents))
827                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828                         list_del(&orig->list);
829                         free(orig);
830                 }
831
832         free(rec);
833
834         return ERR_PTR(ret);
835 }
836
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
838                                       u64 objectid)
839 {
840         struct orphan_data_extent *orphan;
841
842         if (list_empty(orphan_extents))
843                 return;
844         printf("The following data extent is lost in tree %llu:\n",
845                objectid);
846         list_for_each_entry(orphan, orphan_extents, list) {
847                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
849                        orphan->disk_len);
850         }
851 }
852
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
854 {
855         u64 root_objectid = root->root_key.objectid;
856         int errors = rec->errors;
857
858         if (!errors)
859                 return;
860         /* reloc root errors, we print its corresponding fs root objectid*/
861         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862                 root_objectid = root->root_key.offset;
863                 fprintf(stderr, "reloc");
864         }
865         fprintf(stderr, "root %llu inode %llu errors %x",
866                 (unsigned long long) root_objectid,
867                 (unsigned long long) rec->ino, rec->errors);
868
869         if (errors & I_ERR_NO_INODE_ITEM)
870                 fprintf(stderr, ", no inode item");
871         if (errors & I_ERR_NO_ORPHAN_ITEM)
872                 fprintf(stderr, ", no orphan item");
873         if (errors & I_ERR_DUP_INODE_ITEM)
874                 fprintf(stderr, ", dup inode item");
875         if (errors & I_ERR_DUP_DIR_INDEX)
876                 fprintf(stderr, ", dup dir index");
877         if (errors & I_ERR_ODD_DIR_ITEM)
878                 fprintf(stderr, ", odd dir item");
879         if (errors & I_ERR_ODD_FILE_EXTENT)
880                 fprintf(stderr, ", odd file extent");
881         if (errors & I_ERR_BAD_FILE_EXTENT)
882                 fprintf(stderr, ", bad file extent");
883         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884                 fprintf(stderr, ", file extent overlap");
885         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886                 fprintf(stderr, ", file extent discount");
887         if (errors & I_ERR_DIR_ISIZE_WRONG)
888                 fprintf(stderr, ", dir isize wrong");
889         if (errors & I_ERR_FILE_NBYTES_WRONG)
890                 fprintf(stderr, ", nbytes wrong");
891         if (errors & I_ERR_ODD_CSUM_ITEM)
892                 fprintf(stderr, ", odd csum item");
893         if (errors & I_ERR_SOME_CSUM_MISSING)
894                 fprintf(stderr, ", some csum missing");
895         if (errors & I_ERR_LINK_COUNT_WRONG)
896                 fprintf(stderr, ", link count wrong");
897         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898                 fprintf(stderr, ", orphan file extent");
899         fprintf(stderr, "\n");
900         /* Print the orphan extents if needed */
901         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
903
904         /* Print the holes if needed */
905         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906                 struct file_extent_hole *hole;
907                 struct rb_node *node;
908                 int found = 0;
909
910                 node = rb_first(&rec->holes);
911                 fprintf(stderr, "Found file extent holes:\n");
912                 while (node) {
913                         found = 1;
914                         hole = rb_entry(node, struct file_extent_hole, node);
915                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
916                                 hole->start, hole->len);
917                         node = rb_next(node);
918                 }
919                 if (!found)
920                         fprintf(stderr, "\tstart: 0, len: %llu\n",
921                                 round_up(rec->isize,
922                                          root->fs_info->sectorsize));
923         }
924 }
925
926 static void print_ref_error(int errors)
927 {
928         if (errors & REF_ERR_NO_DIR_ITEM)
929                 fprintf(stderr, ", no dir item");
930         if (errors & REF_ERR_NO_DIR_INDEX)
931                 fprintf(stderr, ", no dir index");
932         if (errors & REF_ERR_NO_INODE_REF)
933                 fprintf(stderr, ", no inode ref");
934         if (errors & REF_ERR_DUP_DIR_ITEM)
935                 fprintf(stderr, ", dup dir item");
936         if (errors & REF_ERR_DUP_DIR_INDEX)
937                 fprintf(stderr, ", dup dir index");
938         if (errors & REF_ERR_DUP_INODE_REF)
939                 fprintf(stderr, ", dup inode ref");
940         if (errors & REF_ERR_INDEX_UNMATCH)
941                 fprintf(stderr, ", index mismatch");
942         if (errors & REF_ERR_FILETYPE_UNMATCH)
943                 fprintf(stderr, ", filetype mismatch");
944         if (errors & REF_ERR_NAME_TOO_LONG)
945                 fprintf(stderr, ", name too long");
946         if (errors & REF_ERR_NO_ROOT_REF)
947                 fprintf(stderr, ", no root ref");
948         if (errors & REF_ERR_NO_ROOT_BACKREF)
949                 fprintf(stderr, ", no root backref");
950         if (errors & REF_ERR_DUP_ROOT_REF)
951                 fprintf(stderr, ", dup root ref");
952         if (errors & REF_ERR_DUP_ROOT_BACKREF)
953                 fprintf(stderr, ", dup root backref");
954         fprintf(stderr, "\n");
955 }
956
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
958                                           u64 ino, int mod)
959 {
960         struct ptr_node *node;
961         struct cache_extent *cache;
962         struct inode_record *rec = NULL;
963         int ret;
964
965         cache = lookup_cache_extent(inode_cache, ino, 1);
966         if (cache) {
967                 node = container_of(cache, struct ptr_node, cache);
968                 rec = node->data;
969                 if (mod && rec->refs > 1) {
970                         node->data = clone_inode_rec(rec);
971                         if (IS_ERR(node->data))
972                                 return node->data;
973                         rec->refs--;
974                         rec = node->data;
975                 }
976         } else if (mod) {
977                 rec = calloc(1, sizeof(*rec));
978                 if (!rec)
979                         return ERR_PTR(-ENOMEM);
980                 rec->ino = ino;
981                 rec->extent_start = (u64)-1;
982                 rec->refs = 1;
983                 INIT_LIST_HEAD(&rec->backrefs);
984                 INIT_LIST_HEAD(&rec->orphan_extents);
985                 rec->holes = RB_ROOT;
986
987                 node = malloc(sizeof(*node));
988                 if (!node) {
989                         free(rec);
990                         return ERR_PTR(-ENOMEM);
991                 }
992                 node->cache.start = ino;
993                 node->cache.size = 1;
994                 node->data = rec;
995
996                 if (ino == BTRFS_FREE_INO_OBJECTID)
997                         rec->found_link = 1;
998
999                 ret = insert_cache_extent(inode_cache, &node->cache);
1000                 if (ret)
1001                         return ERR_PTR(-EEXIST);
1002         }
1003         return rec;
1004 }
1005
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1007 {
1008         struct orphan_data_extent *orphan;
1009
1010         while (!list_empty(orphan_extents)) {
1011                 orphan = list_entry(orphan_extents->next,
1012                                     struct orphan_data_extent, list);
1013                 list_del(&orphan->list);
1014                 free(orphan);
1015         }
1016 }
1017
1018 static void free_inode_rec(struct inode_record *rec)
1019 {
1020         struct inode_backref *backref;
1021
1022         if (--rec->refs > 0)
1023                 return;
1024
1025         while (!list_empty(&rec->backrefs)) {
1026                 backref = to_inode_backref(rec->backrefs.next);
1027                 list_del(&backref->list);
1028                 free(backref);
1029         }
1030         free_orphan_data_extents(&rec->orphan_extents);
1031         free_file_extent_holes(&rec->holes);
1032         free(rec);
1033 }
1034
1035 static int can_free_inode_rec(struct inode_record *rec)
1036 {
1037         if (!rec->errors && rec->checked && rec->found_inode_item &&
1038             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1039                 return 1;
1040         return 0;
1041 }
1042
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044                                  struct inode_record *rec)
1045 {
1046         struct cache_extent *cache;
1047         struct inode_backref *tmp, *backref;
1048         struct ptr_node *node;
1049         u8 filetype;
1050
1051         if (!rec->found_inode_item)
1052                 return;
1053
1054         filetype = imode_to_type(rec->imode);
1055         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056                 if (backref->found_dir_item && backref->found_dir_index) {
1057                         if (backref->filetype != filetype)
1058                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059                         if (!backref->errors && backref->found_inode_ref &&
1060                             rec->nlink == rec->found_link) {
1061                                 list_del(&backref->list);
1062                                 free(backref);
1063                         }
1064                 }
1065         }
1066
1067         if (!rec->checked || rec->merging)
1068                 return;
1069
1070         if (S_ISDIR(rec->imode)) {
1071                 if (rec->found_size != rec->isize)
1072                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073                 if (rec->found_file_extent)
1074                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076                 if (rec->found_dir_item)
1077                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1078                 if (rec->found_size != rec->nbytes)
1079                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1080                 if (rec->nlink > 0 && !no_holes &&
1081                     (rec->extent_end < rec->isize ||
1082                      first_extent_gap(&rec->holes) < rec->isize))
1083                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1084         }
1085
1086         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087                 if (rec->found_csum_item && rec->nodatasum)
1088                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089                 if (rec->some_csum_missing && !rec->nodatasum)
1090                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1091         }
1092
1093         BUG_ON(rec->refs != 1);
1094         if (can_free_inode_rec(rec)) {
1095                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096                 node = container_of(cache, struct ptr_node, cache);
1097                 BUG_ON(node->data != rec);
1098                 remove_cache_extent(inode_cache, &node->cache);
1099                 free(node);
1100                 free_inode_rec(rec);
1101         }
1102 }
1103
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1105 {
1106         struct btrfs_path path;
1107         struct btrfs_key key;
1108         int ret;
1109
1110         key.objectid = BTRFS_ORPHAN_OBJECTID;
1111         key.type = BTRFS_ORPHAN_ITEM_KEY;
1112         key.offset = ino;
1113
1114         btrfs_init_path(&path);
1115         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116         btrfs_release_path(&path);
1117         if (ret > 0)
1118                 ret = -ENOENT;
1119         return ret;
1120 }
1121
1122 static int process_inode_item(struct extent_buffer *eb,
1123                               int slot, struct btrfs_key *key,
1124                               struct shared_node *active_node)
1125 {
1126         struct inode_record *rec;
1127         struct btrfs_inode_item *item;
1128
1129         rec = active_node->current;
1130         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131         if (rec->found_inode_item) {
1132                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1133                 return 1;
1134         }
1135         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136         rec->nlink = btrfs_inode_nlink(eb, item);
1137         rec->isize = btrfs_inode_size(eb, item);
1138         rec->nbytes = btrfs_inode_nbytes(eb, item);
1139         rec->imode = btrfs_inode_mode(eb, item);
1140         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1141                 rec->nodatasum = 1;
1142         rec->found_inode_item = 1;
1143         if (rec->nlink == 0)
1144                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145         maybe_free_inode_rec(&active_node->inode_cache, rec);
1146         return 0;
1147 }
1148
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1150                                                 const char *name,
1151                                                 int namelen, u64 dir)
1152 {
1153         struct inode_backref *backref;
1154
1155         list_for_each_entry(backref, &rec->backrefs, list) {
1156                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1157                         break;
1158                 if (backref->dir != dir || backref->namelen != namelen)
1159                         continue;
1160                 if (memcmp(name, backref->name, namelen))
1161                         continue;
1162                 return backref;
1163         }
1164
1165         backref = malloc(sizeof(*backref) + namelen + 1);
1166         if (!backref)
1167                 return NULL;
1168         memset(backref, 0, sizeof(*backref));
1169         backref->dir = dir;
1170         backref->namelen = namelen;
1171         memcpy(backref->name, name, namelen);
1172         backref->name[namelen] = '\0';
1173         list_add_tail(&backref->list, &rec->backrefs);
1174         return backref;
1175 }
1176
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178                              u64 ino, u64 dir, u64 index,
1179                              const char *name, int namelen,
1180                              u8 filetype, u8 itemtype, int errors)
1181 {
1182         struct inode_record *rec;
1183         struct inode_backref *backref;
1184
1185         rec = get_inode_rec(inode_cache, ino, 1);
1186         BUG_ON(IS_ERR(rec));
1187         backref = get_inode_backref(rec, name, namelen, dir);
1188         BUG_ON(!backref);
1189         if (errors)
1190                 backref->errors |= errors;
1191         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192                 if (backref->found_dir_index)
1193                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194                 if (backref->found_inode_ref && backref->index != index)
1195                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1196                 if (backref->found_dir_item && backref->filetype != filetype)
1197                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1198
1199                 backref->index = index;
1200                 backref->filetype = filetype;
1201                 backref->found_dir_index = 1;
1202         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1203                 rec->found_link++;
1204                 if (backref->found_dir_item)
1205                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206                 if (backref->found_dir_index && backref->filetype != filetype)
1207                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1208
1209                 backref->filetype = filetype;
1210                 backref->found_dir_item = 1;
1211         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213                 if (backref->found_inode_ref)
1214                         backref->errors |= REF_ERR_DUP_INODE_REF;
1215                 if (backref->found_dir_index && backref->index != index)
1216                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1217                 else
1218                         backref->index = index;
1219
1220                 backref->ref_type = itemtype;
1221                 backref->found_inode_ref = 1;
1222         } else {
1223                 BUG_ON(1);
1224         }
1225
1226         maybe_free_inode_rec(inode_cache, rec);
1227         return 0;
1228 }
1229
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231                             struct cache_tree *dst_cache)
1232 {
1233         struct inode_backref *backref;
1234         u32 dir_count = 0;
1235         int ret = 0;
1236
1237         dst->merging = 1;
1238         list_for_each_entry(backref, &src->backrefs, list) {
1239                 if (backref->found_dir_index) {
1240                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1241                                         backref->index, backref->name,
1242                                         backref->namelen, backref->filetype,
1243                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1244                 }
1245                 if (backref->found_dir_item) {
1246                         dir_count++;
1247                         add_inode_backref(dst_cache, dst->ino,
1248                                         backref->dir, 0, backref->name,
1249                                         backref->namelen, backref->filetype,
1250                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1251                 }
1252                 if (backref->found_inode_ref) {
1253                         add_inode_backref(dst_cache, dst->ino,
1254                                         backref->dir, backref->index,
1255                                         backref->name, backref->namelen, 0,
1256                                         backref->ref_type, backref->errors);
1257                 }
1258         }
1259
1260         if (src->found_dir_item)
1261                 dst->found_dir_item = 1;
1262         if (src->found_file_extent)
1263                 dst->found_file_extent = 1;
1264         if (src->found_csum_item)
1265                 dst->found_csum_item = 1;
1266         if (src->some_csum_missing)
1267                 dst->some_csum_missing = 1;
1268         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1270                 if (ret < 0)
1271                         return ret;
1272         }
1273
1274         BUG_ON(src->found_link < dir_count);
1275         dst->found_link += src->found_link - dir_count;
1276         dst->found_size += src->found_size;
1277         if (src->extent_start != (u64)-1) {
1278                 if (dst->extent_start == (u64)-1) {
1279                         dst->extent_start = src->extent_start;
1280                         dst->extent_end = src->extent_end;
1281                 } else {
1282                         if (dst->extent_end > src->extent_start)
1283                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284                         else if (dst->extent_end < src->extent_start) {
1285                                 ret = add_file_extent_hole(&dst->holes,
1286                                         dst->extent_end,
1287                                         src->extent_start - dst->extent_end);
1288                         }
1289                         if (dst->extent_end < src->extent_end)
1290                                 dst->extent_end = src->extent_end;
1291                 }
1292         }
1293
1294         dst->errors |= src->errors;
1295         if (src->found_inode_item) {
1296                 if (!dst->found_inode_item) {
1297                         dst->nlink = src->nlink;
1298                         dst->isize = src->isize;
1299                         dst->nbytes = src->nbytes;
1300                         dst->imode = src->imode;
1301                         dst->nodatasum = src->nodatasum;
1302                         dst->found_inode_item = 1;
1303                 } else {
1304                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1305                 }
1306         }
1307         dst->merging = 0;
1308
1309         return 0;
1310 }
1311
1312 static int splice_shared_node(struct shared_node *src_node,
1313                               struct shared_node *dst_node)
1314 {
1315         struct cache_extent *cache;
1316         struct ptr_node *node, *ins;
1317         struct cache_tree *src, *dst;
1318         struct inode_record *rec, *conflict;
1319         u64 current_ino = 0;
1320         int splice = 0;
1321         int ret;
1322
1323         if (--src_node->refs == 0)
1324                 splice = 1;
1325         if (src_node->current)
1326                 current_ino = src_node->current->ino;
1327
1328         src = &src_node->root_cache;
1329         dst = &dst_node->root_cache;
1330 again:
1331         cache = search_cache_extent(src, 0);
1332         while (cache) {
1333                 node = container_of(cache, struct ptr_node, cache);
1334                 rec = node->data;
1335                 cache = next_cache_extent(cache);
1336
1337                 if (splice) {
1338                         remove_cache_extent(src, &node->cache);
1339                         ins = node;
1340                 } else {
1341                         ins = malloc(sizeof(*ins));
1342                         BUG_ON(!ins);
1343                         ins->cache.start = node->cache.start;
1344                         ins->cache.size = node->cache.size;
1345                         ins->data = rec;
1346                         rec->refs++;
1347                 }
1348                 ret = insert_cache_extent(dst, &ins->cache);
1349                 if (ret == -EEXIST) {
1350                         conflict = get_inode_rec(dst, rec->ino, 1);
1351                         BUG_ON(IS_ERR(conflict));
1352                         merge_inode_recs(rec, conflict, dst);
1353                         if (rec->checked) {
1354                                 conflict->checked = 1;
1355                                 if (dst_node->current == conflict)
1356                                         dst_node->current = NULL;
1357                         }
1358                         maybe_free_inode_rec(dst, conflict);
1359                         free_inode_rec(rec);
1360                         free(ins);
1361                 } else {
1362                         BUG_ON(ret);
1363                 }
1364         }
1365
1366         if (src == &src_node->root_cache) {
1367                 src = &src_node->inode_cache;
1368                 dst = &dst_node->inode_cache;
1369                 goto again;
1370         }
1371
1372         if (current_ino > 0 && (!dst_node->current ||
1373             current_ino > dst_node->current->ino)) {
1374                 if (dst_node->current) {
1375                         dst_node->current->checked = 1;
1376                         maybe_free_inode_rec(dst, dst_node->current);
1377                 }
1378                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379                 BUG_ON(IS_ERR(dst_node->current));
1380         }
1381         return 0;
1382 }
1383
1384 static void free_inode_ptr(struct cache_extent *cache)
1385 {
1386         struct ptr_node *node;
1387         struct inode_record *rec;
1388
1389         node = container_of(cache, struct ptr_node, cache);
1390         rec = node->data;
1391         free_inode_rec(rec);
1392         free(node);
1393 }
1394
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1396
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1398                                             u64 bytenr)
1399 {
1400         struct cache_extent *cache;
1401         struct shared_node *node;
1402
1403         cache = lookup_cache_extent(shared, bytenr, 1);
1404         if (cache) {
1405                 node = container_of(cache, struct shared_node, cache);
1406                 return node;
1407         }
1408         return NULL;
1409 }
1410
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1412 {
1413         int ret;
1414         struct shared_node *node;
1415
1416         node = calloc(1, sizeof(*node));
1417         if (!node)
1418                 return -ENOMEM;
1419         node->cache.start = bytenr;
1420         node->cache.size = 1;
1421         cache_tree_init(&node->root_cache);
1422         cache_tree_init(&node->inode_cache);
1423         node->refs = refs;
1424
1425         ret = insert_cache_extent(shared, &node->cache);
1426
1427         return ret;
1428 }
1429
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431                              struct walk_control *wc, int level)
1432 {
1433         struct shared_node *node;
1434         struct shared_node *dest;
1435         int ret;
1436
1437         if (level == wc->active_node)
1438                 return 0;
1439
1440         BUG_ON(wc->active_node <= level);
1441         node = find_shared_node(&wc->shared, bytenr);
1442         if (!node) {
1443                 ret = add_shared_node(&wc->shared, bytenr, refs);
1444                 BUG_ON(ret);
1445                 node = find_shared_node(&wc->shared, bytenr);
1446                 wc->nodes[level] = node;
1447                 wc->active_node = level;
1448                 return 0;
1449         }
1450
1451         if (wc->root_level == wc->active_node &&
1452             btrfs_root_refs(&root->root_item) == 0) {
1453                 if (--node->refs == 0) {
1454                         free_inode_recs_tree(&node->root_cache);
1455                         free_inode_recs_tree(&node->inode_cache);
1456                         remove_cache_extent(&wc->shared, &node->cache);
1457                         free(node);
1458                 }
1459                 return 1;
1460         }
1461
1462         dest = wc->nodes[wc->active_node];
1463         splice_shared_node(node, dest);
1464         if (node->refs == 0) {
1465                 remove_cache_extent(&wc->shared, &node->cache);
1466                 free(node);
1467         }
1468         return 1;
1469 }
1470
1471 static int leave_shared_node(struct btrfs_root *root,
1472                              struct walk_control *wc, int level)
1473 {
1474         struct shared_node *node;
1475         struct shared_node *dest;
1476         int i;
1477
1478         if (level == wc->root_level)
1479                 return 0;
1480
1481         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1482                 if (wc->nodes[i])
1483                         break;
1484         }
1485         BUG_ON(i >= BTRFS_MAX_LEVEL);
1486
1487         node = wc->nodes[wc->active_node];
1488         wc->nodes[wc->active_node] = NULL;
1489         wc->active_node = i;
1490
1491         dest = wc->nodes[wc->active_node];
1492         if (wc->active_node < wc->root_level ||
1493             btrfs_root_refs(&root->root_item) > 0) {
1494                 BUG_ON(node->refs <= 1);
1495                 splice_shared_node(node, dest);
1496         } else {
1497                 BUG_ON(node->refs < 2);
1498                 node->refs--;
1499         }
1500         return 0;
1501 }
1502
1503 /*
1504  * Returns:
1505  * < 0 - on error
1506  * 1   - if the root with id child_root_id is a child of root parent_root_id
1507  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1508  *       has other root(s) as parent(s)
1509  * 2   - if the root child_root_id doesn't have any parent roots
1510  */
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1512                          u64 child_root_id)
1513 {
1514         struct btrfs_path path;
1515         struct btrfs_key key;
1516         struct extent_buffer *leaf;
1517         int has_parent = 0;
1518         int ret;
1519
1520         btrfs_init_path(&path);
1521
1522         key.objectid = parent_root_id;
1523         key.type = BTRFS_ROOT_REF_KEY;
1524         key.offset = child_root_id;
1525         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1526                                 0, 0);
1527         if (ret < 0)
1528                 return ret;
1529         btrfs_release_path(&path);
1530         if (!ret)
1531                 return 1;
1532
1533         key.objectid = child_root_id;
1534         key.type = BTRFS_ROOT_BACKREF_KEY;
1535         key.offset = 0;
1536         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1537                                 0, 0);
1538         if (ret < 0)
1539                 goto out;
1540
1541         while (1) {
1542                 leaf = path.nodes[0];
1543                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1545                         if (ret)
1546                                 break;
1547                         leaf = path.nodes[0];
1548                 }
1549
1550                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1551                 if (key.objectid != child_root_id ||
1552                     key.type != BTRFS_ROOT_BACKREF_KEY)
1553                         break;
1554
1555                 has_parent = 1;
1556
1557                 if (key.offset == parent_root_id) {
1558                         btrfs_release_path(&path);
1559                         return 1;
1560                 }
1561
1562                 path.slots[0]++;
1563         }
1564 out:
1565         btrfs_release_path(&path);
1566         if (ret < 0)
1567                 return ret;
1568         return has_parent ? 0 : 2;
1569 }
1570
1571 static int process_dir_item(struct extent_buffer *eb,
1572                             int slot, struct btrfs_key *key,
1573                             struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u32 data_len;
1580         int error;
1581         int nritems = 0;
1582         u8 filetype;
1583         struct btrfs_dir_item *di;
1584         struct inode_record *rec;
1585         struct cache_tree *root_cache;
1586         struct cache_tree *inode_cache;
1587         struct btrfs_key location;
1588         char namebuf[BTRFS_NAME_LEN];
1589
1590         root_cache = &active_node->root_cache;
1591         inode_cache = &active_node->inode_cache;
1592         rec = active_node->current;
1593         rec->found_dir_item = 1;
1594
1595         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596         total = btrfs_item_size_nr(eb, slot);
1597         while (cur < total) {
1598                 nritems++;
1599                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600                 name_len = btrfs_dir_name_len(eb, di);
1601                 data_len = btrfs_dir_data_len(eb, di);
1602                 filetype = btrfs_dir_type(eb, di);
1603
1604                 rec->found_size += name_len;
1605                 if (cur + sizeof(*di) + name_len > total ||
1606                     name_len > BTRFS_NAME_LEN) {
1607                         error = REF_ERR_NAME_TOO_LONG;
1608
1609                         if (cur + sizeof(*di) > total)
1610                                 break;
1611                         len = min_t(u32, total - cur - sizeof(*di),
1612                                     BTRFS_NAME_LEN);
1613                 } else {
1614                         len = name_len;
1615                         error = 0;
1616                 }
1617
1618                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1619
1620                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621                     key->offset != btrfs_name_hash(namebuf, len)) {
1622                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1623                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624                         key->objectid, key->offset, namebuf, len, filetype,
1625                         key->offset, btrfs_name_hash(namebuf, len));
1626                 }
1627
1628                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629                         add_inode_backref(inode_cache, location.objectid,
1630                                           key->objectid, key->offset, namebuf,
1631                                           len, filetype, key->type, error);
1632                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633                         add_inode_backref(root_cache, location.objectid,
1634                                           key->objectid, key->offset,
1635                                           namebuf, len, filetype,
1636                                           key->type, error);
1637                 } else {
1638                         fprintf(stderr, "invalid location in dir item %u\n",
1639                                 location.type);
1640                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1641                                           key->objectid, key->offset, namebuf,
1642                                           len, filetype, key->type, error);
1643                 }
1644
1645                 len = sizeof(*di) + name_len + data_len;
1646                 di = (struct btrfs_dir_item *)((char *)di + len);
1647                 cur += len;
1648         }
1649         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1650                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1651
1652         return 0;
1653 }
1654
1655 static int process_inode_ref(struct extent_buffer *eb,
1656                              int slot, struct btrfs_key *key,
1657                              struct shared_node *active_node)
1658 {
1659         u32 total;
1660         u32 cur = 0;
1661         u32 len;
1662         u32 name_len;
1663         u64 index;
1664         int error;
1665         struct cache_tree *inode_cache;
1666         struct btrfs_inode_ref *ref;
1667         char namebuf[BTRFS_NAME_LEN];
1668
1669         inode_cache = &active_node->inode_cache;
1670
1671         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1672         total = btrfs_item_size_nr(eb, slot);
1673         while (cur < total) {
1674                 name_len = btrfs_inode_ref_name_len(eb, ref);
1675                 index = btrfs_inode_ref_index(eb, ref);
1676
1677                 /* inode_ref + namelen should not cross item boundary */
1678                 if (cur + sizeof(*ref) + name_len > total ||
1679                     name_len > BTRFS_NAME_LEN) {
1680                         if (total < cur + sizeof(*ref))
1681                                 break;
1682
1683                         /* Still try to read out the remaining part */
1684                         len = min_t(u32, total - cur - sizeof(*ref),
1685                                     BTRFS_NAME_LEN);
1686                         error = REF_ERR_NAME_TOO_LONG;
1687                 } else {
1688                         len = name_len;
1689                         error = 0;
1690                 }
1691
1692                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1693                 add_inode_backref(inode_cache, key->objectid, key->offset,
1694                                   index, namebuf, len, 0, key->type, error);
1695
1696                 len = sizeof(*ref) + name_len;
1697                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1698                 cur += len;
1699         }
1700         return 0;
1701 }
1702
1703 static int process_inode_extref(struct extent_buffer *eb,
1704                                 int slot, struct btrfs_key *key,
1705                                 struct shared_node *active_node)
1706 {
1707         u32 total;
1708         u32 cur = 0;
1709         u32 len;
1710         u32 name_len;
1711         u64 index;
1712         u64 parent;
1713         int error;
1714         struct cache_tree *inode_cache;
1715         struct btrfs_inode_extref *extref;
1716         char namebuf[BTRFS_NAME_LEN];
1717
1718         inode_cache = &active_node->inode_cache;
1719
1720         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1721         total = btrfs_item_size_nr(eb, slot);
1722         while (cur < total) {
1723                 name_len = btrfs_inode_extref_name_len(eb, extref);
1724                 index = btrfs_inode_extref_index(eb, extref);
1725                 parent = btrfs_inode_extref_parent(eb, extref);
1726                 if (name_len <= BTRFS_NAME_LEN) {
1727                         len = name_len;
1728                         error = 0;
1729                 } else {
1730                         len = BTRFS_NAME_LEN;
1731                         error = REF_ERR_NAME_TOO_LONG;
1732                 }
1733                 read_extent_buffer(eb, namebuf,
1734                                    (unsigned long)(extref + 1), len);
1735                 add_inode_backref(inode_cache, key->objectid, parent,
1736                                   index, namebuf, len, 0, key->type, error);
1737
1738                 len = sizeof(*extref) + name_len;
1739                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1740                 cur += len;
1741         }
1742         return 0;
1743
1744 }
1745
1746 static int count_csum_range(struct btrfs_root *root, u64 start,
1747                             u64 len, u64 *found)
1748 {
1749         struct btrfs_key key;
1750         struct btrfs_path path;
1751         struct extent_buffer *leaf;
1752         int ret;
1753         size_t size;
1754         *found = 0;
1755         u64 csum_end;
1756         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1757
1758         btrfs_init_path(&path);
1759
1760         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1761         key.offset = start;
1762         key.type = BTRFS_EXTENT_CSUM_KEY;
1763
1764         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1765                                 &key, &path, 0, 0);
1766         if (ret < 0)
1767                 goto out;
1768         if (ret > 0 && path.slots[0] > 0) {
1769                 leaf = path.nodes[0];
1770                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1771                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1772                     key.type == BTRFS_EXTENT_CSUM_KEY)
1773                         path.slots[0]--;
1774         }
1775
1776         while (len > 0) {
1777                 leaf = path.nodes[0];
1778                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1779                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1780                         if (ret > 0)
1781                                 break;
1782                         else if (ret < 0)
1783                                 goto out;
1784                         leaf = path.nodes[0];
1785                 }
1786
1787                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1788                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1789                     key.type != BTRFS_EXTENT_CSUM_KEY)
1790                         break;
1791
1792                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1793                 if (key.offset >= start + len)
1794                         break;
1795
1796                 if (key.offset > start)
1797                         start = key.offset;
1798
1799                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1800                 csum_end = key.offset + (size / csum_size) *
1801                            root->fs_info->sectorsize;
1802                 if (csum_end > start) {
1803                         size = min(csum_end - start, len);
1804                         len -= size;
1805                         start += size;
1806                         *found += size;
1807                 }
1808
1809                 path.slots[0]++;
1810         }
1811 out:
1812         btrfs_release_path(&path);
1813         if (ret < 0)
1814                 return ret;
1815         return 0;
1816 }
1817
1818 static int process_file_extent(struct btrfs_root *root,
1819                                 struct extent_buffer *eb,
1820                                 int slot, struct btrfs_key *key,
1821                                 struct shared_node *active_node)
1822 {
1823         struct inode_record *rec;
1824         struct btrfs_file_extent_item *fi;
1825         u64 num_bytes = 0;
1826         u64 disk_bytenr = 0;
1827         u64 extent_offset = 0;
1828         u64 mask = root->fs_info->sectorsize - 1;
1829         int extent_type;
1830         int ret;
1831
1832         rec = active_node->current;
1833         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1834         rec->found_file_extent = 1;
1835
1836         if (rec->extent_start == (u64)-1) {
1837                 rec->extent_start = key->offset;
1838                 rec->extent_end = key->offset;
1839         }
1840
1841         if (rec->extent_end > key->offset)
1842                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1843         else if (rec->extent_end < key->offset) {
1844                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1845                                            key->offset - rec->extent_end);
1846                 if (ret < 0)
1847                         return ret;
1848         }
1849
1850         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1851         extent_type = btrfs_file_extent_type(eb, fi);
1852
1853         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1854                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1855                 if (num_bytes == 0)
1856                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1857                 rec->found_size += num_bytes;
1858                 num_bytes = (num_bytes + mask) & ~mask;
1859         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1860                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1861                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1862                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1863                 extent_offset = btrfs_file_extent_offset(eb, fi);
1864                 if (num_bytes == 0 || (num_bytes & mask))
1865                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1866                 if (num_bytes + extent_offset >
1867                     btrfs_file_extent_ram_bytes(eb, fi))
1868                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1869                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1870                     (btrfs_file_extent_compression(eb, fi) ||
1871                      btrfs_file_extent_encryption(eb, fi) ||
1872                      btrfs_file_extent_other_encoding(eb, fi)))
1873                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1874                 if (disk_bytenr > 0)
1875                         rec->found_size += num_bytes;
1876         } else {
1877                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1878         }
1879         rec->extent_end = key->offset + num_bytes;
1880
1881         /*
1882          * The data reloc tree will copy full extents into its inode and then
1883          * copy the corresponding csums.  Because the extent it copied could be
1884          * a preallocated extent that hasn't been written to yet there may be no
1885          * csums to copy, ergo we won't have csums for our file extent.  This is
1886          * ok so just don't bother checking csums if the inode belongs to the
1887          * data reloc tree.
1888          */
1889         if (disk_bytenr > 0 &&
1890             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1891                 u64 found;
1892                 if (btrfs_file_extent_compression(eb, fi))
1893                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1894                 else
1895                         disk_bytenr += extent_offset;
1896
1897                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1898                 if (ret < 0)
1899                         return ret;
1900                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1901                         if (found > 0)
1902                                 rec->found_csum_item = 1;
1903                         if (found < num_bytes)
1904                                 rec->some_csum_missing = 1;
1905                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1906                         if (found > 0)
1907                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1908                 }
1909         }
1910         return 0;
1911 }
1912
1913 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1914                             struct walk_control *wc)
1915 {
1916         struct btrfs_key key;
1917         u32 nritems;
1918         int i;
1919         int ret = 0;
1920         struct cache_tree *inode_cache;
1921         struct shared_node *active_node;
1922
1923         if (wc->root_level == wc->active_node &&
1924             btrfs_root_refs(&root->root_item) == 0)
1925                 return 0;
1926
1927         active_node = wc->nodes[wc->active_node];
1928         inode_cache = &active_node->inode_cache;
1929         nritems = btrfs_header_nritems(eb);
1930         for (i = 0; i < nritems; i++) {
1931                 btrfs_item_key_to_cpu(eb, &key, i);
1932
1933                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1934                         continue;
1935                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1936                         continue;
1937
1938                 if (active_node->current == NULL ||
1939                     active_node->current->ino < key.objectid) {
1940                         if (active_node->current) {
1941                                 active_node->current->checked = 1;
1942                                 maybe_free_inode_rec(inode_cache,
1943                                                      active_node->current);
1944                         }
1945                         active_node->current = get_inode_rec(inode_cache,
1946                                                              key.objectid, 1);
1947                         BUG_ON(IS_ERR(active_node->current));
1948                 }
1949                 switch (key.type) {
1950                 case BTRFS_DIR_ITEM_KEY:
1951                 case BTRFS_DIR_INDEX_KEY:
1952                         ret = process_dir_item(eb, i, &key, active_node);
1953                         break;
1954                 case BTRFS_INODE_REF_KEY:
1955                         ret = process_inode_ref(eb, i, &key, active_node);
1956                         break;
1957                 case BTRFS_INODE_EXTREF_KEY:
1958                         ret = process_inode_extref(eb, i, &key, active_node);
1959                         break;
1960                 case BTRFS_INODE_ITEM_KEY:
1961                         ret = process_inode_item(eb, i, &key, active_node);
1962                         break;
1963                 case BTRFS_EXTENT_DATA_KEY:
1964                         ret = process_file_extent(root, eb, i, &key,
1965                                                   active_node);
1966                         break;
1967                 default:
1968                         break;
1969                 };
1970         }
1971         return ret;
1972 }
1973
1974 struct node_refs {
1975         u64 bytenr[BTRFS_MAX_LEVEL];
1976         u64 refs[BTRFS_MAX_LEVEL];
1977         int need_check[BTRFS_MAX_LEVEL];
1978         /* field for checking all trees */
1979         int checked[BTRFS_MAX_LEVEL];
1980         /* the corresponding extent should be marked as full backref or not */
1981         int full_backref[BTRFS_MAX_LEVEL];
1982 };
1983
1984 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1985                              struct extent_buffer *eb, struct node_refs *nrefs,
1986                              u64 level, int check_all);
1987 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1988                             unsigned int ext_ref);
1989
1990 /*
1991  * Returns >0  Found error, not fatal, should continue
1992  * Returns <0  Fatal error, must exit the whole check
1993  * Returns 0   No errors found
1994  */
1995 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1996                                struct node_refs *nrefs, int *level, int ext_ref)
1997 {
1998         struct extent_buffer *cur = path->nodes[0];
1999         struct btrfs_key key;
2000         u64 cur_bytenr;
2001         u32 nritems;
2002         u64 first_ino = 0;
2003         int root_level = btrfs_header_level(root->node);
2004         int i;
2005         int ret = 0; /* Final return value */
2006         int err = 0; /* Positive error bitmap */
2007
2008         cur_bytenr = cur->start;
2009
2010         /* skip to first inode item or the first inode number change */
2011         nritems = btrfs_header_nritems(cur);
2012         for (i = 0; i < nritems; i++) {
2013                 btrfs_item_key_to_cpu(cur, &key, i);
2014                 if (i == 0)
2015                         first_ino = key.objectid;
2016                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2017                     (first_ino && first_ino != key.objectid))
2018                         break;
2019         }
2020         if (i == nritems) {
2021                 path->slots[0] = nritems;
2022                 return 0;
2023         }
2024         path->slots[0] = i;
2025
2026 again:
2027         err |= check_inode_item(root, path, ext_ref);
2028
2029         /* modify cur since check_inode_item may change path */
2030         cur = path->nodes[0];
2031
2032         if (err & LAST_ITEM)
2033                 goto out;
2034
2035         /* still have inode items in thie leaf */
2036         if (cur->start == cur_bytenr)
2037                 goto again;
2038
2039         /*
2040          * we have switched to another leaf, above nodes may
2041          * have changed, here walk down the path, if a node
2042          * or leaf is shared, check whether we can skip this
2043          * node or leaf.
2044          */
2045         for (i = root_level; i >= 0; i--) {
2046                 if (path->nodes[i]->start == nrefs->bytenr[i])
2047                         continue;
2048
2049                 ret = update_nodes_refs(root, path->nodes[i]->start,
2050                                 path->nodes[i], nrefs, i, 0);
2051                 if (ret)
2052                         goto out;
2053
2054                 if (!nrefs->need_check[i]) {
2055                         *level += 1;
2056                         break;
2057                 }
2058         }
2059
2060         for (i = 0; i < *level; i++) {
2061                 free_extent_buffer(path->nodes[i]);
2062                 path->nodes[i] = NULL;
2063         }
2064 out:
2065         err &= ~LAST_ITEM;
2066         if (err && !ret)
2067                 ret = err;
2068         return ret;
2069 }
2070
2071 static void reada_walk_down(struct btrfs_root *root,
2072                             struct extent_buffer *node, int slot)
2073 {
2074         struct btrfs_fs_info *fs_info = root->fs_info;
2075         u64 bytenr;
2076         u64 ptr_gen;
2077         u32 nritems;
2078         int i;
2079         int level;
2080
2081         level = btrfs_header_level(node);
2082         if (level != 1)
2083                 return;
2084
2085         nritems = btrfs_header_nritems(node);
2086         for (i = slot; i < nritems; i++) {
2087                 bytenr = btrfs_node_blockptr(node, i);
2088                 ptr_gen = btrfs_node_ptr_generation(node, i);
2089                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2090         }
2091 }
2092
2093 /*
2094  * Check the child node/leaf by the following condition:
2095  * 1. the first item key of the node/leaf should be the same with the one
2096  *    in parent.
2097  * 2. block in parent node should match the child node/leaf.
2098  * 3. generation of parent node and child's header should be consistent.
2099  *
2100  * Or the child node/leaf pointed by the key in parent is not valid.
2101  *
2102  * We hope to check leaf owner too, but since subvol may share leaves,
2103  * which makes leaf owner check not so strong, key check should be
2104  * sufficient enough for that case.
2105  */
2106 static int check_child_node(struct extent_buffer *parent, int slot,
2107                             struct extent_buffer *child)
2108 {
2109         struct btrfs_key parent_key;
2110         struct btrfs_key child_key;
2111         int ret = 0;
2112
2113         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2114         if (btrfs_header_level(child) == 0)
2115                 btrfs_item_key_to_cpu(child, &child_key, 0);
2116         else
2117                 btrfs_node_key_to_cpu(child, &child_key, 0);
2118
2119         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2120                 ret = -EINVAL;
2121                 fprintf(stderr,
2122                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2123                         parent_key.objectid, parent_key.type, parent_key.offset,
2124                         child_key.objectid, child_key.type, child_key.offset);
2125         }
2126         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2127                 ret = -EINVAL;
2128                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2129                         btrfs_node_blockptr(parent, slot),
2130                         btrfs_header_bytenr(child));
2131         }
2132         if (btrfs_node_ptr_generation(parent, slot) !=
2133             btrfs_header_generation(child)) {
2134                 ret = -EINVAL;
2135                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2136                         btrfs_header_generation(child),
2137                         btrfs_node_ptr_generation(parent, slot));
2138         }
2139         return ret;
2140 }
2141
2142 /*
2143  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2144  * in every fs or file tree check. Here we find its all root ids, and only check
2145  * it in the fs or file tree which has the smallest root id.
2146  */
2147 static int need_check(struct btrfs_root *root, struct ulist *roots)
2148 {
2149         struct rb_node *node;
2150         struct ulist_node *u;
2151
2152         /*
2153          * @roots can be empty if it belongs to tree reloc tree
2154          * In that case, we should always check the leaf, as we can't use
2155          * the tree owner to ensure some other root will check it.
2156          */
2157         if (roots->nnodes == 1 || roots->nnodes == 0)
2158                 return 1;
2159
2160         node = rb_first(&roots->root);
2161         u = rb_entry(node, struct ulist_node, rb_node);
2162         /*
2163          * current root id is not smallest, we skip it and let it be checked
2164          * in the fs or file tree who hash the smallest root id.
2165          */
2166         if (root->objectid != u->val)
2167                 return 0;
2168
2169         return 1;
2170 }
2171
2172 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2173                                u64 *flags_ret)
2174 {
2175         struct btrfs_root *extent_root = root->fs_info->extent_root;
2176         struct btrfs_root_item *ri = &root->root_item;
2177         struct btrfs_extent_inline_ref *iref;
2178         struct btrfs_extent_item *ei;
2179         struct btrfs_key key;
2180         struct btrfs_path *path = NULL;
2181         unsigned long ptr;
2182         unsigned long end;
2183         u64 flags;
2184         u64 owner = 0;
2185         u64 offset;
2186         int slot;
2187         int type;
2188         int ret = 0;
2189
2190         /*
2191          * Except file/reloc tree, we can not have FULL BACKREF MODE
2192          */
2193         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
2194                 goto normal;
2195
2196         /* root node */
2197         if (eb->start == btrfs_root_bytenr(ri))
2198                 goto normal;
2199
2200         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2201                 goto full_backref;
2202
2203         owner = btrfs_header_owner(eb);
2204         if (owner == root->objectid)
2205                 goto normal;
2206
2207         path = btrfs_alloc_path();
2208         if (!path)
2209                 return -ENOMEM;
2210
2211         key.objectid = btrfs_header_bytenr(eb);
2212         key.type = (u8)-1;
2213         key.offset = (u64)-1;
2214
2215         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2216         if (ret <= 0) {
2217                 ret = -EIO;
2218                 goto out;
2219         }
2220
2221         if (ret > 0) {
2222                 ret = btrfs_previous_extent_item(extent_root, path,
2223                                                  key.objectid);
2224                 if (ret)
2225                         goto full_backref;
2226
2227         }
2228         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2229
2230         eb = path->nodes[0];
2231         slot = path->slots[0];
2232         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2233
2234         flags = btrfs_extent_flags(eb, ei);
2235         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2236                 goto full_backref;
2237
2238         ptr = (unsigned long)(ei + 1);
2239         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
2240
2241         if (key.type == BTRFS_EXTENT_ITEM_KEY)
2242                 ptr += sizeof(struct btrfs_tree_block_info);
2243
2244 next:
2245         /* Reached extent item ends normally */
2246         if (ptr == end)
2247                 goto full_backref;
2248
2249         /* Beyond extent item end, wrong item size */
2250         if (ptr > end) {
2251                 error("extent item at bytenr %llu slot %d has wrong size",
2252                         eb->start, slot);
2253                 goto full_backref;
2254         }
2255
2256         iref = (struct btrfs_extent_inline_ref *)ptr;
2257         offset = btrfs_extent_inline_ref_offset(eb, iref);
2258         type = btrfs_extent_inline_ref_type(eb, iref);
2259
2260         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2261                 goto normal;
2262         ptr += btrfs_extent_inline_ref_size(type);
2263         goto next;
2264
2265 normal:
2266         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2267         goto out;
2268
2269 full_backref:
2270         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2271 out:
2272         btrfs_free_path(path);
2273         return ret;
2274 }
2275
2276 /*
2277  * for a tree node or leaf, we record its reference count, so later if we still
2278  * process this node or leaf, don't need to compute its reference count again.
2279  *
2280  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
2281  */
2282 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2283                              struct extent_buffer *eb, struct node_refs *nrefs,
2284                              u64 level, int check_all)
2285 {
2286         struct ulist *roots;
2287         u64 refs = 0;
2288         u64 flags = 0;
2289         int root_level = btrfs_header_level(root->node);
2290         int check;
2291         int ret;
2292
2293         if (nrefs->bytenr[level] == bytenr)
2294                 return 0;
2295
2296         if (bytenr != (u64)-1) {
2297                 /* the return value of this function seems a mistake */
2298                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2299                                        level, 1, &refs, &flags);
2300                 /* temporary fix */
2301                 if (ret < 0 && !check_all)
2302                         return ret;
2303
2304                 nrefs->bytenr[level] = bytenr;
2305                 nrefs->refs[level] = refs;
2306                 nrefs->full_backref[level] = 0;
2307                 nrefs->checked[level] = 0;
2308
2309                 if (refs > 1) {
2310                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2311                                                    0, &roots);
2312                         if (ret)
2313                                 return -EIO;
2314
2315                         check = need_check(root, roots);
2316                         ulist_free(roots);
2317                         nrefs->need_check[level] = check;
2318                 } else {
2319                         if (!check_all) {
2320                                 nrefs->need_check[level] = 1;
2321                         } else {
2322                                 if (level == root_level) {
2323                                         nrefs->need_check[level] = 1;
2324                                 } else {
2325                                         /*
2326                                          * The node refs may have not been
2327                                          * updated if upper needs checking (the
2328                                          * lowest root_objectid) the node can
2329                                          * be checked.
2330                                          */
2331                                         nrefs->need_check[level] =
2332                                                 nrefs->need_check[level + 1];
2333                                 }
2334                         }
2335                 }
2336         }
2337
2338         if (check_all && eb) {
2339                 calc_extent_flag_v2(root, eb, &flags);
2340                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2341                         nrefs->full_backref[level] = 1;
2342         }
2343
2344         return 0;
2345 }
2346
2347 /*
2348  * @level           if @level == -1 means extent data item
2349  *                  else normal treeblocl.
2350  */
2351 static int should_check_extent_strictly(struct btrfs_root *root,
2352                                         struct node_refs *nrefs, int level)
2353 {
2354         int root_level = btrfs_header_level(root->node);
2355
2356         if (level > root_level || level < -1)
2357                 return 1;
2358         if (level == root_level)
2359                 return 1;
2360         /*
2361          * if the upper node is marked full backref, it should contain shared
2362          * backref of the parent (except owner == root->objectid).
2363          */
2364         while (++level <= root_level)
2365                 if (nrefs->refs[level] > 1)
2366                         return 0;
2367
2368         return 1;
2369 }
2370
2371 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2372                           struct walk_control *wc, int *level,
2373                           struct node_refs *nrefs)
2374 {
2375         enum btrfs_tree_block_status status;
2376         u64 bytenr;
2377         u64 ptr_gen;
2378         struct btrfs_fs_info *fs_info = root->fs_info;
2379         struct extent_buffer *next;
2380         struct extent_buffer *cur;
2381         int ret, err = 0;
2382         u64 refs;
2383
2384         WARN_ON(*level < 0);
2385         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2386
2387         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2388                 refs = nrefs->refs[*level];
2389                 ret = 0;
2390         } else {
2391                 ret = btrfs_lookup_extent_info(NULL, root,
2392                                        path->nodes[*level]->start,
2393                                        *level, 1, &refs, NULL);
2394                 if (ret < 0) {
2395                         err = ret;
2396                         goto out;
2397                 }
2398                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2399                 nrefs->refs[*level] = refs;
2400         }
2401
2402         if (refs > 1) {
2403                 ret = enter_shared_node(root, path->nodes[*level]->start,
2404                                         refs, wc, *level);
2405                 if (ret > 0) {
2406                         err = ret;
2407                         goto out;
2408                 }
2409         }
2410
2411         while (*level >= 0) {
2412                 WARN_ON(*level < 0);
2413                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2414                 cur = path->nodes[*level];
2415
2416                 if (btrfs_header_level(cur) != *level)
2417                         WARN_ON(1);
2418
2419                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2420                         break;
2421                 if (*level == 0) {
2422                         ret = process_one_leaf(root, cur, wc);
2423                         if (ret < 0)
2424                                 err = ret;
2425                         break;
2426                 }
2427                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2428                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2429
2430                 if (bytenr == nrefs->bytenr[*level - 1]) {
2431                         refs = nrefs->refs[*level - 1];
2432                 } else {
2433                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2434                                         *level - 1, 1, &refs, NULL);
2435                         if (ret < 0) {
2436                                 refs = 0;
2437                         } else {
2438                                 nrefs->bytenr[*level - 1] = bytenr;
2439                                 nrefs->refs[*level - 1] = refs;
2440                         }
2441                 }
2442
2443                 if (refs > 1) {
2444                         ret = enter_shared_node(root, bytenr, refs,
2445                                                 wc, *level - 1);
2446                         if (ret > 0) {
2447                                 path->slots[*level]++;
2448                                 continue;
2449                         }
2450                 }
2451
2452                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2453                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2454                         free_extent_buffer(next);
2455                         reada_walk_down(root, cur, path->slots[*level]);
2456                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2457                         if (!extent_buffer_uptodate(next)) {
2458                                 struct btrfs_key node_key;
2459
2460                                 btrfs_node_key_to_cpu(path->nodes[*level],
2461                                                       &node_key,
2462                                                       path->slots[*level]);
2463                                 btrfs_add_corrupt_extent_record(root->fs_info,
2464                                                 &node_key,
2465                                                 path->nodes[*level]->start,
2466                                                 root->fs_info->nodesize,
2467                                                 *level);
2468                                 err = -EIO;
2469                                 goto out;
2470                         }
2471                 }
2472
2473                 ret = check_child_node(cur, path->slots[*level], next);
2474                 if (ret) {
2475                         free_extent_buffer(next);
2476                         err = ret;
2477                         goto out;
2478                 }
2479
2480                 if (btrfs_is_leaf(next))
2481                         status = btrfs_check_leaf(root, NULL, next);
2482                 else
2483                         status = btrfs_check_node(root, NULL, next);
2484                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2485                         free_extent_buffer(next);
2486                         err = -EIO;
2487                         goto out;
2488                 }
2489
2490                 *level = *level - 1;
2491                 free_extent_buffer(path->nodes[*level]);
2492                 path->nodes[*level] = next;
2493                 path->slots[*level] = 0;
2494         }
2495 out:
2496         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2497         return err;
2498 }
2499
2500 static int fs_root_objectid(u64 objectid);
2501
2502 /*
2503  * Update global fs information.
2504  */
2505 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2506                          int level)
2507 {
2508         u32 free_nrs;
2509         struct extent_buffer *eb = path->nodes[level];
2510
2511         total_btree_bytes += eb->len;
2512         if (fs_root_objectid(root->objectid))
2513                 total_fs_tree_bytes += eb->len;
2514         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2515                 total_extent_tree_bytes += eb->len;
2516
2517         if (level == 0) {
2518                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2519         } else {
2520                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2521                             btrfs_header_nritems(eb));
2522                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2523         }
2524 }
2525
2526 /*
2527  * This function only handles BACKREF_MISSING,
2528  * If corresponding extent item exists, increase the ref, else insert an extent
2529  * item and backref.
2530  *
2531  * Returns error bits after repair.
2532  */
2533 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2534                                  struct btrfs_root *root,
2535                                  struct extent_buffer *node,
2536                                  struct node_refs *nrefs, int level, int err)
2537 {
2538         struct btrfs_fs_info *fs_info = root->fs_info;
2539         struct btrfs_root *extent_root = fs_info->extent_root;
2540         struct btrfs_path path;
2541         struct btrfs_extent_item *ei;
2542         struct btrfs_tree_block_info *bi;
2543         struct btrfs_key key;
2544         struct extent_buffer *eb;
2545         u32 size = sizeof(*ei);
2546         u32 node_size = root->fs_info->nodesize;
2547         int insert_extent = 0;
2548         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2549         int root_level = btrfs_header_level(root->node);
2550         int generation;
2551         int ret;
2552         u64 owner;
2553         u64 bytenr;
2554         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2555         u64 parent = 0;
2556
2557         if ((err & BACKREF_MISSING) == 0)
2558                 return err;
2559
2560         WARN_ON(level > BTRFS_MAX_LEVEL);
2561         WARN_ON(level < 0);
2562
2563         btrfs_init_path(&path);
2564         bytenr = btrfs_header_bytenr(node);
2565         owner = btrfs_header_owner(node);
2566         generation = btrfs_header_generation(node);
2567
2568         key.objectid = bytenr;
2569         key.type = (u8)-1;
2570         key.offset = (u64)-1;
2571
2572         /* Search for the extent item */
2573         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2574         if (ret <= 0) {
2575                 ret = -EIO;
2576                 goto out;
2577         }
2578
2579         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2580         if (ret)
2581                 insert_extent = 1;
2582
2583         /* calculate if the extent item flag is full backref or not */
2584         if (nrefs->full_backref[level] != 0)
2585                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2586
2587         /* insert an extent item */
2588         if (insert_extent) {
2589                 struct btrfs_disk_key copy_key;
2590
2591                 generation = btrfs_header_generation(node);
2592
2593                 if (level < root_level && nrefs->full_backref[level + 1] &&
2594                     owner != root->objectid) {
2595                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2596                 }
2597
2598                 key.objectid = bytenr;
2599                 if (!skinny_metadata) {
2600                         key.type = BTRFS_EXTENT_ITEM_KEY;
2601                         key.offset = node_size;
2602                         size += sizeof(*bi);
2603                 } else {
2604                         key.type = BTRFS_METADATA_ITEM_KEY;
2605                         key.offset = level;
2606                 }
2607
2608                 btrfs_release_path(&path);
2609                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2610                                               size);
2611                 if (ret)
2612                         goto out;
2613
2614                 eb = path.nodes[0];
2615                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2616
2617                 btrfs_set_extent_refs(eb, ei, 0);
2618                 btrfs_set_extent_generation(eb, ei, generation);
2619                 btrfs_set_extent_flags(eb, ei, flags);
2620
2621                 if (!skinny_metadata) {
2622                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2623                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2624                                              sizeof(*bi));
2625                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2626                         btrfs_set_disk_key_type(&copy_key, 0);
2627                         btrfs_set_disk_key_offset(&copy_key, 0);
2628
2629                         btrfs_set_tree_block_level(eb, bi, level);
2630                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2631                 }
2632                 btrfs_mark_buffer_dirty(eb);
2633                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2634                 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
2635                                          1, 0);
2636
2637                 nrefs->refs[level] = 0;
2638                 nrefs->full_backref[level] =
2639                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2640                 btrfs_release_path(&path);
2641         }
2642
2643         if (level < root_level && nrefs->full_backref[level + 1] &&
2644             owner != root->objectid)
2645                 parent = nrefs->bytenr[level + 1];
2646
2647         /* increase the ref */
2648         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2649                         parent, root->objectid, level, 0);
2650
2651         nrefs->refs[level]++;
2652 out:
2653         btrfs_release_path(&path);
2654         if (ret) {
2655                 error(
2656         "failed to repair tree block ref start %llu root %llu due to %s",
2657                       bytenr, root->objectid, strerror(-ret));
2658         } else {
2659                 printf("Added one tree block ref start %llu %s %llu\n",
2660                        bytenr, parent ? "parent" : "root",
2661                        parent ? parent : root->objectid);
2662                 err &= ~BACKREF_MISSING;
2663         }
2664
2665         return err;
2666 }
2667
2668 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2669                             unsigned int ext_ref);
2670 static int check_tree_block_ref(struct btrfs_root *root,
2671                                 struct extent_buffer *eb, u64 bytenr,
2672                                 int level, u64 owner, struct node_refs *nrefs);
2673 static int check_leaf_items(struct btrfs_trans_handle *trans,
2674                             struct btrfs_root *root, struct btrfs_path *path,
2675                             struct node_refs *nrefs, int account_bytes);
2676
2677 /*
2678  * @trans      just for lowmem repair mode
2679  * @check all  if not 0 then check all tree block backrefs and items
2680  *             0 then just check relationship of items in fs tree(s)
2681  *
2682  * Returns >0  Found error, should continue
2683  * Returns <0  Fatal error, must exit the whole check
2684  * Returns 0   No errors found
2685  */
2686 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2687                              struct btrfs_root *root, struct btrfs_path *path,
2688                              int *level, struct node_refs *nrefs, int ext_ref,
2689                              int check_all)
2690
2691 {
2692         enum btrfs_tree_block_status status;
2693         u64 bytenr;
2694         u64 ptr_gen;
2695         struct btrfs_fs_info *fs_info = root->fs_info;
2696         struct extent_buffer *next;
2697         struct extent_buffer *cur;
2698         int ret;
2699         int err = 0;
2700         int check;
2701         int account_file_data = 0;
2702
2703         WARN_ON(*level < 0);
2704         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2705
2706         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2707                                 path->nodes[*level], nrefs, *level, check_all);
2708         if (ret < 0)
2709                 return ret;
2710
2711         while (*level >= 0) {
2712                 WARN_ON(*level < 0);
2713                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2714                 cur = path->nodes[*level];
2715                 bytenr = btrfs_header_bytenr(cur);
2716                 check = nrefs->need_check[*level];
2717
2718                 if (btrfs_header_level(cur) != *level)
2719                         WARN_ON(1);
2720                /*
2721                 * Update bytes accounting and check tree block ref
2722                 * NOTE: Doing accounting and check before checking nritems
2723                 * is necessary because of empty node/leaf.
2724                 */
2725                 if ((check_all && !nrefs->checked[*level]) ||
2726                     (!check_all && nrefs->need_check[*level])) {
2727                         ret = check_tree_block_ref(root, cur,
2728                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2729                            btrfs_header_owner(cur), nrefs);
2730
2731                         if (repair && ret)
2732                                 ret = repair_tree_block_ref(trans, root,
2733                                     path->nodes[*level], nrefs, *level, ret);
2734                         err |= ret;
2735
2736                         if (check_all && nrefs->need_check[*level] &&
2737                                 nrefs->refs[*level]) {
2738                                 account_bytes(root, path, *level);
2739                                 account_file_data = 1;
2740                         }
2741                         nrefs->checked[*level] = 1;
2742                 }
2743
2744                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2745                         break;
2746
2747                 /* Don't forgot to check leaf/node validation */
2748                 if (*level == 0) {
2749                         /* skip duplicate check */
2750                         if (check || !check_all) {
2751                                 ret = btrfs_check_leaf(root, NULL, cur);
2752                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2753                                         err |= -EIO;
2754                                         break;
2755                                 }
2756                         }
2757
2758                         ret = 0;
2759                         if (!check_all)
2760                                 ret = process_one_leaf_v2(root, path, nrefs,
2761                                                           level, ext_ref);
2762                         else
2763                                 ret = check_leaf_items(trans, root, path,
2764                                                nrefs, account_file_data);
2765                         err |= ret;
2766                         break;
2767                 } else {
2768                         if (check || !check_all) {
2769                                 ret = btrfs_check_node(root, NULL, cur);
2770                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2771                                         err |= -EIO;
2772                                         break;
2773                                 }
2774                         }
2775                 }
2776
2777                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2778                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2779
2780                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2781                                         check_all);
2782                 if (ret < 0)
2783                         break;
2784                 /*
2785                  * check all trees in check_chunks_and_extent_v2
2786                  * check shared node once in check_fs_roots
2787                  */
2788                 if (!check_all && !nrefs->need_check[*level - 1]) {
2789                         path->slots[*level]++;
2790                         continue;
2791                 }
2792
2793                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2794                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2795                         free_extent_buffer(next);
2796                         reada_walk_down(root, cur, path->slots[*level]);
2797                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2798                         if (!extent_buffer_uptodate(next)) {
2799                                 struct btrfs_key node_key;
2800
2801                                 btrfs_node_key_to_cpu(path->nodes[*level],
2802                                                       &node_key,
2803                                                       path->slots[*level]);
2804                                 btrfs_add_corrupt_extent_record(fs_info,
2805                                         &node_key, path->nodes[*level]->start,
2806                                         fs_info->nodesize, *level);
2807                                 err |= -EIO;
2808                                 break;
2809                         }
2810                 }
2811
2812                 ret = check_child_node(cur, path->slots[*level], next);
2813                 err |= ret;
2814                 if (ret < 0) 
2815                         break;
2816
2817                 if (btrfs_is_leaf(next))
2818                         status = btrfs_check_leaf(root, NULL, next);
2819                 else
2820                         status = btrfs_check_node(root, NULL, next);
2821                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2822                         free_extent_buffer(next);
2823                         err |= -EIO;
2824                         break;
2825                 }
2826
2827                 *level = *level - 1;
2828                 free_extent_buffer(path->nodes[*level]);
2829                 path->nodes[*level] = next;
2830                 path->slots[*level] = 0;
2831                 account_file_data = 0;
2832
2833                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2834         }
2835         return err;
2836 }
2837
2838 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2839                         struct walk_control *wc, int *level)
2840 {
2841         int i;
2842         struct extent_buffer *leaf;
2843
2844         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2845                 leaf = path->nodes[i];
2846                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2847                         path->slots[i]++;
2848                         *level = i;
2849                         return 0;
2850                 } else {
2851                         free_extent_buffer(path->nodes[*level]);
2852                         path->nodes[*level] = NULL;
2853                         BUG_ON(*level > wc->active_node);
2854                         if (*level == wc->active_node)
2855                                 leave_shared_node(root, wc, *level);
2856                         *level = i + 1;
2857                 }
2858         }
2859         return 1;
2860 }
2861
2862 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2863                            int *level)
2864 {
2865         int i;
2866         struct extent_buffer *leaf;
2867
2868         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2869                 leaf = path->nodes[i];
2870                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2871                         path->slots[i]++;
2872                         *level = i;
2873                         return 0;
2874                 } else {
2875                         free_extent_buffer(path->nodes[*level]);
2876                         path->nodes[*level] = NULL;
2877                         *level = i + 1;
2878                 }
2879         }
2880         return 1;
2881 }
2882
2883 static int check_root_dir(struct inode_record *rec)
2884 {
2885         struct inode_backref *backref;
2886         int ret = -1;
2887
2888         if (!rec->found_inode_item || rec->errors)
2889                 goto out;
2890         if (rec->nlink != 1 || rec->found_link != 0)
2891                 goto out;
2892         if (list_empty(&rec->backrefs))
2893                 goto out;
2894         backref = to_inode_backref(rec->backrefs.next);
2895         if (!backref->found_inode_ref)
2896                 goto out;
2897         if (backref->index != 0 || backref->namelen != 2 ||
2898             memcmp(backref->name, "..", 2))
2899                 goto out;
2900         if (backref->found_dir_index || backref->found_dir_item)
2901                 goto out;
2902         ret = 0;
2903 out:
2904         return ret;
2905 }
2906
2907 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2908                               struct btrfs_root *root, struct btrfs_path *path,
2909                               struct inode_record *rec)
2910 {
2911         struct btrfs_inode_item *ei;
2912         struct btrfs_key key;
2913         int ret;
2914
2915         key.objectid = rec->ino;
2916         key.type = BTRFS_INODE_ITEM_KEY;
2917         key.offset = (u64)-1;
2918
2919         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2920         if (ret < 0)
2921                 goto out;
2922         if (ret) {
2923                 if (!path->slots[0]) {
2924                         ret = -ENOENT;
2925                         goto out;
2926                 }
2927                 path->slots[0]--;
2928                 ret = 0;
2929         }
2930         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2931         if (key.objectid != rec->ino) {
2932                 ret = -ENOENT;
2933                 goto out;
2934         }
2935
2936         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2937                             struct btrfs_inode_item);
2938         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2939         btrfs_mark_buffer_dirty(path->nodes[0]);
2940         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2941         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2942                root->root_key.objectid);
2943 out:
2944         btrfs_release_path(path);
2945         return ret;
2946 }
2947
2948 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2949                                     struct btrfs_root *root,
2950                                     struct btrfs_path *path,
2951                                     struct inode_record *rec)
2952 {
2953         int ret;
2954
2955         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2956         btrfs_release_path(path);
2957         if (!ret)
2958                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2959         return ret;
2960 }
2961
2962 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2963                                struct btrfs_root *root,
2964                                struct btrfs_path *path,
2965                                struct inode_record *rec)
2966 {
2967         struct btrfs_inode_item *ei;
2968         struct btrfs_key key;
2969         int ret = 0;
2970
2971         key.objectid = rec->ino;
2972         key.type = BTRFS_INODE_ITEM_KEY;
2973         key.offset = 0;
2974
2975         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2976         if (ret) {
2977                 if (ret > 0)
2978                         ret = -ENOENT;
2979                 goto out;
2980         }
2981
2982         /* Since ret == 0, no need to check anything */
2983         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2984                             struct btrfs_inode_item);
2985         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2986         btrfs_mark_buffer_dirty(path->nodes[0]);
2987         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2988         printf("reset nbytes for ino %llu root %llu\n",
2989                rec->ino, root->root_key.objectid);
2990 out:
2991         btrfs_release_path(path);
2992         return ret;
2993 }
2994
2995 static int add_missing_dir_index(struct btrfs_root *root,
2996                                  struct cache_tree *inode_cache,
2997                                  struct inode_record *rec,
2998                                  struct inode_backref *backref)
2999 {
3000         struct btrfs_path path;
3001         struct btrfs_trans_handle *trans;
3002         struct btrfs_dir_item *dir_item;
3003         struct extent_buffer *leaf;
3004         struct btrfs_key key;
3005         struct btrfs_disk_key disk_key;
3006         struct inode_record *dir_rec;
3007         unsigned long name_ptr;
3008         u32 data_size = sizeof(*dir_item) + backref->namelen;
3009         int ret;
3010
3011         trans = btrfs_start_transaction(root, 1);
3012         if (IS_ERR(trans))
3013                 return PTR_ERR(trans);
3014
3015         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3016                 (unsigned long long)rec->ino);
3017
3018         btrfs_init_path(&path);
3019         key.objectid = backref->dir;
3020         key.type = BTRFS_DIR_INDEX_KEY;
3021         key.offset = backref->index;
3022         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3023         BUG_ON(ret);
3024
3025         leaf = path.nodes[0];
3026         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
3027
3028         disk_key.objectid = cpu_to_le64(rec->ino);
3029         disk_key.type = BTRFS_INODE_ITEM_KEY;
3030         disk_key.offset = 0;
3031
3032         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3033         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3034         btrfs_set_dir_data_len(leaf, dir_item, 0);
3035         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
3036         name_ptr = (unsigned long)(dir_item + 1);
3037         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3038         btrfs_mark_buffer_dirty(leaf);
3039         btrfs_release_path(&path);
3040         btrfs_commit_transaction(trans, root);
3041
3042         backref->found_dir_index = 1;
3043         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3044         BUG_ON(IS_ERR(dir_rec));
3045         if (!dir_rec)
3046                 return 0;
3047         dir_rec->found_size += backref->namelen;
3048         if (dir_rec->found_size == dir_rec->isize &&
3049             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3050                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3051         if (dir_rec->found_size != dir_rec->isize)
3052                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
3053
3054         return 0;
3055 }
3056
3057 static int delete_dir_index(struct btrfs_root *root,
3058                             struct inode_backref *backref)
3059 {
3060         struct btrfs_trans_handle *trans;
3061         struct btrfs_dir_item *di;
3062         struct btrfs_path path;
3063         int ret = 0;
3064
3065         trans = btrfs_start_transaction(root, 1);
3066         if (IS_ERR(trans))
3067                 return PTR_ERR(trans);
3068
3069         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3070                 (unsigned long long)backref->dir,
3071                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3072                 (unsigned long long)root->objectid);
3073
3074         btrfs_init_path(&path);
3075         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3076                                     backref->name, backref->namelen,
3077                                     backref->index, -1);
3078         if (IS_ERR(di)) {
3079                 ret = PTR_ERR(di);
3080                 btrfs_release_path(&path);
3081                 btrfs_commit_transaction(trans, root);
3082                 if (ret == -ENOENT)
3083                         return 0;
3084                 return ret;
3085         }
3086
3087         if (!di)
3088                 ret = btrfs_del_item(trans, root, &path);
3089         else
3090                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3091         BUG_ON(ret);
3092         btrfs_release_path(&path);
3093         btrfs_commit_transaction(trans, root);
3094         return ret;
3095 }
3096
3097 static int __create_inode_item(struct btrfs_trans_handle *trans,
3098                                struct btrfs_root *root, u64 ino, u64 size,
3099                                u64 nbytes, u64 nlink, u32 mode)
3100 {
3101         struct btrfs_inode_item ii;
3102         time_t now = time(NULL);
3103         int ret;
3104
3105         btrfs_set_stack_inode_size(&ii, size);
3106         btrfs_set_stack_inode_nbytes(&ii, nbytes);
3107         btrfs_set_stack_inode_nlink(&ii, nlink);
3108         btrfs_set_stack_inode_mode(&ii, mode);
3109         btrfs_set_stack_inode_generation(&ii, trans->transid);
3110         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3111         btrfs_set_stack_timespec_sec(&ii.ctime, now);
3112         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3113         btrfs_set_stack_timespec_sec(&ii.mtime, now);
3114         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3115         btrfs_set_stack_timespec_sec(&ii.otime, 0);
3116         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3117
3118         ret = btrfs_insert_inode(trans, root, ino, &ii);
3119         ASSERT(!ret);
3120
3121         warning("root %llu inode %llu recreating inode item, this may "
3122                 "be incomplete, please check permissions and content after "
3123                 "the fsck completes.\n", (unsigned long long)root->objectid,
3124                 (unsigned long long)ino);
3125
3126         return 0;
3127 }
3128
3129 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3130                                     struct btrfs_root *root, u64 ino,
3131                                     u8 filetype)
3132 {
3133         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3134
3135         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
3136 }
3137
3138 static int create_inode_item(struct btrfs_root *root,
3139                              struct inode_record *rec, int root_dir)
3140 {
3141         struct btrfs_trans_handle *trans;
3142         u64 nlink = 0;
3143         u32 mode = 0;
3144         u64 size = 0;
3145         int ret;
3146
3147         trans = btrfs_start_transaction(root, 1);
3148         if (IS_ERR(trans)) {
3149                 ret = PTR_ERR(trans);
3150                 return ret;
3151         }
3152
3153         nlink = root_dir ? 1 : rec->found_link;
3154         if (rec->found_dir_item) {
3155                 if (rec->found_file_extent)
3156                         fprintf(stderr, "root %llu inode %llu has both a dir "
3157                                 "item and extents, unsure if it is a dir or a "
3158                                 "regular file so setting it as a directory\n",
3159                                 (unsigned long long)root->objectid,
3160                                 (unsigned long long)rec->ino);
3161                 mode = S_IFDIR | 0755;
3162                 size = rec->found_size;
3163         } else if (!rec->found_dir_item) {
3164                 size = rec->extent_end;
3165                 mode =  S_IFREG | 0755;
3166         }
3167
3168         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3169                                   nlink, mode);
3170         btrfs_commit_transaction(trans, root);
3171         return 0;
3172 }
3173
3174 static int repair_inode_backrefs(struct btrfs_root *root,
3175                                  struct inode_record *rec,
3176                                  struct cache_tree *inode_cache,
3177                                  int delete)
3178 {
3179         struct inode_backref *tmp, *backref;
3180         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3181         int ret = 0;
3182         int repaired = 0;
3183
3184         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3185                 if (!delete && rec->ino == root_dirid) {
3186                         if (!rec->found_inode_item) {
3187                                 ret = create_inode_item(root, rec, 1);
3188                                 if (ret)
3189                                         break;
3190                                 repaired++;
3191                         }
3192                 }
3193
3194                 /* Index 0 for root dir's are special, don't mess with it */
3195                 if (rec->ino == root_dirid && backref->index == 0)
3196                         continue;
3197
3198                 if (delete &&
3199                     ((backref->found_dir_index && !backref->found_inode_ref) ||
3200                      (backref->found_dir_index && backref->found_inode_ref &&
3201                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3202                         ret = delete_dir_index(root, backref);
3203                         if (ret)
3204                                 break;
3205                         repaired++;
3206                         list_del(&backref->list);
3207                         free(backref);
3208                         continue;
3209                 }
3210
3211                 if (!delete && !backref->found_dir_index &&
3212                     backref->found_dir_item && backref->found_inode_ref) {
3213                         ret = add_missing_dir_index(root, inode_cache, rec,
3214                                                     backref);
3215                         if (ret)
3216                                 break;
3217                         repaired++;
3218                         if (backref->found_dir_item &&
3219                             backref->found_dir_index) {
3220                                 if (!backref->errors &&
3221                                     backref->found_inode_ref) {
3222                                         list_del(&backref->list);
3223                                         free(backref);
3224                                         continue;
3225                                 }
3226                         }
3227                 }
3228
3229                 if (!delete && (!backref->found_dir_index &&
3230                                 !backref->found_dir_item &&
3231                                 backref->found_inode_ref)) {
3232                         struct btrfs_trans_handle *trans;
3233                         struct btrfs_key location;
3234
3235                         ret = check_dir_conflict(root, backref->name,
3236                                                  backref->namelen,
3237                                                  backref->dir,
3238                                                  backref->index);
3239                         if (ret) {
3240                                 /*
3241                                  * let nlink fixing routine to handle it,
3242                                  * which can do it better.
3243                                  */
3244                                 ret = 0;
3245                                 break;
3246                         }
3247                         location.objectid = rec->ino;
3248                         location.type = BTRFS_INODE_ITEM_KEY;
3249                         location.offset = 0;
3250
3251                         trans = btrfs_start_transaction(root, 1);
3252                         if (IS_ERR(trans)) {
3253                                 ret = PTR_ERR(trans);
3254                                 break;
3255                         }
3256                         fprintf(stderr, "adding missing dir index/item pair "
3257                                 "for inode %llu\n",
3258                                 (unsigned long long)rec->ino);
3259                         ret = btrfs_insert_dir_item(trans, root, backref->name,
3260                                                     backref->namelen,
3261                                                     backref->dir, &location,
3262                                                     imode_to_type(rec->imode),
3263                                                     backref->index);
3264                         BUG_ON(ret);
3265                         btrfs_commit_transaction(trans, root);
3266                         repaired++;
3267                 }
3268
3269                 if (!delete && (backref->found_inode_ref &&
3270                                 backref->found_dir_index &&
3271                                 backref->found_dir_item &&
3272                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3273                                 !rec->found_inode_item)) {
3274                         ret = create_inode_item(root, rec, 0);
3275                         if (ret)
3276                                 break;
3277                         repaired++;
3278                 }
3279
3280         }
3281         return ret ? ret : repaired;
3282 }
3283
3284 /*
3285  * To determine the file type for nlink/inode_item repair
3286  *
3287  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3288  * Return -ENOENT if file type is not found.
3289  */
3290 static int find_file_type(struct inode_record *rec, u8 *type)
3291 {
3292         struct inode_backref *backref;
3293
3294         /* For inode item recovered case */
3295         if (rec->found_inode_item) {
3296                 *type = imode_to_type(rec->imode);
3297                 return 0;
3298         }
3299
3300         list_for_each_entry(backref, &rec->backrefs, list) {
3301                 if (backref->found_dir_index || backref->found_dir_item) {
3302                         *type = backref->filetype;
3303                         return 0;
3304                 }
3305         }
3306         return -ENOENT;
3307 }
3308
3309 /*
3310  * To determine the file name for nlink repair
3311  *
3312  * Return 0 if file name is found, set name and namelen.
3313  * Return -ENOENT if file name is not found.
3314  */
3315 static int find_file_name(struct inode_record *rec,
3316                           char *name, int *namelen)
3317 {
3318         struct inode_backref *backref;
3319
3320         list_for_each_entry(backref, &rec->backrefs, list) {
3321                 if (backref->found_dir_index || backref->found_dir_item ||
3322                     backref->found_inode_ref) {
3323                         memcpy(name, backref->name, backref->namelen);
3324                         *namelen = backref->namelen;
3325                         return 0;
3326                 }
3327         }
3328         return -ENOENT;
3329 }
3330
3331 /* Reset the nlink of the inode to the correct one */
3332 static int reset_nlink(struct btrfs_trans_handle *trans,
3333                        struct btrfs_root *root,
3334                        struct btrfs_path *path,
3335                        struct inode_record *rec)
3336 {
3337         struct inode_backref *backref;
3338         struct inode_backref *tmp;
3339         struct btrfs_key key;
3340         struct btrfs_inode_item *inode_item;
3341         int ret = 0;
3342
3343         /* We don't believe this either, reset it and iterate backref */
3344         rec->found_link = 0;
3345
3346         /* Remove all backref including the valid ones */
3347         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3348                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3349                                    backref->index, backref->name,
3350                                    backref->namelen, 0);
3351                 if (ret < 0)
3352                         goto out;
3353
3354                 /* remove invalid backref, so it won't be added back */
3355                 if (!(backref->found_dir_index &&
3356                       backref->found_dir_item &&
3357                       backref->found_inode_ref)) {
3358                         list_del(&backref->list);
3359                         free(backref);
3360                 } else {
3361                         rec->found_link++;
3362                 }
3363         }
3364
3365         /* Set nlink to 0 */
3366         key.objectid = rec->ino;
3367         key.type = BTRFS_INODE_ITEM_KEY;
3368         key.offset = 0;
3369         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3370         if (ret < 0)
3371                 goto out;
3372         if (ret > 0) {
3373                 ret = -ENOENT;
3374                 goto out;
3375         }
3376         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3377                                     struct btrfs_inode_item);
3378         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3379         btrfs_mark_buffer_dirty(path->nodes[0]);
3380         btrfs_release_path(path);
3381
3382         /*
3383          * Add back valid inode_ref/dir_item/dir_index,
3384          * add_link() will handle the nlink inc, so new nlink must be correct
3385          */
3386         list_for_each_entry(backref, &rec->backrefs, list) {
3387                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3388                                      backref->name, backref->namelen,
3389                                      backref->filetype, &backref->index, 1, 0);
3390                 if (ret < 0)
3391                         goto out;
3392         }
3393 out:
3394         btrfs_release_path(path);
3395         return ret;
3396 }
3397
3398 static int get_highest_inode(struct btrfs_trans_handle *trans,
3399                                 struct btrfs_root *root,
3400                                 struct btrfs_path *path,
3401                                 u64 *highest_ino)
3402 {
3403         struct btrfs_key key, found_key;
3404         int ret;
3405
3406         btrfs_init_path(path);
3407         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3408         key.offset = -1;
3409         key.type = BTRFS_INODE_ITEM_KEY;
3410         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3411         if (ret == 1) {
3412                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3413                                 path->slots[0] - 1);
3414                 *highest_ino = found_key.objectid;
3415                 ret = 0;
3416         }
3417         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3418                 ret = -EOVERFLOW;
3419         btrfs_release_path(path);
3420         return ret;
3421 }
3422
3423 /*
3424  * Link inode to dir 'lost+found'. Increase @ref_count.
3425  *
3426  * Returns 0 means success.
3427  * Returns <0 means failure.
3428  */
3429 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3430                                    struct btrfs_root *root,
3431                                    struct btrfs_path *path,
3432                                    u64 ino, char *namebuf, u32 name_len,
3433                                    u8 filetype, u64 *ref_count)
3434 {
3435         char *dir_name = "lost+found";
3436         u64 lost_found_ino;
3437         int ret;
3438         u32 mode = 0700;
3439
3440         btrfs_release_path(path);
3441         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3442         if (ret < 0)
3443                 goto out;
3444         lost_found_ino++;
3445
3446         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3447                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3448                           mode);
3449         if (ret < 0) {
3450                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3451                 goto out;
3452         }
3453         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3454                              namebuf, name_len, filetype, NULL, 1, 0);
3455         /*
3456          * Add ".INO" suffix several times to handle case where
3457          * "FILENAME.INO" is already taken by another file.
3458          */
3459         while (ret == -EEXIST) {
3460                 /*
3461                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3462                  */
3463                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3464                         ret = -EFBIG;
3465                         goto out;
3466                 }
3467                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3468                          ".%llu", ino);
3469                 name_len += count_digits(ino) + 1;
3470                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3471                                      name_len, filetype, NULL, 1, 0);
3472         }
3473         if (ret < 0) {
3474                 error("failed to link the inode %llu to %s dir: %s",
3475                       ino, dir_name, strerror(-ret));
3476                 goto out;
3477         }
3478
3479         ++*ref_count;
3480         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3481                name_len, namebuf, dir_name);
3482 out:
3483         btrfs_release_path(path);
3484         if (ret)
3485                 error("failed to move file '%.*s' to '%s' dir", name_len,
3486                                 namebuf, dir_name);
3487         return ret;
3488 }
3489
3490 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3491                                struct btrfs_root *root,
3492                                struct btrfs_path *path,
3493                                struct inode_record *rec)
3494 {
3495         char namebuf[BTRFS_NAME_LEN] = {0};
3496         u8 type = 0;
3497         int namelen = 0;
3498         int name_recovered = 0;
3499         int type_recovered = 0;
3500         int ret = 0;
3501
3502         /*
3503          * Get file name and type first before these invalid inode ref
3504          * are deleted by remove_all_invalid_backref()
3505          */
3506         name_recovered = !find_file_name(rec, namebuf, &namelen);
3507         type_recovered = !find_file_type(rec, &type);
3508
3509         if (!name_recovered) {
3510                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3511                        rec->ino, rec->ino);
3512                 namelen = count_digits(rec->ino);
3513                 sprintf(namebuf, "%llu", rec->ino);
3514                 name_recovered = 1;
3515         }
3516         if (!type_recovered) {
3517                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3518                        rec->ino);
3519                 type = BTRFS_FT_REG_FILE;
3520                 type_recovered = 1;
3521         }
3522
3523         ret = reset_nlink(trans, root, path, rec);
3524         if (ret < 0) {
3525                 fprintf(stderr,
3526                         "Failed to reset nlink for inode %llu: %s\n",
3527                         rec->ino, strerror(-ret));
3528                 goto out;
3529         }
3530
3531         if (rec->found_link == 0) {
3532                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3533                                               namebuf, namelen, type,
3534                                               (u64 *)&rec->found_link);
3535                 if (ret)
3536                         goto out;
3537         }
3538         printf("Fixed the nlink of inode %llu\n", rec->ino);
3539 out:
3540         /*
3541          * Clear the flag anyway, or we will loop forever for the same inode
3542          * as it will not be removed from the bad inode list and the dead loop
3543          * happens.
3544          */
3545         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3546         btrfs_release_path(path);
3547         return ret;
3548 }
3549
3550 /*
3551  * Check if there is any normal(reg or prealloc) file extent for given
3552  * ino.
3553  * This is used to determine the file type when neither its dir_index/item or
3554  * inode_item exists.
3555  *
3556  * This will *NOT* report error, if any error happens, just consider it does
3557  * not have any normal file extent.
3558  */
3559 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3560 {
3561         struct btrfs_path path;
3562         struct btrfs_key key;
3563         struct btrfs_key found_key;
3564         struct btrfs_file_extent_item *fi;
3565         u8 type;
3566         int ret = 0;
3567
3568         btrfs_init_path(&path);
3569         key.objectid = ino;
3570         key.type = BTRFS_EXTENT_DATA_KEY;
3571         key.offset = 0;
3572
3573         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3574         if (ret < 0) {
3575                 ret = 0;
3576                 goto out;
3577         }
3578         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3579                 ret = btrfs_next_leaf(root, &path);
3580                 if (ret) {
3581                         ret = 0;
3582                         goto out;
3583                 }
3584         }
3585         while (1) {
3586                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3587                                       path.slots[0]);
3588                 if (found_key.objectid != ino ||
3589                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3590                         break;
3591                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3592                                     struct btrfs_file_extent_item);
3593                 type = btrfs_file_extent_type(path.nodes[0], fi);
3594                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3595                         ret = 1;
3596                         goto out;
3597                 }
3598         }
3599 out:
3600         btrfs_release_path(&path);
3601         return ret;
3602 }
3603
3604 static u32 btrfs_type_to_imode(u8 type)
3605 {
3606         static u32 imode_by_btrfs_type[] = {
3607                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3608                 [BTRFS_FT_DIR]          = S_IFDIR,
3609                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3610                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3611                 [BTRFS_FT_FIFO]         = S_IFIFO,
3612                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3613                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3614         };
3615
3616         return imode_by_btrfs_type[(type)];
3617 }
3618
3619 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3620                                 struct btrfs_root *root,
3621                                 struct btrfs_path *path,
3622                                 struct inode_record *rec)
3623 {
3624         u8 filetype;
3625         u32 mode = 0700;
3626         int type_recovered = 0;
3627         int ret = 0;
3628
3629         printf("Trying to rebuild inode:%llu\n", rec->ino);
3630
3631         type_recovered = !find_file_type(rec, &filetype);
3632
3633         /*
3634          * Try to determine inode type if type not found.
3635          *
3636          * For found regular file extent, it must be FILE.
3637          * For found dir_item/index, it must be DIR.
3638          *
3639          * For undetermined one, use FILE as fallback.
3640          *
3641          * TODO:
3642          * 1. If found backref(inode_index/item is already handled) to it,
3643          *    it must be DIR.
3644          *    Need new inode-inode ref structure to allow search for that.
3645          */
3646         if (!type_recovered) {
3647                 if (rec->found_file_extent &&
3648                     find_normal_file_extent(root, rec->ino)) {
3649                         type_recovered = 1;
3650                         filetype = BTRFS_FT_REG_FILE;
3651                 } else if (rec->found_dir_item) {
3652                         type_recovered = 1;
3653                         filetype = BTRFS_FT_DIR;
3654                 } else if (!list_empty(&rec->orphan_extents)) {
3655                         type_recovered = 1;
3656                         filetype = BTRFS_FT_REG_FILE;
3657                 } else{
3658                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3659                                rec->ino);
3660                         type_recovered = 1;
3661                         filetype = BTRFS_FT_REG_FILE;
3662                 }
3663         }
3664
3665         ret = btrfs_new_inode(trans, root, rec->ino,
3666                               mode | btrfs_type_to_imode(filetype));
3667         if (ret < 0)
3668                 goto out;
3669
3670         /*
3671          * Here inode rebuild is done, we only rebuild the inode item,
3672          * don't repair the nlink(like move to lost+found).
3673          * That is the job of nlink repair.
3674          *
3675          * We just fill the record and return
3676          */
3677         rec->found_dir_item = 1;
3678         rec->imode = mode | btrfs_type_to_imode(filetype);
3679         rec->nlink = 0;
3680         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3681         /* Ensure the inode_nlinks repair function will be called */
3682         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3683 out:
3684         return ret;
3685 }
3686
3687 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3688                                       struct btrfs_root *root,
3689                                       struct btrfs_path *path,
3690                                       struct inode_record *rec)
3691 {
3692         struct orphan_data_extent *orphan;
3693         struct orphan_data_extent *tmp;
3694         int ret = 0;
3695
3696         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3697                 /*
3698                  * Check for conflicting file extents
3699                  *
3700                  * Here we don't know whether the extents is compressed or not,
3701                  * so we can only assume it not compressed nor data offset,
3702                  * and use its disk_len as extent length.
3703                  */
3704                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3705                                        orphan->offset, orphan->disk_len, 0);
3706                 btrfs_release_path(path);
3707                 if (ret < 0)
3708                         goto out;
3709                 if (!ret) {
3710                         fprintf(stderr,
3711                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3712                                 orphan->disk_bytenr, orphan->disk_len);
3713                         ret = btrfs_free_extent(trans,
3714                                         root->fs_info->extent_root,
3715                                         orphan->disk_bytenr, orphan->disk_len,
3716                                         0, root->objectid, orphan->objectid,
3717                                         orphan->offset);
3718                         if (ret < 0)
3719                                 goto out;
3720                 }
3721                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3722                                 orphan->offset, orphan->disk_bytenr,
3723                                 orphan->disk_len, orphan->disk_len);
3724                 if (ret < 0)
3725                         goto out;
3726
3727                 /* Update file size info */
3728                 rec->found_size += orphan->disk_len;
3729                 if (rec->found_size == rec->nbytes)
3730                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3731
3732                 /* Update the file extent hole info too */
3733                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3734                                            orphan->disk_len);
3735                 if (ret < 0)
3736                         goto out;
3737                 if (RB_EMPTY_ROOT(&rec->holes))
3738                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3739
3740                 list_del(&orphan->list);
3741                 free(orphan);
3742         }
3743         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3744 out:
3745         return ret;
3746 }
3747
3748 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3749                                         struct btrfs_root *root,
3750                                         struct btrfs_path *path,
3751                                         struct inode_record *rec)
3752 {
3753         struct rb_node *node;
3754         struct file_extent_hole *hole;
3755         int found = 0;
3756         int ret = 0;
3757
3758         node = rb_first(&rec->holes);
3759
3760         while (node) {
3761                 found = 1;
3762                 hole = rb_entry(node, struct file_extent_hole, node);
3763                 ret = btrfs_punch_hole(trans, root, rec->ino,
3764                                        hole->start, hole->len);
3765                 if (ret < 0)
3766                         goto out;
3767                 ret = del_file_extent_hole(&rec->holes, hole->start,
3768                                            hole->len);
3769                 if (ret < 0)
3770                         goto out;
3771                 if (RB_EMPTY_ROOT(&rec->holes))
3772                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3773                 node = rb_first(&rec->holes);
3774         }
3775         /* special case for a file losing all its file extent */
3776         if (!found) {
3777                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3778                                        round_up(rec->isize,
3779                                                 root->fs_info->sectorsize));
3780                 if (ret < 0)
3781                         goto out;
3782         }
3783         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3784                rec->ino, root->objectid);
3785 out:
3786         return ret;
3787 }
3788
3789 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3790 {
3791         struct btrfs_trans_handle *trans;
3792         struct btrfs_path path;
3793         int ret = 0;
3794
3795         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3796                              I_ERR_NO_ORPHAN_ITEM |
3797                              I_ERR_LINK_COUNT_WRONG |
3798                              I_ERR_NO_INODE_ITEM |
3799                              I_ERR_FILE_EXTENT_ORPHAN |
3800                              I_ERR_FILE_EXTENT_DISCOUNT|
3801                              I_ERR_FILE_NBYTES_WRONG)))
3802                 return rec->errors;
3803
3804         /*
3805          * For nlink repair, it may create a dir and add link, so
3806          * 2 for parent(256)'s dir_index and dir_item
3807          * 2 for lost+found dir's inode_item and inode_ref
3808          * 1 for the new inode_ref of the file
3809          * 2 for lost+found dir's dir_index and dir_item for the file
3810          */
3811         trans = btrfs_start_transaction(root, 7);
3812         if (IS_ERR(trans))
3813                 return PTR_ERR(trans);
3814
3815         btrfs_init_path(&path);
3816         if (rec->errors & I_ERR_NO_INODE_ITEM)
3817                 ret = repair_inode_no_item(trans, root, &path, rec);
3818         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3819                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3820         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3821                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3822         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3823                 ret = repair_inode_isize(trans, root, &path, rec);
3824         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3825                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3826         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3827                 ret = repair_inode_nlinks(trans, root, &path, rec);
3828         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3829                 ret = repair_inode_nbytes(trans, root, &path, rec);
3830         btrfs_commit_transaction(trans, root);
3831         btrfs_release_path(&path);
3832         return ret;
3833 }
3834
3835 static int check_inode_recs(struct btrfs_root *root,
3836                             struct cache_tree *inode_cache)
3837 {
3838         struct cache_extent *cache;
3839         struct ptr_node *node;
3840         struct inode_record *rec;
3841         struct inode_backref *backref;
3842         int stage = 0;
3843         int ret = 0;
3844         int err = 0;
3845         u64 error = 0;
3846         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3847
3848         if (btrfs_root_refs(&root->root_item) == 0) {
3849                 if (!cache_tree_empty(inode_cache))
3850                         fprintf(stderr, "warning line %d\n", __LINE__);
3851                 return 0;
3852         }
3853
3854         /*
3855          * We need to repair backrefs first because we could change some of the
3856          * errors in the inode recs.
3857          *
3858          * We also need to go through and delete invalid backrefs first and then
3859          * add the correct ones second.  We do this because we may get EEXIST
3860          * when adding back the correct index because we hadn't yet deleted the
3861          * invalid index.
3862          *
3863          * For example, if we were missing a dir index then the directories
3864          * isize would be wrong, so if we fixed the isize to what we thought it
3865          * would be and then fixed the backref we'd still have a invalid fs, so
3866          * we need to add back the dir index and then check to see if the isize
3867          * is still wrong.
3868          */
3869         while (stage < 3) {
3870                 stage++;
3871                 if (stage == 3 && !err)
3872                         break;
3873
3874                 cache = search_cache_extent(inode_cache, 0);
3875                 while (repair && cache) {
3876                         node = container_of(cache, struct ptr_node, cache);
3877                         rec = node->data;
3878                         cache = next_cache_extent(cache);
3879
3880                         /* Need to free everything up and rescan */
3881                         if (stage == 3) {
3882                                 remove_cache_extent(inode_cache, &node->cache);
3883                                 free(node);
3884                                 free_inode_rec(rec);
3885                                 continue;
3886                         }
3887
3888                         if (list_empty(&rec->backrefs))
3889                                 continue;
3890
3891                         ret = repair_inode_backrefs(root, rec, inode_cache,
3892                                                     stage == 1);
3893                         if (ret < 0) {
3894                                 err = ret;
3895                                 stage = 2;
3896                                 break;
3897                         } if (ret > 0) {
3898                                 err = -EAGAIN;
3899                         }
3900                 }
3901         }
3902         if (err)
3903                 return err;
3904
3905         rec = get_inode_rec(inode_cache, root_dirid, 0);
3906         BUG_ON(IS_ERR(rec));
3907         if (rec) {
3908                 ret = check_root_dir(rec);
3909                 if (ret) {
3910                         fprintf(stderr, "root %llu root dir %llu error\n",
3911                                 (unsigned long long)root->root_key.objectid,
3912                                 (unsigned long long)root_dirid);
3913                         print_inode_error(root, rec);
3914                         error++;
3915                 }
3916         } else {
3917                 if (repair) {
3918                         struct btrfs_trans_handle *trans;
3919
3920                         trans = btrfs_start_transaction(root, 1);
3921                         if (IS_ERR(trans)) {
3922                                 err = PTR_ERR(trans);
3923                                 return err;
3924                         }
3925
3926                         fprintf(stderr,
3927                                 "root %llu missing its root dir, recreating\n",
3928                                 (unsigned long long)root->objectid);
3929
3930                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3931                         BUG_ON(ret);
3932
3933                         btrfs_commit_transaction(trans, root);
3934                         return -EAGAIN;
3935                 }
3936
3937                 fprintf(stderr, "root %llu root dir %llu not found\n",
3938                         (unsigned long long)root->root_key.objectid,
3939                         (unsigned long long)root_dirid);
3940         }
3941
3942         while (1) {
3943                 cache = search_cache_extent(inode_cache, 0);
3944                 if (!cache)
3945                         break;
3946                 node = container_of(cache, struct ptr_node, cache);
3947                 rec = node->data;
3948                 remove_cache_extent(inode_cache, &node->cache);
3949                 free(node);
3950                 if (rec->ino == root_dirid ||
3951                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3952                         free_inode_rec(rec);
3953                         continue;
3954                 }
3955
3956                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3957                         ret = check_orphan_item(root, rec->ino);
3958                         if (ret == 0)
3959                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3960                         if (can_free_inode_rec(rec)) {
3961                                 free_inode_rec(rec);
3962                                 continue;
3963                         }
3964                 }
3965
3966                 if (!rec->found_inode_item)
3967                         rec->errors |= I_ERR_NO_INODE_ITEM;
3968                 if (rec->found_link != rec->nlink)
3969                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3970                 if (repair) {
3971                         ret = try_repair_inode(root, rec);
3972                         if (ret == 0 && can_free_inode_rec(rec)) {
3973                                 free_inode_rec(rec);
3974                                 continue;
3975                         }
3976                         ret = 0;
3977                 }
3978
3979                 if (!(repair && ret == 0))
3980                         error++;
3981                 print_inode_error(root, rec);
3982                 list_for_each_entry(backref, &rec->backrefs, list) {
3983                         if (!backref->found_dir_item)
3984                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3985                         if (!backref->found_dir_index)
3986                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3987                         if (!backref->found_inode_ref)
3988                                 backref->errors |= REF_ERR_NO_INODE_REF;
3989                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3990                                 " namelen %u name %s filetype %d errors %x",
3991                                 (unsigned long long)backref->dir,
3992                                 (unsigned long long)backref->index,
3993                                 backref->namelen, backref->name,
3994                                 backref->filetype, backref->errors);
3995                         print_ref_error(backref->errors);
3996                 }
3997                 free_inode_rec(rec);
3998         }
3999         return (error > 0) ? -1 : 0;
4000 }
4001
4002 static struct root_record *get_root_rec(struct cache_tree *root_cache,
4003                                         u64 objectid)
4004 {
4005         struct cache_extent *cache;
4006         struct root_record *rec = NULL;
4007         int ret;
4008
4009         cache = lookup_cache_extent(root_cache, objectid, 1);
4010         if (cache) {
4011                 rec = container_of(cache, struct root_record, cache);
4012         } else {
4013                 rec = calloc(1, sizeof(*rec));
4014                 if (!rec)
4015                         return ERR_PTR(-ENOMEM);
4016                 rec->objectid = objectid;
4017                 INIT_LIST_HEAD(&rec->backrefs);
4018                 rec->cache.start = objectid;
4019                 rec->cache.size = 1;
4020
4021                 ret = insert_cache_extent(root_cache, &rec->cache);
4022                 if (ret)
4023                         return ERR_PTR(-EEXIST);
4024         }
4025         return rec;
4026 }
4027
4028 static struct root_backref *get_root_backref(struct root_record *rec,
4029                                              u64 ref_root, u64 dir, u64 index,
4030                                              const char *name, int namelen)
4031 {
4032         struct root_backref *backref;
4033
4034         list_for_each_entry(backref, &rec->backrefs, list) {
4035                 if (backref->ref_root != ref_root || backref->dir != dir ||
4036                     backref->namelen != namelen)
4037                         continue;
4038                 if (memcmp(name, backref->name, namelen))
4039                         continue;
4040                 return backref;
4041         }
4042
4043         backref = calloc(1, sizeof(*backref) + namelen + 1);
4044         if (!backref)
4045                 return NULL;
4046         backref->ref_root = ref_root;
4047         backref->dir = dir;
4048         backref->index = index;
4049         backref->namelen = namelen;
4050         memcpy(backref->name, name, namelen);
4051         backref->name[namelen] = '\0';
4052         list_add_tail(&backref->list, &rec->backrefs);
4053         return backref;
4054 }
4055
4056 static void free_root_record(struct cache_extent *cache)
4057 {
4058         struct root_record *rec;
4059         struct root_backref *backref;
4060
4061         rec = container_of(cache, struct root_record, cache);
4062         while (!list_empty(&rec->backrefs)) {
4063                 backref = to_root_backref(rec->backrefs.next);
4064                 list_del(&backref->list);
4065                 free(backref);
4066         }
4067
4068         free(rec);
4069 }
4070
4071 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
4072
4073 static int add_root_backref(struct cache_tree *root_cache,
4074                             u64 root_id, u64 ref_root, u64 dir, u64 index,
4075                             const char *name, int namelen,
4076                             int item_type, int errors)
4077 {
4078         struct root_record *rec;
4079         struct root_backref *backref;
4080
4081         rec = get_root_rec(root_cache, root_id);
4082         BUG_ON(IS_ERR(rec));
4083         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4084         BUG_ON(!backref);
4085
4086         backref->errors |= errors;
4087
4088         if (item_type != BTRFS_DIR_ITEM_KEY) {
4089                 if (backref->found_dir_index || backref->found_back_ref ||
4090                     backref->found_forward_ref) {
4091                         if (backref->index != index)
4092                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
4093                 } else {
4094                         backref->index = index;
4095                 }
4096         }
4097
4098         if (item_type == BTRFS_DIR_ITEM_KEY) {
4099                 if (backref->found_forward_ref)
4100                         rec->found_ref++;
4101                 backref->found_dir_item = 1;
4102         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4103                 backref->found_dir_index = 1;
4104         } else if (item_type == BTRFS_ROOT_REF_KEY) {
4105                 if (backref->found_forward_ref)
4106                         backref->errors |= REF_ERR_DUP_ROOT_REF;
4107                 else if (backref->found_dir_item)
4108                         rec->found_ref++;
4109                 backref->found_forward_ref = 1;
4110         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4111                 if (backref->found_back_ref)
4112                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4113                 backref->found_back_ref = 1;
4114         } else {
4115                 BUG_ON(1);
4116         }
4117
4118         if (backref->found_forward_ref && backref->found_dir_item)
4119                 backref->reachable = 1;
4120         return 0;
4121 }
4122
4123 static int merge_root_recs(struct btrfs_root *root,
4124                            struct cache_tree *src_cache,
4125                            struct cache_tree *dst_cache)
4126 {
4127         struct cache_extent *cache;
4128         struct ptr_node *node;
4129         struct inode_record *rec;
4130         struct inode_backref *backref;
4131         int ret = 0;
4132
4133         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4134                 free_inode_recs_tree(src_cache);
4135                 return 0;
4136         }
4137
4138         while (1) {
4139                 cache = search_cache_extent(src_cache, 0);
4140                 if (!cache)
4141                         break;
4142                 node = container_of(cache, struct ptr_node, cache);
4143                 rec = node->data;
4144                 remove_cache_extent(src_cache, &node->cache);
4145                 free(node);
4146
4147                 ret = is_child_root(root, root->objectid, rec->ino);
4148                 if (ret < 0)
4149                         break;
4150                 else if (ret == 0)
4151                         goto skip;
4152
4153                 list_for_each_entry(backref, &rec->backrefs, list) {
4154                         BUG_ON(backref->found_inode_ref);
4155                         if (backref->found_dir_item)
4156                                 add_root_backref(dst_cache, rec->ino,
4157                                         root->root_key.objectid, backref->dir,
4158                                         backref->index, backref->name,
4159                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
4160                                         backref->errors);
4161                         if (backref->found_dir_index)
4162                                 add_root_backref(dst_cache, rec->ino,
4163                                         root->root_key.objectid, backref->dir,
4164                                         backref->index, backref->name,
4165                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
4166                                         backref->errors);
4167                 }
4168 skip:
4169                 free_inode_rec(rec);
4170         }
4171         if (ret < 0)
4172                 return ret;
4173         return 0;
4174 }
4175
4176 static int check_root_refs(struct btrfs_root *root,
4177                            struct cache_tree *root_cache)
4178 {
4179         struct root_record *rec;
4180         struct root_record *ref_root;
4181         struct root_backref *backref;
4182         struct cache_extent *cache;
4183         int loop = 1;
4184         int ret;
4185         int error;
4186         int errors = 0;
4187
4188         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4189         BUG_ON(IS_ERR(rec));
4190         rec->found_ref = 1;
4191
4192         /* fixme: this can not detect circular references */
4193         while (loop) {
4194                 loop = 0;
4195                 cache = search_cache_extent(root_cache, 0);
4196                 while (1) {
4197                         if (!cache)
4198                                 break;
4199                         rec = container_of(cache, struct root_record, cache);
4200                         cache = next_cache_extent(cache);
4201
4202                         if (rec->found_ref == 0)
4203                                 continue;
4204
4205                         list_for_each_entry(backref, &rec->backrefs, list) {
4206                                 if (!backref->reachable)
4207                                         continue;
4208
4209                                 ref_root = get_root_rec(root_cache,
4210                                                         backref->ref_root);
4211                                 BUG_ON(IS_ERR(ref_root));
4212                                 if (ref_root->found_ref > 0)
4213                                         continue;
4214
4215                                 backref->reachable = 0;
4216                                 rec->found_ref--;
4217                                 if (rec->found_ref == 0)
4218                                         loop = 1;
4219                         }
4220                 }
4221         }
4222
4223         cache = search_cache_extent(root_cache, 0);
4224         while (1) {
4225                 if (!cache)
4226                         break;
4227                 rec = container_of(cache, struct root_record, cache);
4228                 cache = next_cache_extent(cache);
4229
4230                 if (rec->found_ref == 0 &&
4231                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4232                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4233                         ret = check_orphan_item(root->fs_info->tree_root,
4234                                                 rec->objectid);
4235                         if (ret == 0)
4236                                 continue;
4237
4238                         /*
4239                          * If we don't have a root item then we likely just have
4240                          * a dir item in a snapshot for this root but no actual
4241                          * ref key or anything so it's meaningless.
4242                          */
4243                         if (!rec->found_root_item)
4244                                 continue;
4245                         errors++;
4246                         fprintf(stderr, "fs tree %llu not referenced\n",
4247                                 (unsigned long long)rec->objectid);
4248                 }
4249
4250                 error = 0;
4251                 if (rec->found_ref > 0 && !rec->found_root_item)
4252                         error = 1;
4253                 list_for_each_entry(backref, &rec->backrefs, list) {
4254                         if (!backref->found_dir_item)
4255                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
4256                         if (!backref->found_dir_index)
4257                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
4258                         if (!backref->found_back_ref)
4259                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4260                         if (!backref->found_forward_ref)
4261                                 backref->errors |= REF_ERR_NO_ROOT_REF;
4262                         if (backref->reachable && backref->errors)
4263                                 error = 1;
4264                 }
4265                 if (!error)
4266                         continue;
4267
4268                 errors++;
4269                 fprintf(stderr, "fs tree %llu refs %u %s\n",
4270                         (unsigned long long)rec->objectid, rec->found_ref,
4271                          rec->found_root_item ? "" : "not found");
4272
4273                 list_for_each_entry(backref, &rec->backrefs, list) {
4274                         if (!backref->reachable)
4275                                 continue;
4276                         if (!backref->errors && rec->found_root_item)
4277                                 continue;
4278                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4279                                 " index %llu namelen %u name %s errors %x\n",
4280                                 (unsigned long long)backref->ref_root,
4281                                 (unsigned long long)backref->dir,
4282                                 (unsigned long long)backref->index,
4283                                 backref->namelen, backref->name,
4284                                 backref->errors);
4285                         print_ref_error(backref->errors);
4286                 }
4287         }
4288         return errors > 0 ? 1 : 0;
4289 }
4290
4291 static int process_root_ref(struct extent_buffer *eb, int slot,
4292                             struct btrfs_key *key,
4293                             struct cache_tree *root_cache)
4294 {
4295         u64 dirid;
4296         u64 index;
4297         u32 len;
4298         u32 name_len;
4299         struct btrfs_root_ref *ref;
4300         char namebuf[BTRFS_NAME_LEN];
4301         int error;
4302
4303         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4304
4305         dirid = btrfs_root_ref_dirid(eb, ref);
4306         index = btrfs_root_ref_sequence(eb, ref);
4307         name_len = btrfs_root_ref_name_len(eb, ref);
4308
4309         if (name_len <= BTRFS_NAME_LEN) {
4310                 len = name_len;
4311                 error = 0;
4312         } else {
4313                 len = BTRFS_NAME_LEN;
4314                 error = REF_ERR_NAME_TOO_LONG;
4315         }
4316         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4317
4318         if (key->type == BTRFS_ROOT_REF_KEY) {
4319                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4320                                  index, namebuf, len, key->type, error);
4321         } else {
4322                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4323                                  index, namebuf, len, key->type, error);
4324         }
4325         return 0;
4326 }
4327
4328 static void free_corrupt_block(struct cache_extent *cache)
4329 {
4330         struct btrfs_corrupt_block *corrupt;
4331
4332         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4333         free(corrupt);
4334 }
4335
4336 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4337
4338 /*
4339  * Repair the btree of the given root.
4340  *
4341  * The fix is to remove the node key in corrupt_blocks cache_tree.
4342  * and rebalance the tree.
4343  * After the fix, the btree should be writeable.
4344  */
4345 static int repair_btree(struct btrfs_root *root,
4346                         struct cache_tree *corrupt_blocks)
4347 {
4348         struct btrfs_trans_handle *trans;
4349         struct btrfs_path path;
4350         struct btrfs_corrupt_block *corrupt;
4351         struct cache_extent *cache;
4352         struct btrfs_key key;
4353         u64 offset;
4354         int level;
4355         int ret = 0;
4356
4357         if (cache_tree_empty(corrupt_blocks))
4358                 return 0;
4359
4360         trans = btrfs_start_transaction(root, 1);
4361         if (IS_ERR(trans)) {
4362                 ret = PTR_ERR(trans);
4363                 fprintf(stderr, "Error starting transaction: %s\n",
4364                         strerror(-ret));
4365                 return ret;
4366         }
4367         btrfs_init_path(&path);
4368         cache = first_cache_extent(corrupt_blocks);
4369         while (cache) {
4370                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4371                                        cache);
4372                 level = corrupt->level;
4373                 path.lowest_level = level;
4374                 key.objectid = corrupt->key.objectid;
4375                 key.type = corrupt->key.type;
4376                 key.offset = corrupt->key.offset;
4377
4378                 /*
4379                  * Here we don't want to do any tree balance, since it may
4380                  * cause a balance with corrupted brother leaf/node,
4381                  * so ins_len set to 0 here.
4382                  * Balance will be done after all corrupt node/leaf is deleted.
4383                  */
4384                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4385                 if (ret < 0)
4386                         goto out;
4387                 offset = btrfs_node_blockptr(path.nodes[level],
4388                                              path.slots[level]);
4389
4390                 /* Remove the ptr */
4391                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4392                 if (ret < 0)
4393                         goto out;
4394                 /*
4395                  * Remove the corresponding extent
4396                  * return value is not concerned.
4397                  */
4398                 btrfs_release_path(&path);
4399                 ret = btrfs_free_extent(trans, root, offset,
4400                                 root->fs_info->nodesize, 0,
4401                                 root->root_key.objectid, level - 1, 0);
4402                 cache = next_cache_extent(cache);
4403         }
4404
4405         /* Balance the btree using btrfs_search_slot() */
4406         cache = first_cache_extent(corrupt_blocks);
4407         while (cache) {
4408                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4409                                        cache);
4410                 memcpy(&key, &corrupt->key, sizeof(key));
4411                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4412                 if (ret < 0)
4413                         goto out;
4414                 /* return will always >0 since it won't find the item */
4415                 ret = 0;
4416                 btrfs_release_path(&path);
4417                 cache = next_cache_extent(cache);
4418         }
4419 out:
4420         btrfs_commit_transaction(trans, root);
4421         btrfs_release_path(&path);
4422         return ret;
4423 }
4424
4425 static int check_fs_root(struct btrfs_root *root,
4426                          struct cache_tree *root_cache,
4427                          struct walk_control *wc)
4428 {
4429         int ret = 0;
4430         int err = 0;
4431         int wret;
4432         int level;
4433         struct btrfs_path path;
4434         struct shared_node root_node;
4435         struct root_record *rec;
4436         struct btrfs_root_item *root_item = &root->root_item;
4437         struct cache_tree corrupt_blocks;
4438         struct orphan_data_extent *orphan;
4439         struct orphan_data_extent *tmp;
4440         enum btrfs_tree_block_status status;
4441         struct node_refs nrefs;
4442
4443         /*
4444          * Reuse the corrupt_block cache tree to record corrupted tree block
4445          *
4446          * Unlike the usage in extent tree check, here we do it in a per
4447          * fs/subvol tree base.
4448          */
4449         cache_tree_init(&corrupt_blocks);
4450         root->fs_info->corrupt_blocks = &corrupt_blocks;
4451
4452         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4453                 rec = get_root_rec(root_cache, root->root_key.objectid);
4454                 BUG_ON(IS_ERR(rec));
4455                 if (btrfs_root_refs(root_item) > 0)
4456                         rec->found_root_item = 1;
4457         }
4458
4459         btrfs_init_path(&path);
4460         memset(&root_node, 0, sizeof(root_node));
4461         cache_tree_init(&root_node.root_cache);
4462         cache_tree_init(&root_node.inode_cache);
4463         memset(&nrefs, 0, sizeof(nrefs));
4464
4465         /* Move the orphan extent record to corresponding inode_record */
4466         list_for_each_entry_safe(orphan, tmp,
4467                                  &root->orphan_data_extents, list) {
4468                 struct inode_record *inode;
4469
4470                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4471                                       1);
4472                 BUG_ON(IS_ERR(inode));
4473                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4474                 list_move(&orphan->list, &inode->orphan_extents);
4475         }
4476
4477         level = btrfs_header_level(root->node);
4478         memset(wc->nodes, 0, sizeof(wc->nodes));
4479         wc->nodes[level] = &root_node;
4480         wc->active_node = level;
4481         wc->root_level = level;
4482
4483         /* We may not have checked the root block, lets do that now */
4484         if (btrfs_is_leaf(root->node))
4485                 status = btrfs_check_leaf(root, NULL, root->node);
4486         else
4487                 status = btrfs_check_node(root, NULL, root->node);
4488         if (status != BTRFS_TREE_BLOCK_CLEAN)
4489                 return -EIO;
4490
4491         if (btrfs_root_refs(root_item) > 0 ||
4492             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4493                 path.nodes[level] = root->node;
4494                 extent_buffer_get(root->node);
4495                 path.slots[level] = 0;
4496         } else {
4497                 struct btrfs_key key;
4498                 struct btrfs_disk_key found_key;
4499
4500                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4501                 level = root_item->drop_level;
4502                 path.lowest_level = level;
4503                 if (level > btrfs_header_level(root->node) ||
4504                     level >= BTRFS_MAX_LEVEL) {
4505                         error("ignoring invalid drop level: %u", level);
4506                         goto skip_walking;
4507                 }
4508                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4509                 if (wret < 0)
4510                         goto skip_walking;
4511                 btrfs_node_key(path.nodes[level], &found_key,
4512                                 path.slots[level]);
4513                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4514                                         sizeof(found_key)));
4515         }
4516
4517         while (1) {
4518                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4519                 if (wret < 0)
4520                         ret = wret;
4521                 if (wret != 0)
4522                         break;
4523
4524                 wret = walk_up_tree(root, &path, wc, &level);
4525                 if (wret < 0)
4526                         ret = wret;
4527                 if (wret != 0)
4528                         break;
4529         }
4530 skip_walking:
4531         btrfs_release_path(&path);
4532
4533         if (!cache_tree_empty(&corrupt_blocks)) {
4534                 struct cache_extent *cache;
4535                 struct btrfs_corrupt_block *corrupt;
4536
4537                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4538                        root->root_key.objectid);
4539                 cache = first_cache_extent(&corrupt_blocks);
4540                 while (cache) {
4541                         corrupt = container_of(cache,
4542                                                struct btrfs_corrupt_block,
4543                                                cache);
4544                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4545                                cache->start, corrupt->level,
4546                                corrupt->key.objectid, corrupt->key.type,
4547                                corrupt->key.offset);
4548                         cache = next_cache_extent(cache);
4549                 }
4550                 if (repair) {
4551                         printf("Try to repair the btree for root %llu\n",
4552                                root->root_key.objectid);
4553                         ret = repair_btree(root, &corrupt_blocks);
4554                         if (ret < 0)
4555                                 fprintf(stderr, "Failed to repair btree: %s\n",
4556                                         strerror(-ret));
4557                         if (!ret)
4558                                 printf("Btree for root %llu is fixed\n",
4559                                        root->root_key.objectid);
4560                 }
4561         }
4562
4563         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4564         if (err < 0)
4565                 ret = err;
4566
4567         if (root_node.current) {
4568                 root_node.current->checked = 1;
4569                 maybe_free_inode_rec(&root_node.inode_cache,
4570                                 root_node.current);
4571         }
4572
4573         err = check_inode_recs(root, &root_node.inode_cache);
4574         if (!ret)
4575                 ret = err;
4576
4577         free_corrupt_blocks_tree(&corrupt_blocks);
4578         root->fs_info->corrupt_blocks = NULL;
4579         free_orphan_data_extents(&root->orphan_data_extents);
4580         return ret;
4581 }
4582
4583 static int fs_root_objectid(u64 objectid)
4584 {
4585         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4586             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4587                 return 1;
4588         return is_fstree(objectid);
4589 }
4590
4591 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4592                           struct cache_tree *root_cache)
4593 {
4594         struct btrfs_path path;
4595         struct btrfs_key key;
4596         struct walk_control wc;
4597         struct extent_buffer *leaf, *tree_node;
4598         struct btrfs_root *tmp_root;
4599         struct btrfs_root *tree_root = fs_info->tree_root;
4600         int ret;
4601         int err = 0;
4602
4603         if (ctx.progress_enabled) {
4604                 ctx.tp = TASK_FS_ROOTS;
4605                 task_start(ctx.info);
4606         }
4607
4608         /*
4609          * Just in case we made any changes to the extent tree that weren't
4610          * reflected into the free space cache yet.
4611          */
4612         if (repair)
4613                 reset_cached_block_groups(fs_info);
4614         memset(&wc, 0, sizeof(wc));
4615         cache_tree_init(&wc.shared);
4616         btrfs_init_path(&path);
4617
4618 again:
4619         key.offset = 0;
4620         key.objectid = 0;
4621         key.type = BTRFS_ROOT_ITEM_KEY;
4622         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4623         if (ret < 0) {
4624                 err = 1;
4625                 goto out;
4626         }
4627         tree_node = tree_root->node;
4628         while (1) {
4629                 if (tree_node != tree_root->node) {
4630                         free_root_recs_tree(root_cache);
4631                         btrfs_release_path(&path);
4632                         goto again;
4633                 }
4634                 leaf = path.nodes[0];
4635                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4636                         ret = btrfs_next_leaf(tree_root, &path);
4637                         if (ret) {
4638                                 if (ret < 0)
4639                                         err = 1;
4640                                 break;
4641                         }
4642                         leaf = path.nodes[0];
4643                 }
4644                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4645                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4646                     fs_root_objectid(key.objectid)) {
4647                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4648                                 tmp_root = btrfs_read_fs_root_no_cache(
4649                                                 fs_info, &key);
4650                         } else {
4651                                 key.offset = (u64)-1;
4652                                 tmp_root = btrfs_read_fs_root(
4653                                                 fs_info, &key);
4654                         }
4655                         if (IS_ERR(tmp_root)) {
4656                                 err = 1;
4657                                 goto next;
4658                         }
4659                         ret = check_fs_root(tmp_root, root_cache, &wc);
4660                         if (ret == -EAGAIN) {
4661                                 free_root_recs_tree(root_cache);
4662                                 btrfs_release_path(&path);
4663                                 goto again;
4664                         }
4665                         if (ret)
4666                                 err = 1;
4667                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4668                                 btrfs_free_fs_root(tmp_root);
4669                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4670                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4671                         process_root_ref(leaf, path.slots[0], &key,
4672                                          root_cache);
4673                 }
4674 next:
4675                 path.slots[0]++;
4676         }
4677 out:
4678         btrfs_release_path(&path);
4679         if (err)
4680                 free_extent_cache_tree(&wc.shared);
4681         if (!cache_tree_empty(&wc.shared))
4682                 fprintf(stderr, "warning line %d\n", __LINE__);
4683
4684         task_stop(ctx.info);
4685
4686         return err;
4687 }
4688
4689 /*
4690  * Find the @index according by @ino and name.
4691  * Notice:time efficiency is O(N)
4692  *
4693  * @root:       the root of the fs/file tree
4694  * @index_ret:  the index as return value
4695  * @namebuf:    the name to match
4696  * @name_len:   the length of name to match
4697  * @file_type:  the file_type of INODE_ITEM to match
4698  *
4699  * Returns 0 if found and *@index_ret will be modified with right value
4700  * Returns< 0 not found and *@index_ret will be (u64)-1
4701  */
4702 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4703                           u64 *index_ret, char *namebuf, u32 name_len,
4704                           u8 file_type)
4705 {
4706         struct btrfs_path path;
4707         struct extent_buffer *node;
4708         struct btrfs_dir_item *di;
4709         struct btrfs_key key;
4710         struct btrfs_key location;
4711         char name[BTRFS_NAME_LEN] = {0};
4712
4713         u32 total;
4714         u32 cur = 0;
4715         u32 len;
4716         u32 data_len;
4717         u8 filetype;
4718         int slot;
4719         int ret;
4720
4721         ASSERT(index_ret);
4722
4723         /* search from the last index */
4724         key.objectid = dirid;
4725         key.offset = (u64)-1;
4726         key.type = BTRFS_DIR_INDEX_KEY;
4727
4728         btrfs_init_path(&path);
4729         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4730         if (ret < 0)
4731                 return ret;
4732
4733 loop:
4734         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4735         if (ret) {
4736                 ret = -ENOENT;
4737                 *index_ret = (64)-1;
4738                 goto out;
4739         }
4740         /* Check whether inode_id/filetype/name match */
4741         node = path.nodes[0];
4742         slot = path.slots[0];
4743         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4744         total = btrfs_item_size_nr(node, slot);
4745         while (cur < total) {
4746                 ret = -ENOENT;
4747                 len = btrfs_dir_name_len(node, di);
4748                 data_len = btrfs_dir_data_len(node, di);
4749
4750                 btrfs_dir_item_key_to_cpu(node, di, &location);
4751                 if (location.objectid != location_id ||
4752                     location.type != BTRFS_INODE_ITEM_KEY ||
4753                     location.offset != 0)
4754                         goto next;
4755
4756                 filetype = btrfs_dir_type(node, di);
4757                 if (file_type != filetype)
4758                         goto next;
4759
4760                 if (len > BTRFS_NAME_LEN)
4761                         len = BTRFS_NAME_LEN;
4762
4763                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4764                 if (len != name_len || strncmp(namebuf, name, len))
4765                         goto next;
4766
4767                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4768                 *index_ret = key.offset;
4769                 ret = 0;
4770                 goto out;
4771 next:
4772                 len += sizeof(*di) + data_len;
4773                 di = (struct btrfs_dir_item *)((char *)di + len);
4774                 cur += len;
4775         }
4776         goto loop;
4777
4778 out:
4779         btrfs_release_path(&path);
4780         return ret;
4781 }
4782
4783 /*
4784  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4785  * INODE_REF/INODE_EXTREF match.
4786  *
4787  * @root:       the root of the fs/file tree
4788  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4789  *              value while find index
4790  * @location_key: location key of the struct btrfs_dir_item to match
4791  * @name:       the name to match
4792  * @namelen:    the length of name
4793  * @file_type:  the type of file to math
4794  *
4795  * Return 0 if no error occurred.
4796  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4797  * DIR_ITEM/DIR_INDEX
4798  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4799  * and DIR_ITEM/DIR_INDEX mismatch
4800  */
4801 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4802                          struct btrfs_key *location_key, char *name,
4803                          u32 namelen, u8 file_type)
4804 {
4805         struct btrfs_path path;
4806         struct extent_buffer *node;
4807         struct btrfs_dir_item *di;
4808         struct btrfs_key location;
4809         char namebuf[BTRFS_NAME_LEN] = {0};
4810         u32 total;
4811         u32 cur = 0;
4812         u32 len;
4813         u32 data_len;
4814         u8 filetype;
4815         int slot;
4816         int ret;
4817
4818         /* get the index by traversing all index */
4819         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4820                 ret = find_dir_index(root, key->objectid,
4821                                      location_key->objectid, &key->offset,
4822                                      name, namelen, file_type);
4823                 if (ret)
4824                         ret = DIR_INDEX_MISSING;
4825                 return ret;
4826         }
4827
4828         btrfs_init_path(&path);
4829         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4830         if (ret) {
4831                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4832                         DIR_INDEX_MISSING;
4833                 goto out;
4834         }
4835
4836         /* Check whether inode_id/filetype/name match */
4837         node = path.nodes[0];
4838         slot = path.slots[0];
4839         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4840         total = btrfs_item_size_nr(node, slot);
4841         while (cur < total) {
4842                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4843                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4844
4845                 len = btrfs_dir_name_len(node, di);
4846                 data_len = btrfs_dir_data_len(node, di);
4847
4848                 btrfs_dir_item_key_to_cpu(node, di, &location);
4849                 if (location.objectid != location_key->objectid ||
4850                     location.type != location_key->type ||
4851                     location.offset != location_key->offset)
4852                         goto next;
4853
4854                 filetype = btrfs_dir_type(node, di);
4855                 if (file_type != filetype)
4856                         goto next;
4857
4858                 if (len > BTRFS_NAME_LEN) {
4859                         len = BTRFS_NAME_LEN;
4860                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4861                         root->objectid,
4862                         key->type == BTRFS_DIR_ITEM_KEY ?
4863                         "DIR_ITEM" : "DIR_INDEX",
4864                         key->objectid, key->offset, len);
4865                 }
4866                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4867                                    len);
4868                 if (len != namelen || strncmp(namebuf, name, len))
4869                         goto next;
4870
4871                 ret = 0;
4872                 goto out;
4873 next:
4874                 len += sizeof(*di) + data_len;
4875                 di = (struct btrfs_dir_item *)((char *)di + len);
4876                 cur += len;
4877         }
4878
4879 out:
4880         btrfs_release_path(&path);
4881         return ret;
4882 }
4883
4884 /*
4885  * Prints inode ref error message
4886  */
4887 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4888                                 u64 index, const char *namebuf, int name_len,
4889                                 u8 filetype, int err)
4890 {
4891         if (!err)
4892                 return;
4893
4894         /* root dir error */
4895         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4896                 error(
4897         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4898                       root->objectid, key->objectid, key->offset, namebuf);
4899                 return;
4900         }
4901
4902         /* normal error */
4903         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4904                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4905                       root->objectid, key->offset,
4906                       btrfs_name_hash(namebuf, name_len),
4907                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4908                       namebuf, filetype);
4909         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4910                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4911                       root->objectid, key->offset, index,
4912                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4913                       namebuf, filetype);
4914 }
4915
4916 /*
4917  * Insert the missing inode item.
4918  *
4919  * Returns 0 means success.
4920  * Returns <0 means error.
4921  */
4922 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4923                                      u8 filetype)
4924 {
4925         struct btrfs_key key;
4926         struct btrfs_trans_handle *trans;
4927         struct btrfs_path path;
4928         int ret;
4929
4930         key.objectid = ino;
4931         key.type = BTRFS_INODE_ITEM_KEY;
4932         key.offset = 0;
4933
4934         btrfs_init_path(&path);
4935         trans = btrfs_start_transaction(root, 1);
4936         if (IS_ERR(trans)) {
4937                 ret = -EIO;
4938                 goto out;
4939         }
4940
4941         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4942         if (ret < 0 || !ret)
4943                 goto fail;
4944
4945         /* insert inode item */
4946         create_inode_item_lowmem(trans, root, ino, filetype);
4947         ret = 0;
4948 fail:
4949         btrfs_commit_transaction(trans, root);
4950 out:
4951         if (ret)
4952                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4953                       root->objectid, ino);
4954         btrfs_release_path(&path);
4955         return ret;
4956 }
4957
4958 /*
4959  * The ternary means dir item, dir index and relative inode ref.
4960  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4961  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4962  * strategy:
4963  * If two of three is missing or mismatched, delete the existing one.
4964  * If one of three is missing or mismatched, add the missing one.
4965  *
4966  * returns 0 means success.
4967  * returns not 0 means on error;
4968  */
4969 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4970                           u64 index, char *name, int name_len, u8 filetype,
4971                           int err)
4972 {
4973         struct btrfs_trans_handle *trans;
4974         int stage = 0;
4975         int ret = 0;
4976
4977         /*
4978          * stage shall be one of following valild values:
4979          *      0: Fine, nothing to do.
4980          *      1: One of three is wrong, so add missing one.
4981          *      2: Two of three is wrong, so delete existed one.
4982          */
4983         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4984                 stage++;
4985         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4986                 stage++;
4987         if (err & (INODE_REF_MISSING))
4988                 stage++;
4989
4990         /* stage must be smllarer than 3 */
4991         ASSERT(stage < 3);
4992
4993         trans = btrfs_start_transaction(root, 1);
4994         if (stage == 2) {
4995                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4996                                    name_len, 0);
4997                 goto out;
4998         }
4999         if (stage == 1) {
5000                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
5001                                filetype, &index, 1, 1);
5002                 goto out;
5003         }
5004 out:
5005         btrfs_commit_transaction(trans, root);
5006
5007         if (ret)
5008                 error("fail to repair inode %llu name %s filetype %u",
5009                       ino, name, filetype);
5010         else
5011                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5012                        stage == 2 ? "Delete" : "Add",
5013                        ino, name, filetype);
5014
5015         return ret;
5016 }
5017
5018 /*
5019  * Traverse the given INODE_REF and call find_dir_item() to find related
5020  * DIR_ITEM/DIR_INDEX.
5021  *
5022  * @root:       the root of the fs/file tree
5023  * @ref_key:    the key of the INODE_REF
5024  * @path        the path provides node and slot
5025  * @refs:       the count of INODE_REF
5026  * @mode:       the st_mode of INODE_ITEM
5027  * @name_ret:   returns with the first ref's name
5028  * @name_len_ret:    len of the name_ret
5029  *
5030  * Return 0 if no error occurred.
5031  */
5032 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5033                            struct btrfs_path *path, char *name_ret,
5034                            u32 *namelen_ret, u64 *refs_ret, int mode)
5035 {
5036         struct btrfs_key key;
5037         struct btrfs_key location;
5038         struct btrfs_inode_ref *ref;
5039         struct extent_buffer *node;
5040         char namebuf[BTRFS_NAME_LEN] = {0};
5041         u32 total;
5042         u32 cur = 0;
5043         u32 len;
5044         u32 name_len;
5045         u64 index;
5046         int ret;
5047         int err = 0;
5048         int tmp_err;
5049         int slot;
5050         int need_research = 0;
5051         u64 refs;
5052
5053 begin:
5054         err = 0;
5055         cur = 0;
5056         refs = *refs_ret;
5057
5058         /* since after repair, path and the dir item may be changed */
5059         if (need_research) {
5060                 need_research = 0;
5061                 btrfs_release_path(path);
5062                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5063                 /* the item was deleted, let path point to the last checked item */
5064                 if (ret > 0) {
5065                         if (path->slots[0] == 0)
5066                                 btrfs_prev_leaf(root, path);
5067                         else
5068                                 path->slots[0]--;
5069                 }
5070                 if (ret)
5071                         goto out;
5072         }
5073
5074         location.objectid = ref_key->objectid;
5075         location.type = BTRFS_INODE_ITEM_KEY;
5076         location.offset = 0;
5077         node = path->nodes[0];
5078         slot = path->slots[0];
5079
5080         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5081         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5082         total = btrfs_item_size_nr(node, slot);
5083
5084 next:
5085         /* Update inode ref count */
5086         refs++;
5087         tmp_err = 0;
5088         index = btrfs_inode_ref_index(node, ref);
5089         name_len = btrfs_inode_ref_name_len(node, ref);
5090
5091         if (name_len <= BTRFS_NAME_LEN) {
5092                 len = name_len;
5093         } else {
5094                 len = BTRFS_NAME_LEN;
5095                 warning("root %llu INODE_REF[%llu %llu] name too long",
5096                         root->objectid, ref_key->objectid, ref_key->offset);
5097         }
5098
5099         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5100
5101         /* copy the first name found to name_ret */
5102         if (refs == 1 && name_ret) {
5103                 memcpy(name_ret, namebuf, len);
5104                 *namelen_ret = len;
5105         }
5106
5107         /* Check root dir ref */
5108         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5109                 if (index != 0 || len != strlen("..") ||
5110                     strncmp("..", namebuf, len) ||
5111                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5112                         /* set err bits then repair will delete the ref */
5113                         err |= DIR_INDEX_MISSING;
5114                         err |= DIR_ITEM_MISSING;
5115                 }
5116                 goto end;
5117         }
5118
5119         /* Find related DIR_INDEX */
5120         key.objectid = ref_key->offset;
5121         key.type = BTRFS_DIR_INDEX_KEY;
5122         key.offset = index;
5123         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5124                             imode_to_type(mode));
5125
5126         /* Find related dir_item */
5127         key.objectid = ref_key->offset;
5128         key.type = BTRFS_DIR_ITEM_KEY;
5129         key.offset = btrfs_name_hash(namebuf, len);
5130         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5131                             imode_to_type(mode));
5132 end:
5133         if (tmp_err && repair) {
5134                 ret = repair_ternary_lowmem(root, ref_key->offset,
5135                                             ref_key->objectid, index, namebuf,
5136                                             name_len, imode_to_type(mode),
5137                                             tmp_err);
5138                 if (!ret) {
5139                         need_research = 1;
5140                         goto begin;
5141                 }
5142         }
5143         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5144                             imode_to_type(mode), tmp_err);
5145         err |= tmp_err;
5146         len = sizeof(*ref) + name_len;
5147         ref = (struct btrfs_inode_ref *)((char *)ref + len);
5148         cur += len;
5149         if (cur < total)
5150                 goto next;
5151
5152 out:
5153         *refs_ret = refs;
5154         return err;
5155 }
5156
5157 /*
5158  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5159  * DIR_ITEM/DIR_INDEX.
5160  *
5161  * @root:       the root of the fs/file tree
5162  * @ref_key:    the key of the INODE_EXTREF
5163  * @refs:       the count of INODE_EXTREF
5164  * @mode:       the st_mode of INODE_ITEM
5165  *
5166  * Return 0 if no error occurred.
5167  */
5168 static int check_inode_extref(struct btrfs_root *root,
5169                               struct btrfs_key *ref_key,
5170                               struct extent_buffer *node, int slot, u64 *refs,
5171                               int mode)
5172 {
5173         struct btrfs_key key;
5174         struct btrfs_key location;
5175         struct btrfs_inode_extref *extref;
5176         char namebuf[BTRFS_NAME_LEN] = {0};
5177         u32 total;
5178         u32 cur = 0;
5179         u32 len;
5180         u32 name_len;
5181         u64 index;
5182         u64 parent;
5183         int ret;
5184         int err = 0;
5185
5186         location.objectid = ref_key->objectid;
5187         location.type = BTRFS_INODE_ITEM_KEY;
5188         location.offset = 0;
5189
5190         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5191         total = btrfs_item_size_nr(node, slot);
5192
5193 next:
5194         /* update inode ref count */
5195         (*refs)++;
5196         name_len = btrfs_inode_extref_name_len(node, extref);
5197         index = btrfs_inode_extref_index(node, extref);
5198         parent = btrfs_inode_extref_parent(node, extref);
5199         if (name_len <= BTRFS_NAME_LEN) {
5200                 len = name_len;
5201         } else {
5202                 len = BTRFS_NAME_LEN;
5203                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5204                         root->objectid, ref_key->objectid, ref_key->offset);
5205         }
5206         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5207
5208         /* Check root dir ref name */
5209         if (index == 0 && strncmp(namebuf, "..", name_len)) {
5210                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5211                       root->objectid, ref_key->objectid, ref_key->offset,
5212                       namebuf);
5213                 err |= ROOT_DIR_ERROR;
5214         }
5215
5216         /* find related dir_index */
5217         key.objectid = parent;
5218         key.type = BTRFS_DIR_INDEX_KEY;
5219         key.offset = index;
5220         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5221         err |= ret;
5222
5223         /* find related dir_item */
5224         key.objectid = parent;
5225         key.type = BTRFS_DIR_ITEM_KEY;
5226         key.offset = btrfs_name_hash(namebuf, len);
5227         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5228         err |= ret;
5229
5230         len = sizeof(*extref) + name_len;
5231         extref = (struct btrfs_inode_extref *)((char *)extref + len);
5232         cur += len;
5233
5234         if (cur < total)
5235                 goto next;
5236
5237         return err;
5238 }
5239
5240 /*
5241  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5242  * DIR_ITEM/DIR_INDEX match.
5243  * Return with @index_ret.
5244  *
5245  * @root:       the root of the fs/file tree
5246  * @key:        the key of the INODE_REF/INODE_EXTREF
5247  * @name:       the name in the INODE_REF/INODE_EXTREF
5248  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
5249  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
5250  *              value (64)-1 means do not check index
5251  * @ext_ref:    the EXTENDED_IREF feature
5252  *
5253  * Return 0 if no error occurred.
5254  * Return >0 for error bitmap
5255  */
5256 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5257                           char *name, int namelen, u64 *index_ret,
5258                           unsigned int ext_ref)
5259 {
5260         struct btrfs_path path;
5261         struct btrfs_inode_ref *ref;
5262         struct btrfs_inode_extref *extref;
5263         struct extent_buffer *node;
5264         char ref_namebuf[BTRFS_NAME_LEN] = {0};
5265         u32 total;
5266         u32 cur = 0;
5267         u32 len;
5268         u32 ref_namelen;
5269         u64 ref_index;
5270         u64 parent;
5271         u64 dir_id;
5272         int slot;
5273         int ret;
5274
5275         ASSERT(index_ret);
5276
5277         btrfs_init_path(&path);
5278         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5279         if (ret) {
5280                 ret = INODE_REF_MISSING;
5281                 goto extref;
5282         }
5283
5284         node = path.nodes[0];
5285         slot = path.slots[0];
5286
5287         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5288         total = btrfs_item_size_nr(node, slot);
5289
5290         /* Iterate all entry of INODE_REF */
5291         while (cur < total) {
5292                 ret = INODE_REF_MISSING;
5293
5294                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5295                 ref_index = btrfs_inode_ref_index(node, ref);
5296                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5297                         goto next_ref;
5298
5299                 if (cur + sizeof(*ref) + ref_namelen > total ||
5300                     ref_namelen > BTRFS_NAME_LEN) {
5301                         warning("root %llu INODE %s[%llu %llu] name too long",
5302                                 root->objectid,
5303                                 key->type == BTRFS_INODE_REF_KEY ?
5304                                         "REF" : "EXTREF",
5305                                 key->objectid, key->offset);
5306
5307                         if (cur + sizeof(*ref) > total)
5308                                 break;
5309                         len = min_t(u32, total - cur - sizeof(*ref),
5310                                     BTRFS_NAME_LEN);
5311                 } else {
5312                         len = ref_namelen;
5313                 }
5314
5315                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5316                                    len);
5317
5318                 if (len != namelen || strncmp(ref_namebuf, name, len))
5319                         goto next_ref;
5320
5321                 *index_ret = ref_index;
5322                 ret = 0;
5323                 goto out;
5324 next_ref:
5325                 len = sizeof(*ref) + ref_namelen;
5326                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5327                 cur += len;
5328         }
5329
5330 extref:
5331         /* Skip if not support EXTENDED_IREF feature */
5332         if (!ext_ref)
5333                 goto out;
5334
5335         btrfs_release_path(&path);
5336         btrfs_init_path(&path);
5337
5338         dir_id = key->offset;
5339         key->type = BTRFS_INODE_EXTREF_KEY;
5340         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5341
5342         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5343         if (ret) {
5344                 ret = INODE_REF_MISSING;
5345                 goto out;
5346         }
5347
5348         node = path.nodes[0];
5349         slot = path.slots[0];
5350
5351         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5352         cur = 0;
5353         total = btrfs_item_size_nr(node, slot);
5354
5355         /* Iterate all entry of INODE_EXTREF */
5356         while (cur < total) {
5357                 ret = INODE_REF_MISSING;
5358
5359                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5360                 ref_index = btrfs_inode_extref_index(node, extref);
5361                 parent = btrfs_inode_extref_parent(node, extref);
5362                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5363                         goto next_extref;
5364
5365                 if (parent != dir_id)
5366                         goto next_extref;
5367
5368                 if (ref_namelen <= BTRFS_NAME_LEN) {
5369                         len = ref_namelen;
5370                 } else {
5371                         len = BTRFS_NAME_LEN;
5372                         warning("root %llu INODE %s[%llu %llu] name too long",
5373                                 root->objectid,
5374                                 key->type == BTRFS_INODE_REF_KEY ?
5375                                         "REF" : "EXTREF",
5376                                 key->objectid, key->offset);
5377                 }
5378                 read_extent_buffer(node, ref_namebuf,
5379                                    (unsigned long)(extref + 1), len);
5380
5381                 if (len != namelen || strncmp(ref_namebuf, name, len))
5382                         goto next_extref;
5383
5384                 *index_ret = ref_index;
5385                 ret = 0;
5386                 goto out;
5387
5388 next_extref:
5389                 len = sizeof(*extref) + ref_namelen;
5390                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5391                 cur += len;
5392
5393         }
5394 out:
5395         btrfs_release_path(&path);
5396         return ret;
5397 }
5398
5399 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5400                                u64 ino, u64 index, const char *namebuf,
5401                                int name_len, u8 filetype, int err)
5402 {
5403         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5404                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5405                       root->objectid, key->objectid, key->offset, namebuf,
5406                       filetype,
5407                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5408         }
5409
5410         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5411                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5412                       root->objectid, key->objectid, index, namebuf, filetype,
5413                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5414         }
5415
5416         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5417                 error(
5418                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5419                       root->objectid, ino, index, namebuf, filetype,
5420                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5421         }
5422
5423         if (err & INODE_REF_MISSING)
5424                 error(
5425                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5426                       root->objectid, ino, key->objectid, namebuf, filetype);
5427
5428 }
5429
5430 /*
5431  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5432  *
5433  * Returns error after repair
5434  */
5435 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5436                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5437                            int err)
5438 {
5439         int ret;
5440
5441         if (err & INODE_ITEM_MISSING) {
5442                 ret = repair_inode_item_missing(root, ino, filetype);
5443                 if (!ret)
5444                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5445         }
5446
5447         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5448                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5449                                             name_len, filetype, err);
5450                 if (!ret) {
5451                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5452                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5453                         err &= ~(INODE_REF_MISSING);
5454                 }
5455         }
5456         return err;
5457 }
5458
5459 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5460                 u64 *size_ret)
5461 {
5462         struct btrfs_key key;
5463         struct btrfs_path path;
5464         u32 len;
5465         struct btrfs_dir_item *di;
5466         int ret;
5467         int cur = 0;
5468         int total = 0;
5469
5470         ASSERT(size_ret);
5471         *size_ret = 0;
5472
5473         key.objectid = ino;
5474         key.type = type;
5475         key.offset = (u64)-1;
5476
5477         btrfs_init_path(&path);
5478         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5479         if (ret < 0) {
5480                 ret = -EIO;
5481                 goto out;
5482         }
5483         /* if found, go to spacial case */
5484         if (ret == 0)
5485                 goto special_case;
5486
5487 loop:
5488         ret = btrfs_previous_item(root, &path, ino, type);
5489
5490         if (ret) {
5491                 ret = 0;
5492                 goto out;
5493         }
5494
5495 special_case:
5496         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5497         cur = 0;
5498         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5499
5500         while (cur < total) {
5501                 len = btrfs_dir_name_len(path.nodes[0], di);
5502                 if (len > BTRFS_NAME_LEN)
5503                         len = BTRFS_NAME_LEN;
5504                 *size_ret += len;
5505
5506                 len += btrfs_dir_data_len(path.nodes[0], di);
5507                 len += sizeof(*di);
5508                 di = (struct btrfs_dir_item *)((char *)di + len);
5509                 cur += len;
5510         }
5511         goto loop;
5512
5513 out:
5514         btrfs_release_path(&path);
5515         return ret;
5516 }
5517
5518 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5519 {
5520         u64 item_size;
5521         u64 index_size;
5522         int ret;
5523
5524         ASSERT(size);
5525         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5526         if (ret)
5527                 goto out;
5528
5529         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5530         if (ret)
5531                 goto out;
5532
5533         *size = item_size + index_size;
5534
5535 out:
5536         if (ret)
5537                 error("failed to count root %llu INODE[%llu] root size",
5538                       root->objectid, ino);
5539         return ret;
5540 }
5541
5542 /*
5543  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5544  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5545  *
5546  * @root:       the root of the fs/file tree
5547  * @key:        the key of the INODE_REF/INODE_EXTREF
5548  * @path:       the path
5549  * @size:       the st_size of the INODE_ITEM
5550  * @ext_ref:    the EXTENDED_IREF feature
5551  *
5552  * Return 0 if no error occurred.
5553  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5554  */
5555 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5556                           struct btrfs_path *path, u64 *size,
5557                           unsigned int ext_ref)
5558 {
5559         struct btrfs_dir_item *di;
5560         struct btrfs_inode_item *ii;
5561         struct btrfs_key key;
5562         struct btrfs_key location;
5563         struct extent_buffer *node;
5564         int slot;
5565         char namebuf[BTRFS_NAME_LEN] = {0};
5566         u32 total;
5567         u32 cur = 0;
5568         u32 len;
5569         u32 name_len;
5570         u32 data_len;
5571         u8 filetype;
5572         u32 mode = 0;
5573         u64 index;
5574         int ret;
5575         int err;
5576         int tmp_err;
5577         int need_research = 0;
5578
5579         /*
5580          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5581          * ignore index check.
5582          */
5583         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5584                 index = di_key->offset;
5585         else
5586                 index = (u64)-1;
5587 begin:
5588         err = 0;
5589         cur = 0;
5590
5591         /* since after repair, path and the dir item may be changed */
5592         if (need_research) {
5593                 need_research = 0;
5594                 err |= DIR_COUNT_AGAIN;
5595                 btrfs_release_path(path);
5596                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5597                 /* the item was deleted, let path point the last checked item */
5598                 if (ret > 0) {
5599                         if (path->slots[0] == 0)
5600                                 btrfs_prev_leaf(root, path);
5601                         else
5602                                 path->slots[0]--;
5603                 }
5604                 if (ret)
5605                         goto out;
5606         }
5607
5608         node = path->nodes[0];
5609         slot = path->slots[0];
5610
5611         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5612         total = btrfs_item_size_nr(node, slot);
5613         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5614
5615         while (cur < total) {
5616                 data_len = btrfs_dir_data_len(node, di);
5617                 tmp_err = 0;
5618                 if (data_len)
5619                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5620                               root->objectid,
5621               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5622                               di_key->objectid, di_key->offset, data_len);
5623
5624                 name_len = btrfs_dir_name_len(node, di);
5625                 if (name_len <= BTRFS_NAME_LEN) {
5626                         len = name_len;
5627                 } else {
5628                         len = BTRFS_NAME_LEN;
5629                         warning("root %llu %s[%llu %llu] name too long",
5630                                 root->objectid,
5631                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5632                                 di_key->objectid, di_key->offset);
5633                 }
5634                 (*size) += name_len;
5635                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5636                                    len);
5637                 filetype = btrfs_dir_type(node, di);
5638
5639                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5640                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5641                         err |= -EIO;
5642                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5643                         root->objectid, di_key->objectid, di_key->offset,
5644                         namebuf, len, filetype, di_key->offset,
5645                         btrfs_name_hash(namebuf, len));
5646                 }
5647
5648                 btrfs_dir_item_key_to_cpu(node, di, &location);
5649                 /* Ignore related ROOT_ITEM check */
5650                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5651                         goto next;
5652
5653                 btrfs_release_path(path);
5654                 /* Check relative INODE_ITEM(existence/filetype) */
5655                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5656                 if (ret) {
5657                         tmp_err |= INODE_ITEM_MISSING;
5658                         goto next;
5659                 }
5660
5661                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5662                                     struct btrfs_inode_item);
5663                 mode = btrfs_inode_mode(path->nodes[0], ii);
5664                 if (imode_to_type(mode) != filetype) {
5665                         tmp_err |= INODE_ITEM_MISMATCH;
5666                         goto next;
5667                 }
5668
5669                 /* Check relative INODE_REF/INODE_EXTREF */
5670                 key.objectid = location.objectid;
5671                 key.type = BTRFS_INODE_REF_KEY;
5672                 key.offset = di_key->objectid;
5673                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5674                                           &index, ext_ref);
5675
5676                 /* check relative INDEX/ITEM */
5677                 key.objectid = di_key->objectid;
5678                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5679                         key.type = BTRFS_DIR_INDEX_KEY;
5680                         key.offset = index;
5681                 } else {
5682                         key.type = BTRFS_DIR_ITEM_KEY;
5683                         key.offset = btrfs_name_hash(namebuf, name_len);
5684                 }
5685
5686                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5687                                          name_len, filetype);
5688                 /* find_dir_item may find index */
5689                 if (key.type == BTRFS_DIR_INDEX_KEY)
5690                         index = key.offset;
5691 next:
5692
5693                 if (tmp_err && repair) {
5694                         ret = repair_dir_item(root, di_key->objectid,
5695                                               location.objectid, index,
5696                                               imode_to_type(mode), namebuf,
5697                                               name_len, tmp_err);
5698                         if (ret != tmp_err) {
5699                                 need_research = 1;
5700                                 goto begin;
5701                         }
5702                 }
5703                 btrfs_release_path(path);
5704                 print_dir_item_err(root, di_key, location.objectid, index,
5705                                    namebuf, name_len, filetype, tmp_err);
5706                 err |= tmp_err;
5707                 len = sizeof(*di) + name_len + data_len;
5708                 di = (struct btrfs_dir_item *)((char *)di + len);
5709                 cur += len;
5710
5711                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5712                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5713                               root->objectid, di_key->objectid,
5714                               di_key->offset);
5715                         break;
5716                 }
5717         }
5718 out:
5719         /* research path */
5720         btrfs_release_path(path);
5721         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5722         if (ret)
5723                 err |= ret > 0 ? -ENOENT : ret;
5724         return err;
5725 }
5726
5727 /*
5728  * Wrapper function of btrfs_punch_hole.
5729  *
5730  * Returns 0 means success.
5731  * Returns not 0 means error.
5732  */
5733 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5734                              u64 len)
5735 {
5736         struct btrfs_trans_handle *trans;
5737         int ret = 0;
5738
5739         trans = btrfs_start_transaction(root, 1);
5740         if (IS_ERR(trans))
5741                 return PTR_ERR(trans);
5742
5743         ret = btrfs_punch_hole(trans, root, ino, start, len);
5744         if (ret)
5745                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5746                       start, len, ino);
5747         else
5748                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5749                        ino);
5750
5751         btrfs_commit_transaction(trans, root);
5752         return ret;
5753 }
5754
5755 /*
5756  * Check file extent datasum/hole, update the size of the file extents,
5757  * check and update the last offset of the file extent.
5758  *
5759  * @root:       the root of fs/file tree.
5760  * @fkey:       the key of the file extent.
5761  * @nodatasum:  INODE_NODATASUM feature.
5762  * @size:       the sum of all EXTENT_DATA items size for this inode.
5763  * @end:        the offset of the last extent.
5764  *
5765  * Return 0 if no error occurred.
5766  */
5767 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5768                              struct extent_buffer *node, int slot,
5769                              unsigned int nodatasum, u64 *size, u64 *end)
5770 {
5771         struct btrfs_file_extent_item *fi;
5772         u64 disk_bytenr;
5773         u64 disk_num_bytes;
5774         u64 extent_num_bytes;
5775         u64 extent_offset;
5776         u64 csum_found;         /* In byte size, sectorsize aligned */
5777         u64 search_start;       /* Logical range start we search for csum */
5778         u64 search_len;         /* Logical range len we search for csum */
5779         unsigned int extent_type;
5780         unsigned int is_hole;
5781         int compressed = 0;
5782         int ret;
5783         int err = 0;
5784
5785         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5786
5787         /* Check inline extent */
5788         extent_type = btrfs_file_extent_type(node, fi);
5789         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5790                 struct btrfs_item *e = btrfs_item_nr(slot);
5791                 u32 item_inline_len;
5792
5793                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5794                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5795                 compressed = btrfs_file_extent_compression(node, fi);
5796                 if (extent_num_bytes == 0) {
5797                         error(
5798                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5799                                 root->objectid, fkey->objectid, fkey->offset);
5800                         err |= FILE_EXTENT_ERROR;
5801                 }
5802                 if (!compressed && extent_num_bytes != item_inline_len) {
5803                         error(
5804                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5805                                 root->objectid, fkey->objectid, fkey->offset,
5806                                 extent_num_bytes, item_inline_len);
5807                         err |= FILE_EXTENT_ERROR;
5808                 }
5809                 *end += extent_num_bytes;
5810                 *size += extent_num_bytes;
5811                 return err;
5812         }
5813
5814         /* Check extent type */
5815         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5816                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5817                 err |= FILE_EXTENT_ERROR;
5818                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5819                       root->objectid, fkey->objectid, fkey->offset);
5820                 return err;
5821         }
5822
5823         /* Check REG_EXTENT/PREALLOC_EXTENT */
5824         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5825         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5826         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5827         extent_offset = btrfs_file_extent_offset(node, fi);
5828         compressed = btrfs_file_extent_compression(node, fi);
5829         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5830
5831         /*
5832          * Check EXTENT_DATA csum
5833          *
5834          * For plain (uncompressed) extent, we should only check the range
5835          * we're referring to, as it's possible that part of prealloc extent
5836          * has been written, and has csum:
5837          *
5838          * |<--- Original large preallocated extent A ---->|
5839          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5840          *      No csum                         Has csum
5841          *
5842          * For compressed extent, we should check the whole range.
5843          */
5844         if (!compressed) {
5845                 search_start = disk_bytenr + extent_offset;
5846                 search_len = extent_num_bytes;
5847         } else {
5848                 search_start = disk_bytenr;
5849                 search_len = disk_num_bytes;
5850         }
5851         ret = count_csum_range(root, search_start, search_len, &csum_found);
5852         if (csum_found > 0 && nodatasum) {
5853                 err |= ODD_CSUM_ITEM;
5854                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5855                       root->objectid, fkey->objectid, fkey->offset);
5856         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5857                    !is_hole && (ret < 0 || csum_found < search_len)) {
5858                 err |= CSUM_ITEM_MISSING;
5859                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5860                       root->objectid, fkey->objectid, fkey->offset,
5861                       csum_found, search_len);
5862         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5863                 err |= ODD_CSUM_ITEM;
5864                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5865                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5866         }
5867
5868         /* Check EXTENT_DATA hole */
5869         if (!no_holes && *end != fkey->offset) {
5870                 if (repair)
5871                         ret = punch_extent_hole(root, fkey->objectid,
5872                                                 *end, fkey->offset - *end);
5873                 if (!repair || ret) {
5874                         err |= FILE_EXTENT_ERROR;
5875                         error(
5876                 "root %llu EXTENT_DATA[%llu %llu] interrupt, should start at %llu",
5877                         root->objectid, fkey->objectid, fkey->offset, *end);
5878                 }
5879         }
5880
5881         *end += extent_num_bytes;
5882         if (!is_hole)
5883                 *size += extent_num_bytes;
5884
5885         return err;
5886 }
5887
5888 /*
5889  * Set inode item nbytes to @nbytes
5890  *
5891  * Returns  0     on success
5892  * Returns  != 0  on error
5893  */
5894 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5895                                       struct btrfs_path *path,
5896                                       u64 ino, u64 nbytes)
5897 {
5898         struct btrfs_trans_handle *trans;
5899         struct btrfs_inode_item *ii;
5900         struct btrfs_key key;
5901         struct btrfs_key research_key;
5902         int err = 0;
5903         int ret;
5904
5905         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5906
5907         key.objectid = ino;
5908         key.type = BTRFS_INODE_ITEM_KEY;
5909         key.offset = 0;
5910
5911         trans = btrfs_start_transaction(root, 1);
5912         if (IS_ERR(trans)) {
5913                 ret = PTR_ERR(trans);
5914                 err |= ret;
5915                 goto out;
5916         }
5917
5918         btrfs_release_path(path);
5919         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5920         if (ret > 0)
5921                 ret = -ENOENT;
5922         if (ret) {
5923                 err |= ret;
5924                 goto fail;
5925         }
5926
5927         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5928                             struct btrfs_inode_item);
5929         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5930         btrfs_mark_buffer_dirty(path->nodes[0]);
5931 fail:
5932         btrfs_commit_transaction(trans, root);
5933 out:
5934         if (ret)
5935                 error("failed to set nbytes in inode %llu root %llu",
5936                       ino, root->root_key.objectid);
5937         else
5938                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5939                        root->root_key.objectid, nbytes);
5940
5941         /* research path */
5942         btrfs_release_path(path);
5943         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5944         err |= ret;
5945
5946         return err;
5947 }
5948
5949 /*
5950  * Set directory inode isize to @isize.
5951  *
5952  * Returns 0     on success.
5953  * Returns != 0  on error.
5954  */
5955 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5956                                    struct btrfs_path *path,
5957                                    u64 ino, u64 isize)
5958 {
5959         struct btrfs_trans_handle *trans;
5960         struct btrfs_inode_item *ii;
5961         struct btrfs_key key;
5962         struct btrfs_key research_key;
5963         int ret;
5964         int err = 0;
5965
5966         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5967
5968         key.objectid = ino;
5969         key.type = BTRFS_INODE_ITEM_KEY;
5970         key.offset = 0;
5971
5972         trans = btrfs_start_transaction(root, 1);
5973         if (IS_ERR(trans)) {
5974                 ret = PTR_ERR(trans);
5975                 err |= ret;
5976                 goto out;
5977         }
5978
5979         btrfs_release_path(path);
5980         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5981         if (ret > 0)
5982                 ret = -ENOENT;
5983         if (ret) {
5984                 err |= ret;
5985                 goto fail;
5986         }
5987
5988         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5989                             struct btrfs_inode_item);
5990         btrfs_set_inode_size(path->nodes[0], ii, isize);
5991         btrfs_mark_buffer_dirty(path->nodes[0]);
5992 fail:
5993         btrfs_commit_transaction(trans, root);
5994 out:
5995         if (ret)
5996                 error("failed to set isize in inode %llu root %llu",
5997                       ino, root->root_key.objectid);
5998         else
5999                 printf("Set isize in inode %llu root %llu to %llu\n",
6000                        ino, root->root_key.objectid, isize);
6001
6002         btrfs_release_path(path);
6003         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6004         err |= ret;
6005
6006         return err;
6007 }
6008
6009 /*
6010  * Wrapper function for btrfs_add_orphan_item().
6011  *
6012  * Returns 0     on success.
6013  * Returns != 0  on error.
6014  */
6015 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6016                                            struct btrfs_path *path, u64 ino)
6017 {
6018         struct btrfs_trans_handle *trans;
6019         struct btrfs_key research_key;
6020         int ret;
6021         int err = 0;
6022
6023         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6024
6025         trans = btrfs_start_transaction(root, 1);
6026         if (IS_ERR(trans)) {
6027                 ret = PTR_ERR(trans);
6028                 err |= ret;
6029                 goto out;
6030         }
6031
6032         btrfs_release_path(path);
6033         ret = btrfs_add_orphan_item(trans, root, path, ino);
6034         err |= ret;
6035         btrfs_commit_transaction(trans, root);
6036 out:
6037         if (ret)
6038                 error("failed to add inode %llu as orphan item root %llu",
6039                       ino, root->root_key.objectid);
6040         else
6041                 printf("Added inode %llu as orphan item root %llu\n",
6042                        ino, root->root_key.objectid);
6043
6044         btrfs_release_path(path);
6045         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6046         err |= ret;
6047
6048         return err;
6049 }
6050
6051 /* Set inode_item nlink to @ref_count.
6052  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6053  *
6054  * Returns 0 on success
6055  */
6056 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6057                                       struct btrfs_path *path, u64 ino,
6058                                       const char *name, u32 namelen,
6059                                       u64 ref_count, u8 filetype, u64 *nlink)
6060 {
6061         struct btrfs_trans_handle *trans;
6062         struct btrfs_inode_item *ii;
6063         struct btrfs_key key;
6064         struct btrfs_key old_key;
6065         char namebuf[BTRFS_NAME_LEN] = {0};
6066         int name_len;
6067         int ret;
6068         int ret2;
6069
6070         /* save the key */
6071         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6072
6073         if (name && namelen) {
6074                 ASSERT(namelen <= BTRFS_NAME_LEN);
6075                 memcpy(namebuf, name, namelen);
6076                 name_len = namelen;
6077         } else {
6078                 sprintf(namebuf, "%llu", ino);
6079                 name_len = count_digits(ino);
6080                 printf("Can't find file name for inode %llu, use %s instead\n",
6081                        ino, namebuf);
6082         }
6083
6084         trans = btrfs_start_transaction(root, 1);
6085         if (IS_ERR(trans)) {
6086                 ret = PTR_ERR(trans);
6087                 goto out;
6088         }
6089
6090         btrfs_release_path(path);
6091         /* if refs is 0, put it into lostfound */
6092         if (ref_count == 0) {
6093                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6094                                               name_len, filetype, &ref_count);
6095                 if (ret)
6096                         goto fail;
6097         }
6098
6099         /* reset inode_item's nlink to ref_count */
6100         key.objectid = ino;
6101         key.type = BTRFS_INODE_ITEM_KEY;
6102         key.offset = 0;
6103
6104         btrfs_release_path(path);
6105         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6106         if (ret > 0)
6107                 ret = -ENOENT;
6108         if (ret)
6109                 goto fail;
6110
6111         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6112                             struct btrfs_inode_item);
6113         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6114         btrfs_mark_buffer_dirty(path->nodes[0]);
6115
6116         if (nlink)
6117                 *nlink = ref_count;
6118 fail:
6119         btrfs_commit_transaction(trans, root);
6120 out:
6121         if (ret)
6122                 error(
6123         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6124                        root->objectid, ino, namebuf, filetype);
6125         else
6126                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6127                        root->objectid, ino, namebuf, filetype);
6128
6129         /* research */
6130         btrfs_release_path(path);
6131         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6132         if (ret2 < 0)
6133                 return ret |= ret2;
6134         return ret;
6135 }
6136
6137 /*
6138  * Check INODE_ITEM and related ITEMs (the same inode number)
6139  * 1. check link count
6140  * 2. check inode ref/extref
6141  * 3. check dir item/index
6142  *
6143  * @ext_ref:    the EXTENDED_IREF feature
6144  *
6145  * Return 0 if no error occurred.
6146  * Return >0 for error or hit the traversal is done(by error bitmap)
6147  */
6148 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6149                             unsigned int ext_ref)
6150 {
6151         struct extent_buffer *node;
6152         struct btrfs_inode_item *ii;
6153         struct btrfs_key key;
6154         struct btrfs_key last_key;
6155         u64 inode_id;
6156         u32 mode;
6157         u64 nlink;
6158         u64 nbytes;
6159         u64 isize;
6160         u64 size = 0;
6161         u64 refs = 0;
6162         u64 extent_end = 0;
6163         u64 extent_size = 0;
6164         unsigned int dir;
6165         unsigned int nodatasum;
6166         int slot;
6167         int ret;
6168         int err = 0;
6169         char namebuf[BTRFS_NAME_LEN] = {0};
6170         u32 name_len = 0;
6171
6172         node = path->nodes[0];
6173         slot = path->slots[0];
6174
6175         btrfs_item_key_to_cpu(node, &key, slot);
6176         inode_id = key.objectid;
6177
6178         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6179                 ret = btrfs_next_item(root, path);
6180                 if (ret > 0)
6181                         err |= LAST_ITEM;
6182                 return err;
6183         }
6184
6185         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6186         isize = btrfs_inode_size(node, ii);
6187         nbytes = btrfs_inode_nbytes(node, ii);
6188         mode = btrfs_inode_mode(node, ii);
6189         dir = imode_to_type(mode) == BTRFS_FT_DIR;
6190         nlink = btrfs_inode_nlink(node, ii);
6191         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
6192
6193         while (1) {
6194                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6195                 ret = btrfs_next_item(root, path);
6196                 if (ret < 0) {
6197                         /* out will fill 'err' rusing current statistics */
6198                         goto out;
6199                 } else if (ret > 0) {
6200                         err |= LAST_ITEM;
6201                         goto out;
6202                 }
6203
6204                 node = path->nodes[0];
6205                 slot = path->slots[0];
6206                 btrfs_item_key_to_cpu(node, &key, slot);
6207                 if (key.objectid != inode_id)
6208                         goto out;
6209
6210                 switch (key.type) {
6211                 case BTRFS_INODE_REF_KEY:
6212                         ret = check_inode_ref(root, &key, path, namebuf,
6213                                               &name_len, &refs, mode);
6214                         err |= ret;
6215                         break;
6216                 case BTRFS_INODE_EXTREF_KEY:
6217                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6218                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
6219                                         root->objectid, key.objectid,
6220                                         key.offset);
6221                         ret = check_inode_extref(root, &key, node, slot, &refs,
6222                                                  mode);
6223                         err |= ret;
6224                         break;
6225                 case BTRFS_DIR_ITEM_KEY:
6226                 case BTRFS_DIR_INDEX_KEY:
6227                         if (!dir) {
6228                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6229                                         root->objectid, inode_id,
6230                                         imode_to_type(mode), key.objectid,
6231                                         key.offset);
6232                         }
6233                         ret = check_dir_item(root, &key, path, &size, ext_ref);
6234                         err |= ret;
6235                         break;
6236                 case BTRFS_EXTENT_DATA_KEY:
6237                         if (dir) {
6238                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6239                                         root->objectid, inode_id, key.objectid,
6240                                         key.offset);
6241                         }
6242                         ret = check_file_extent(root, &key, node, slot,
6243                                                 nodatasum, &extent_size,
6244                                                 &extent_end);
6245                         err |= ret;
6246                         break;
6247                 case BTRFS_XATTR_ITEM_KEY:
6248                         break;
6249                 default:
6250                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6251                               key.objectid, key.type, key.offset);
6252                 }
6253         }
6254
6255 out:
6256         if (err & LAST_ITEM) {
6257                 btrfs_release_path(path);
6258                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6259                 if (ret)
6260                         return err;
6261         }
6262
6263         /* verify INODE_ITEM nlink/isize/nbytes */
6264         if (dir) {
6265                 if (repair && (err & DIR_COUNT_AGAIN)) {
6266                         err &= ~DIR_COUNT_AGAIN;
6267                         count_dir_isize(root, inode_id, &size);
6268                 }
6269
6270                 if ((nlink != 1 || refs != 1) && repair) {
6271                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6272                                 namebuf, name_len, refs, imode_to_type(mode),
6273                                 &nlink);
6274                 }
6275
6276                 if (nlink != 1) {
6277                         err |= LINK_COUNT_ERROR;
6278                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6279                               root->objectid, inode_id, nlink);
6280                 }
6281
6282                 /*
6283                  * Just a warning, as dir inode nbytes is just an
6284                  * instructive value.
6285                  */
6286                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6287                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6288                                 root->objectid, inode_id,
6289                                 root->fs_info->nodesize);
6290                 }
6291
6292                 if (isize != size) {
6293                         if (repair)
6294                                 ret = repair_dir_isize_lowmem(root, path,
6295                                                               inode_id, size);
6296                         if (!repair || ret) {
6297                                 err |= ISIZE_ERROR;
6298                                 error(
6299                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6300                                       root->objectid, inode_id, isize, size);
6301                         }
6302                 }
6303         } else {
6304                 if (nlink != refs) {
6305                         if (repair)
6306                                 ret = repair_inode_nlinks_lowmem(root, path,
6307                                          inode_id, namebuf, name_len, refs,
6308                                          imode_to_type(mode), &nlink);
6309                         if (!repair || ret) {
6310                                 err |= LINK_COUNT_ERROR;
6311                                 error(
6312                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6313                                       root->objectid, inode_id, nlink, refs);
6314                         }
6315                 } else if (!nlink) {
6316                         if (repair)
6317                                 ret = repair_inode_orphan_item_lowmem(root,
6318                                                               path, inode_id);
6319                         if (!repair || ret) {
6320                                 err |= ORPHAN_ITEM;
6321                                 error("root %llu INODE[%llu] is orphan item",
6322                                       root->objectid, inode_id);
6323                         }
6324                 }
6325
6326                 if (!nbytes && !no_holes && extent_end < isize) {
6327                         if (repair)
6328                                 ret = punch_extent_hole(root, inode_id,
6329                                                 extent_end, isize - extent_end);
6330                         if (!repair || ret) {
6331                                 err |= NBYTES_ERROR;
6332                                 error(
6333         "root %llu INODE[%llu] size %llu should have a file extent hole",
6334                                       root->objectid, inode_id, isize);
6335                         }
6336                 }
6337
6338                 if (nbytes != extent_size) {
6339                         if (repair)
6340                                 ret = repair_inode_nbytes_lowmem(root, path,
6341                                                          inode_id, extent_size);
6342                         if (!repair || ret) {
6343                                 err |= NBYTES_ERROR;
6344                                 error(
6345         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6346                                       root->objectid, inode_id, nbytes,
6347                                       extent_size);
6348                         }
6349                 }
6350         }
6351
6352         if (err & LAST_ITEM)
6353                 btrfs_next_item(root, path);
6354         return err;
6355 }
6356
6357 /*
6358  * Insert the missing inode item and inode ref.
6359  *
6360  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6361  * Root dir should be handled specially because root dir is the root of fs.
6362  *
6363  * returns err (>0 or 0) after repair
6364  */
6365 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6366 {
6367         struct btrfs_trans_handle *trans;
6368         struct btrfs_key key;
6369         struct btrfs_path path;
6370         int filetype = BTRFS_FT_DIR;
6371         int ret = 0;
6372
6373         btrfs_init_path(&path);
6374
6375         if (err & INODE_REF_MISSING) {
6376                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6377                 key.type = BTRFS_INODE_REF_KEY;
6378                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6379
6380                 trans = btrfs_start_transaction(root, 1);
6381                 if (IS_ERR(trans)) {
6382                         ret = PTR_ERR(trans);
6383                         goto out;
6384                 }
6385
6386                 btrfs_release_path(&path);
6387                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6388                 if (ret)
6389                         goto trans_fail;
6390
6391                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6392                                              BTRFS_FIRST_FREE_OBJECTID,
6393                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6394                 if (ret)
6395                         goto trans_fail;
6396
6397                 printf("Add INODE_REF[%llu %llu] name %s\n",
6398                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6399                        "..");
6400                 err &= ~INODE_REF_MISSING;
6401 trans_fail:
6402                 if (ret)
6403                         error("fail to insert first inode's ref");
6404                 btrfs_commit_transaction(trans, root);
6405         }
6406
6407         if (err & INODE_ITEM_MISSING) {
6408                 ret = repair_inode_item_missing(root,
6409                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6410                 if (ret)
6411                         goto out;
6412                 err &= ~INODE_ITEM_MISSING;
6413         }
6414 out:
6415         if (ret)
6416                 error("fail to repair first inode");
6417         btrfs_release_path(&path);
6418         return err;
6419 }
6420
6421 /*
6422  * check first root dir's inode_item and inode_ref
6423  *
6424  * returns 0 means no error
6425  * returns >0 means error
6426  * returns <0 means fatal error
6427  */
6428 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6429 {
6430         struct btrfs_path path;
6431         struct btrfs_key key;
6432         struct btrfs_inode_item *ii;
6433         u64 index;
6434         u32 mode;
6435         int err = 0;
6436         int ret;
6437
6438         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6439         key.type = BTRFS_INODE_ITEM_KEY;
6440         key.offset = 0;
6441
6442         /* For root being dropped, we don't need to check first inode */
6443         if (btrfs_root_refs(&root->root_item) == 0 &&
6444             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6445             BTRFS_FIRST_FREE_OBJECTID)
6446                 return 0;
6447
6448         btrfs_init_path(&path);
6449         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6450         if (ret < 0)
6451                 goto out;
6452         if (ret > 0) {
6453                 ret = 0;
6454                 err |= INODE_ITEM_MISSING;
6455         } else {
6456                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6457                                     struct btrfs_inode_item);
6458                 mode = btrfs_inode_mode(path.nodes[0], ii);
6459                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6460                         err |= INODE_ITEM_MISMATCH;
6461         }
6462
6463         /* lookup first inode ref */
6464         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6465         key.type = BTRFS_INODE_REF_KEY;
6466         /* special index value */
6467         index = 0;
6468
6469         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6470         if (ret < 0)
6471                 goto out;
6472         err |= ret;
6473
6474 out:
6475         btrfs_release_path(&path);
6476
6477         if (err && repair)
6478                 err = repair_fs_first_inode(root, err);
6479
6480         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6481                 error("root dir INODE_ITEM is %s",
6482                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6483         if (err & INODE_REF_MISSING)
6484                 error("root dir INODE_REF is missing");
6485
6486         return ret < 0 ? ret : err;
6487 }
6488
6489 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6490                                                 u64 parent, u64 root)
6491 {
6492         struct rb_node *node;
6493         struct tree_backref *back = NULL;
6494         struct tree_backref match = {
6495                 .node = {
6496                         .is_data = 0,
6497                 },
6498         };
6499
6500         if (parent) {
6501                 match.parent = parent;
6502                 match.node.full_backref = 1;
6503         } else {
6504                 match.root = root;
6505         }
6506
6507         node = rb_search(&rec->backref_tree, &match.node.node,
6508                          (rb_compare_keys)compare_extent_backref, NULL);
6509         if (node)
6510                 back = to_tree_backref(rb_node_to_extent_backref(node));
6511
6512         return back;
6513 }
6514
6515 static struct data_backref *find_data_backref(struct extent_record *rec,
6516                                                 u64 parent, u64 root,
6517                                                 u64 owner, u64 offset,
6518                                                 int found_ref,
6519                                                 u64 disk_bytenr, u64 bytes)
6520 {
6521         struct rb_node *node;
6522         struct data_backref *back = NULL;
6523         struct data_backref match = {
6524                 .node = {
6525                         .is_data = 1,
6526                 },
6527                 .owner = owner,
6528                 .offset = offset,
6529                 .bytes = bytes,
6530                 .found_ref = found_ref,
6531                 .disk_bytenr = disk_bytenr,
6532         };
6533
6534         if (parent) {
6535                 match.parent = parent;
6536                 match.node.full_backref = 1;
6537         } else {
6538                 match.root = root;
6539         }
6540
6541         node = rb_search(&rec->backref_tree, &match.node.node,
6542                          (rb_compare_keys)compare_extent_backref, NULL);
6543         if (node)
6544                 back = to_data_backref(rb_node_to_extent_backref(node));
6545
6546         return back;
6547 }
6548 /*
6549  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6550  * blocks and integrity of fs tree items.
6551  *
6552  * @root:         the root of the tree to be checked.
6553  * @ext_ref       feature EXTENDED_IREF is enable or not.
6554  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6555  *                otherwise means check fs tree(s) items relationship and
6556  *                @root MUST be a fs tree root.
6557  * Returns 0      represents OK.
6558  * Returns not 0  represents error.
6559  */
6560 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6561                             struct btrfs_root *root, unsigned int ext_ref,
6562                             int check_all)
6563
6564 {
6565         struct btrfs_path path;
6566         struct node_refs nrefs;
6567         struct btrfs_root_item *root_item = &root->root_item;
6568         int ret;
6569         int level;
6570         int err = 0;
6571
6572         memset(&nrefs, 0, sizeof(nrefs));
6573         if (!check_all) {
6574                 /*
6575                  * We need to manually check the first inode item (256)
6576                  * As the following traversal function will only start from
6577                  * the first inode item in the leaf, if inode item (256) is
6578                  * missing we will skip it forever.
6579                  */
6580                 ret = check_fs_first_inode(root, ext_ref);
6581                 if (ret < 0)
6582                         return ret;
6583         }
6584
6585
6586         level = btrfs_header_level(root->node);
6587         btrfs_init_path(&path);
6588
6589         if (btrfs_root_refs(root_item) > 0 ||
6590             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6591                 path.nodes[level] = root->node;
6592                 path.slots[level] = 0;
6593                 extent_buffer_get(root->node);
6594         } else {
6595                 struct btrfs_key key;
6596
6597                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6598                 level = root_item->drop_level;
6599                 path.lowest_level = level;
6600                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6601                 if (ret < 0)
6602                         goto out;
6603                 ret = 0;
6604         }
6605
6606         while (1) {
6607                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6608                                         ext_ref, check_all);
6609
6610                 err |= !!ret;
6611
6612                 /* if ret is negative, walk shall stop */
6613                 if (ret < 0) {
6614                         ret = err;
6615                         break;
6616                 }
6617
6618                 ret = walk_up_tree_v2(root, &path, &level);
6619                 if (ret != 0) {
6620                         /* Normal exit, reset ret to err */
6621                         ret = err;
6622                         break;
6623                 }
6624         }
6625
6626 out:
6627         btrfs_release_path(&path);
6628         return ret;
6629 }
6630
6631 /*
6632  * Iterate all items in the tree and call check_inode_item() to check.
6633  *
6634  * @root:       the root of the tree to be checked.
6635  * @ext_ref:    the EXTENDED_IREF feature
6636  *
6637  * Return 0 if no error found.
6638  * Return <0 for error.
6639  */
6640 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6641 {
6642         reset_cached_block_groups(root->fs_info);
6643         return check_btrfs_root(NULL, root, ext_ref, 0);
6644 }
6645
6646 /*
6647  * Find the relative ref for root_ref and root_backref.
6648  *
6649  * @root:       the root of the root tree.
6650  * @ref_key:    the key of the root ref.
6651  *
6652  * Return 0 if no error occurred.
6653  */
6654 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6655                           struct extent_buffer *node, int slot)
6656 {
6657         struct btrfs_path path;
6658         struct btrfs_key key;
6659         struct btrfs_root_ref *ref;
6660         struct btrfs_root_ref *backref;
6661         char ref_name[BTRFS_NAME_LEN] = {0};
6662         char backref_name[BTRFS_NAME_LEN] = {0};
6663         u64 ref_dirid;
6664         u64 ref_seq;
6665         u32 ref_namelen;
6666         u64 backref_dirid;
6667         u64 backref_seq;
6668         u32 backref_namelen;
6669         u32 len;
6670         int ret;
6671         int err = 0;
6672
6673         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6674         ref_dirid = btrfs_root_ref_dirid(node, ref);
6675         ref_seq = btrfs_root_ref_sequence(node, ref);
6676         ref_namelen = btrfs_root_ref_name_len(node, ref);
6677
6678         if (ref_namelen <= BTRFS_NAME_LEN) {
6679                 len = ref_namelen;
6680         } else {
6681                 len = BTRFS_NAME_LEN;
6682                 warning("%s[%llu %llu] ref_name too long",
6683                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6684                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6685                         ref_key->offset);
6686         }
6687         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6688
6689         /* Find relative root_ref */
6690         key.objectid = ref_key->offset;
6691         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6692         key.offset = ref_key->objectid;
6693
6694         btrfs_init_path(&path);
6695         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6696         if (ret) {
6697                 err |= ROOT_REF_MISSING;
6698                 error("%s[%llu %llu] couldn't find relative ref",
6699                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6700                       "ROOT_REF" : "ROOT_BACKREF",
6701                       ref_key->objectid, ref_key->offset);
6702                 goto out;
6703         }
6704
6705         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6706                                  struct btrfs_root_ref);
6707         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6708         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6709         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6710
6711         if (backref_namelen <= BTRFS_NAME_LEN) {
6712                 len = backref_namelen;
6713         } else {
6714                 len = BTRFS_NAME_LEN;
6715                 warning("%s[%llu %llu] ref_name too long",
6716                         key.type == BTRFS_ROOT_REF_KEY ?
6717                         "ROOT_REF" : "ROOT_BACKREF",
6718                         key.objectid, key.offset);
6719         }
6720         read_extent_buffer(path.nodes[0], backref_name,
6721                            (unsigned long)(backref + 1), len);
6722
6723         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6724             ref_namelen != backref_namelen ||
6725             strncmp(ref_name, backref_name, len)) {
6726                 err |= ROOT_REF_MISMATCH;
6727                 error("%s[%llu %llu] mismatch relative ref",
6728                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6729                       "ROOT_REF" : "ROOT_BACKREF",
6730                       ref_key->objectid, ref_key->offset);
6731         }
6732 out:
6733         btrfs_release_path(&path);
6734         return err;
6735 }
6736
6737 /*
6738  * Check all fs/file tree in low_memory mode.
6739  *
6740  * 1. for fs tree root item, call check_fs_root_v2()
6741  * 2. for fs tree root ref/backref, call check_root_ref()
6742  *
6743  * Return 0 if no error occurred.
6744  */
6745 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6746 {
6747         struct btrfs_root *tree_root = fs_info->tree_root;
6748         struct btrfs_root *cur_root = NULL;
6749         struct btrfs_path path;
6750         struct btrfs_key key;
6751         struct extent_buffer *node;
6752         unsigned int ext_ref;
6753         int slot;
6754         int ret;
6755         int err = 0;
6756
6757         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6758
6759         btrfs_init_path(&path);
6760         key.objectid = BTRFS_FS_TREE_OBJECTID;
6761         key.offset = 0;
6762         key.type = BTRFS_ROOT_ITEM_KEY;
6763
6764         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6765         if (ret < 0) {
6766                 err = ret;
6767                 goto out;
6768         } else if (ret > 0) {
6769                 err = -ENOENT;
6770                 goto out;
6771         }
6772
6773         while (1) {
6774                 node = path.nodes[0];
6775                 slot = path.slots[0];
6776                 btrfs_item_key_to_cpu(node, &key, slot);
6777                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6778                         goto out;
6779                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6780                     fs_root_objectid(key.objectid)) {
6781                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6782                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6783                                                                        &key);
6784                         } else {
6785                                 key.offset = (u64)-1;
6786                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6787                         }
6788
6789                         if (IS_ERR(cur_root)) {
6790                                 error("Fail to read fs/subvol tree: %lld",
6791                                       key.objectid);
6792                                 err = -EIO;
6793                                 goto next;
6794                         }
6795
6796                         ret = check_fs_root_v2(cur_root, ext_ref);
6797                         err |= ret;
6798
6799                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6800                                 btrfs_free_fs_root(cur_root);
6801                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6802                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6803                         ret = check_root_ref(tree_root, &key, node, slot);
6804                         err |= ret;
6805                 }
6806 next:
6807                 ret = btrfs_next_item(tree_root, &path);
6808                 if (ret > 0)
6809                         goto out;
6810                 if (ret < 0) {
6811                         err = ret;
6812                         goto out;
6813                 }
6814         }
6815
6816 out:
6817         btrfs_release_path(&path);
6818         return err;
6819 }
6820
6821 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6822                           struct cache_tree *root_cache)
6823 {
6824         int ret;
6825
6826         if (!ctx.progress_enabled)
6827                 fprintf(stderr, "checking fs roots\n");
6828         if (check_mode == CHECK_MODE_LOWMEM)
6829                 ret = check_fs_roots_v2(fs_info);
6830         else
6831                 ret = check_fs_roots(fs_info, root_cache);
6832
6833         return ret;
6834 }
6835
6836 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6837 {
6838         struct extent_backref *back, *tmp;
6839         struct tree_backref *tback;
6840         struct data_backref *dback;
6841         u64 found = 0;
6842         int err = 0;
6843
6844         rbtree_postorder_for_each_entry_safe(back, tmp,
6845                                              &rec->backref_tree, node) {
6846                 if (!back->found_extent_tree) {
6847                         err = 1;
6848                         if (!print_errs)
6849                                 goto out;
6850                         if (back->is_data) {
6851                                 dback = to_data_backref(back);
6852                                 fprintf(stderr, "Data backref %llu %s %llu"
6853                                         " owner %llu offset %llu num_refs %lu"
6854                                         " not found in extent tree\n",
6855                                         (unsigned long long)rec->start,
6856                                         back->full_backref ?
6857                                         "parent" : "root",
6858                                         back->full_backref ?
6859                                         (unsigned long long)dback->parent:
6860                                         (unsigned long long)dback->root,
6861                                         (unsigned long long)dback->owner,
6862                                         (unsigned long long)dback->offset,
6863                                         (unsigned long)dback->num_refs);
6864                         } else {
6865                                 tback = to_tree_backref(back);
6866                                 fprintf(stderr, "Tree backref %llu parent %llu"
6867                                         " root %llu not found in extent tree\n",
6868                                         (unsigned long long)rec->start,
6869                                         (unsigned long long)tback->parent,
6870                                         (unsigned long long)tback->root);
6871                         }
6872                 }
6873                 if (!back->is_data && !back->found_ref) {
6874                         err = 1;
6875                         if (!print_errs)
6876                                 goto out;
6877                         tback = to_tree_backref(back);
6878                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6879                                 (unsigned long long)rec->start,
6880                                 back->full_backref ? "parent" : "root",
6881                                 back->full_backref ?
6882                                 (unsigned long long)tback->parent :
6883                                 (unsigned long long)tback->root, back);
6884                 }
6885                 if (back->is_data) {
6886                         dback = to_data_backref(back);
6887                         if (dback->found_ref != dback->num_refs) {
6888                                 err = 1;
6889                                 if (!print_errs)
6890                                         goto out;
6891                                 fprintf(stderr, "Incorrect local backref count"
6892                                         " on %llu %s %llu owner %llu"
6893                                         " offset %llu found %u wanted %u back %p\n",
6894                                         (unsigned long long)rec->start,
6895                                         back->full_backref ?
6896                                         "parent" : "root",
6897                                         back->full_backref ?
6898                                         (unsigned long long)dback->parent:
6899                                         (unsigned long long)dback->root,
6900                                         (unsigned long long)dback->owner,
6901                                         (unsigned long long)dback->offset,
6902                                         dback->found_ref, dback->num_refs, back);
6903                         }
6904                         if (dback->disk_bytenr != rec->start) {
6905                                 err = 1;
6906                                 if (!print_errs)
6907                                         goto out;
6908                                 fprintf(stderr, "Backref disk bytenr does not"
6909                                         " match extent record, bytenr=%llu, "
6910                                         "ref bytenr=%llu\n",
6911                                         (unsigned long long)rec->start,
6912                                         (unsigned long long)dback->disk_bytenr);
6913                         }
6914
6915                         if (dback->bytes != rec->nr) {
6916                                 err = 1;
6917                                 if (!print_errs)
6918                                         goto out;
6919                                 fprintf(stderr, "Backref bytes do not match "
6920                                         "extent backref, bytenr=%llu, ref "
6921                                         "bytes=%llu, backref bytes=%llu\n",
6922                                         (unsigned long long)rec->start,
6923                                         (unsigned long long)rec->nr,
6924                                         (unsigned long long)dback->bytes);
6925                         }
6926                 }
6927                 if (!back->is_data) {
6928                         found += 1;
6929                 } else {
6930                         dback = to_data_backref(back);
6931                         found += dback->found_ref;
6932                 }
6933         }
6934         if (found != rec->refs) {
6935                 err = 1;
6936                 if (!print_errs)
6937                         goto out;
6938                 fprintf(stderr, "Incorrect global backref count "
6939                         "on %llu found %llu wanted %llu\n",
6940                         (unsigned long long)rec->start,
6941                         (unsigned long long)found,
6942                         (unsigned long long)rec->refs);
6943         }
6944 out:
6945         return err;
6946 }
6947
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6954
6955 static void free_all_extent_backrefs(struct extent_record *rec)
6956 {
6957         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6958 }
6959
6960 static void free_extent_record_cache(struct cache_tree *extent_cache)
6961 {
6962         struct cache_extent *cache;
6963         struct extent_record *rec;
6964
6965         while (1) {
6966                 cache = first_cache_extent(extent_cache);
6967                 if (!cache)
6968                         break;
6969                 rec = container_of(cache, struct extent_record, cache);
6970                 remove_cache_extent(extent_cache, cache);
6971                 free_all_extent_backrefs(rec);
6972                 free(rec);
6973         }
6974 }
6975
6976 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6977                                  struct extent_record *rec)
6978 {
6979         if (rec->content_checked && rec->owner_ref_checked &&
6980             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6981             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6982             !rec->bad_full_backref && !rec->crossing_stripes &&
6983             !rec->wrong_chunk_type) {
6984                 remove_cache_extent(extent_cache, &rec->cache);
6985                 free_all_extent_backrefs(rec);
6986                 list_del_init(&rec->list);
6987                 free(rec);
6988         }
6989         return 0;
6990 }
6991
6992 static int check_owner_ref(struct btrfs_root *root,
6993                             struct extent_record *rec,
6994                             struct extent_buffer *buf)
6995 {
6996         struct extent_backref *node, *tmp;
6997         struct tree_backref *back;
6998         struct btrfs_root *ref_root;
6999         struct btrfs_key key;
7000         struct btrfs_path path;
7001         struct extent_buffer *parent;
7002         int level;
7003         int found = 0;
7004         int ret;
7005
7006         rbtree_postorder_for_each_entry_safe(node, tmp,
7007                                              &rec->backref_tree, node) {
7008                 if (node->is_data)
7009                         continue;
7010                 if (!node->found_ref)
7011                         continue;
7012                 if (node->full_backref)
7013                         continue;
7014                 back = to_tree_backref(node);
7015                 if (btrfs_header_owner(buf) == back->root)
7016                         return 0;
7017         }
7018         BUG_ON(rec->is_root);
7019
7020         /* try to find the block by search corresponding fs tree */
7021         key.objectid = btrfs_header_owner(buf);
7022         key.type = BTRFS_ROOT_ITEM_KEY;
7023         key.offset = (u64)-1;
7024
7025         ref_root = btrfs_read_fs_root(root->fs_info, &key);
7026         if (IS_ERR(ref_root))
7027                 return 1;
7028
7029         level = btrfs_header_level(buf);
7030         if (level == 0)
7031                 btrfs_item_key_to_cpu(buf, &key, 0);
7032         else
7033                 btrfs_node_key_to_cpu(buf, &key, 0);
7034
7035         btrfs_init_path(&path);
7036         path.lowest_level = level + 1;
7037         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7038         if (ret < 0)
7039                 return 0;
7040
7041         parent = path.nodes[level + 1];
7042         if (parent && buf->start == btrfs_node_blockptr(parent,
7043                                                         path.slots[level + 1]))
7044                 found = 1;
7045
7046         btrfs_release_path(&path);
7047         return found ? 0 : 1;
7048 }
7049
7050 static int is_extent_tree_record(struct extent_record *rec)
7051 {
7052         struct extent_backref *node, *tmp;
7053         struct tree_backref *back;
7054         int is_extent = 0;
7055
7056         rbtree_postorder_for_each_entry_safe(node, tmp,
7057                                              &rec->backref_tree, node) {
7058                 if (node->is_data)
7059                         return 0;
7060                 back = to_tree_backref(node);
7061                 if (node->full_backref)
7062                         return 0;
7063                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
7064                         is_extent = 1;
7065         }
7066         return is_extent;
7067 }
7068
7069
7070 static int record_bad_block_io(struct btrfs_fs_info *info,
7071                                struct cache_tree *extent_cache,
7072                                u64 start, u64 len)
7073 {
7074         struct extent_record *rec;
7075         struct cache_extent *cache;
7076         struct btrfs_key key;
7077
7078         cache = lookup_cache_extent(extent_cache, start, len);
7079         if (!cache)
7080                 return 0;
7081
7082         rec = container_of(cache, struct extent_record, cache);
7083         if (!is_extent_tree_record(rec))
7084                 return 0;
7085
7086         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7087         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
7088 }
7089
7090 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
7091                        struct extent_buffer *buf, int slot)
7092 {
7093         if (btrfs_header_level(buf)) {
7094                 struct btrfs_key_ptr ptr1, ptr2;
7095
7096                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
7097                                    sizeof(struct btrfs_key_ptr));
7098                 read_extent_buffer(buf, &ptr2,
7099                                    btrfs_node_key_ptr_offset(slot + 1),
7100                                    sizeof(struct btrfs_key_ptr));
7101                 write_extent_buffer(buf, &ptr1,
7102                                     btrfs_node_key_ptr_offset(slot + 1),
7103                                     sizeof(struct btrfs_key_ptr));
7104                 write_extent_buffer(buf, &ptr2,
7105                                     btrfs_node_key_ptr_offset(slot),
7106                                     sizeof(struct btrfs_key_ptr));
7107                 if (slot == 0) {
7108                         struct btrfs_disk_key key;
7109                         btrfs_node_key(buf, &key, 0);
7110                         btrfs_fixup_low_keys(root, path, &key,
7111                                              btrfs_header_level(buf) + 1);
7112                 }
7113         } else {
7114                 struct btrfs_item *item1, *item2;
7115                 struct btrfs_key k1, k2;
7116                 char *item1_data, *item2_data;
7117                 u32 item1_offset, item2_offset, item1_size, item2_size;
7118
7119                 item1 = btrfs_item_nr(slot);
7120                 item2 = btrfs_item_nr(slot + 1);
7121                 btrfs_item_key_to_cpu(buf, &k1, slot);
7122                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
7123                 item1_offset = btrfs_item_offset(buf, item1);
7124                 item2_offset = btrfs_item_offset(buf, item2);
7125                 item1_size = btrfs_item_size(buf, item1);
7126                 item2_size = btrfs_item_size(buf, item2);
7127
7128                 item1_data = malloc(item1_size);
7129                 if (!item1_data)
7130                         return -ENOMEM;
7131                 item2_data = malloc(item2_size);
7132                 if (!item2_data) {
7133                         free(item1_data);
7134                         return -ENOMEM;
7135                 }
7136
7137                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
7138                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
7139
7140                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
7141                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
7142                 free(item1_data);
7143                 free(item2_data);
7144
7145                 btrfs_set_item_offset(buf, item1, item2_offset);
7146                 btrfs_set_item_offset(buf, item2, item1_offset);
7147                 btrfs_set_item_size(buf, item1, item2_size);
7148                 btrfs_set_item_size(buf, item2, item1_size);
7149
7150                 path->slots[0] = slot;
7151                 btrfs_set_item_key_unsafe(root, path, &k2);
7152                 path->slots[0] = slot + 1;
7153                 btrfs_set_item_key_unsafe(root, path, &k1);
7154         }
7155         return 0;
7156 }
7157
7158 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7159 {
7160         struct extent_buffer *buf;
7161         struct btrfs_key k1, k2;
7162         int i;
7163         int level = path->lowest_level;
7164         int ret = -EIO;
7165
7166         buf = path->nodes[level];
7167         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7168                 if (level) {
7169                         btrfs_node_key_to_cpu(buf, &k1, i);
7170                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
7171                 } else {
7172                         btrfs_item_key_to_cpu(buf, &k1, i);
7173                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
7174                 }
7175                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7176                         continue;
7177                 ret = swap_values(root, path, buf, i);
7178                 if (ret)
7179                         break;
7180                 btrfs_mark_buffer_dirty(buf);
7181                 i = 0;
7182         }
7183         return ret;
7184 }
7185
7186 static int delete_bogus_item(struct btrfs_root *root,
7187                              struct btrfs_path *path,
7188                              struct extent_buffer *buf, int slot)
7189 {
7190         struct btrfs_key key;
7191         int nritems = btrfs_header_nritems(buf);
7192
7193         btrfs_item_key_to_cpu(buf, &key, slot);
7194
7195         /* These are all the keys we can deal with missing. */
7196         if (key.type != BTRFS_DIR_INDEX_KEY &&
7197             key.type != BTRFS_EXTENT_ITEM_KEY &&
7198             key.type != BTRFS_METADATA_ITEM_KEY &&
7199             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7200             key.type != BTRFS_EXTENT_DATA_REF_KEY)
7201                 return -1;
7202
7203         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7204                (unsigned long long)key.objectid, key.type,
7205                (unsigned long long)key.offset, slot, buf->start);
7206         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7207                               btrfs_item_nr_offset(slot + 1),
7208                               sizeof(struct btrfs_item) *
7209                               (nritems - slot - 1));
7210         btrfs_set_header_nritems(buf, nritems - 1);
7211         if (slot == 0) {
7212                 struct btrfs_disk_key disk_key;
7213
7214                 btrfs_item_key(buf, &disk_key, 0);
7215                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7216         }
7217         btrfs_mark_buffer_dirty(buf);
7218         return 0;
7219 }
7220
7221 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7222 {
7223         struct extent_buffer *buf;
7224         int i;
7225         int ret = 0;
7226
7227         /* We should only get this for leaves */
7228         BUG_ON(path->lowest_level);
7229         buf = path->nodes[0];
7230 again:
7231         for (i = 0; i < btrfs_header_nritems(buf); i++) {
7232                 unsigned int shift = 0, offset;
7233
7234                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7235                     BTRFS_LEAF_DATA_SIZE(root)) {
7236                         if (btrfs_item_end_nr(buf, i) >
7237                             BTRFS_LEAF_DATA_SIZE(root)) {
7238                                 ret = delete_bogus_item(root, path, buf, i);
7239                                 if (!ret)
7240                                         goto again;
7241                                 fprintf(stderr, "item is off the end of the "
7242                                         "leaf, can't fix\n");
7243                                 ret = -EIO;
7244                                 break;
7245                         }
7246                         shift = BTRFS_LEAF_DATA_SIZE(root) -
7247                                 btrfs_item_end_nr(buf, i);
7248                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7249                            btrfs_item_offset_nr(buf, i - 1)) {
7250                         if (btrfs_item_end_nr(buf, i) >
7251                             btrfs_item_offset_nr(buf, i - 1)) {
7252                                 ret = delete_bogus_item(root, path, buf, i);
7253                                 if (!ret)
7254                                         goto again;
7255                                 fprintf(stderr, "items overlap, can't fix\n");
7256                                 ret = -EIO;
7257                                 break;
7258                         }
7259                         shift = btrfs_item_offset_nr(buf, i - 1) -
7260                                 btrfs_item_end_nr(buf, i);
7261                 }
7262                 if (!shift)
7263                         continue;
7264
7265                 printf("Shifting item nr %d by %u bytes in block %llu\n",
7266                        i, shift, (unsigned long long)buf->start);
7267                 offset = btrfs_item_offset_nr(buf, i);
7268                 memmove_extent_buffer(buf,
7269                                       btrfs_leaf_data(buf) + offset + shift,
7270                                       btrfs_leaf_data(buf) + offset,
7271                                       btrfs_item_size_nr(buf, i));
7272                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7273                                       offset + shift);
7274                 btrfs_mark_buffer_dirty(buf);
7275         }
7276
7277         /*
7278          * We may have moved things, in which case we want to exit so we don't
7279          * write those changes out.  Once we have proper abort functionality in
7280          * progs this can be changed to something nicer.
7281          */
7282         BUG_ON(ret);
7283         return ret;
7284 }
7285
7286 /*
7287  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
7288  * then just return -EIO.
7289  */
7290 static int try_to_fix_bad_block(struct btrfs_root *root,
7291                                 struct extent_buffer *buf,
7292                                 enum btrfs_tree_block_status status)
7293 {
7294         struct btrfs_trans_handle *trans;
7295         struct ulist *roots;
7296         struct ulist_node *node;
7297         struct btrfs_root *search_root;
7298         struct btrfs_path path;
7299         struct ulist_iterator iter;
7300         struct btrfs_key root_key, key;
7301         int ret;
7302
7303         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7304             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7305                 return -EIO;
7306
7307         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7308         if (ret)
7309                 return -EIO;
7310
7311         btrfs_init_path(&path);
7312         ULIST_ITER_INIT(&iter);
7313         while ((node = ulist_next(roots, &iter))) {
7314                 root_key.objectid = node->val;
7315                 root_key.type = BTRFS_ROOT_ITEM_KEY;
7316                 root_key.offset = (u64)-1;
7317
7318                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7319                 if (IS_ERR(root)) {
7320                         ret = -EIO;
7321                         break;
7322                 }
7323
7324
7325                 trans = btrfs_start_transaction(search_root, 0);
7326                 if (IS_ERR(trans)) {
7327                         ret = PTR_ERR(trans);
7328                         break;
7329                 }
7330
7331                 path.lowest_level = btrfs_header_level(buf);
7332                 path.skip_check_block = 1;
7333                 if (path.lowest_level)
7334                         btrfs_node_key_to_cpu(buf, &key, 0);
7335                 else
7336                         btrfs_item_key_to_cpu(buf, &key, 0);
7337                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7338                 if (ret) {
7339                         ret = -EIO;
7340                         btrfs_commit_transaction(trans, search_root);
7341                         break;
7342                 }
7343                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7344                         ret = fix_key_order(search_root, &path);
7345                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7346                         ret = fix_item_offset(search_root, &path);
7347                 if (ret) {
7348                         btrfs_commit_transaction(trans, search_root);
7349                         break;
7350                 }
7351                 btrfs_release_path(&path);
7352                 btrfs_commit_transaction(trans, search_root);
7353         }
7354         ulist_free(roots);
7355         btrfs_release_path(&path);
7356         return ret;
7357 }
7358
7359 static int check_block(struct btrfs_root *root,
7360                        struct cache_tree *extent_cache,
7361                        struct extent_buffer *buf, u64 flags)
7362 {
7363         struct extent_record *rec;
7364         struct cache_extent *cache;
7365         struct btrfs_key key;
7366         enum btrfs_tree_block_status status;
7367         int ret = 0;
7368         int level;
7369
7370         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7371         if (!cache)
7372                 return 1;
7373         rec = container_of(cache, struct extent_record, cache);
7374         rec->generation = btrfs_header_generation(buf);
7375
7376         level = btrfs_header_level(buf);
7377         if (btrfs_header_nritems(buf) > 0) {
7378
7379                 if (level == 0)
7380                         btrfs_item_key_to_cpu(buf, &key, 0);
7381                 else
7382                         btrfs_node_key_to_cpu(buf, &key, 0);
7383
7384                 rec->info_objectid = key.objectid;
7385         }
7386         rec->info_level = level;
7387
7388         if (btrfs_is_leaf(buf))
7389                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7390         else
7391                 status = btrfs_check_node(root, &rec->parent_key, buf);
7392
7393         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7394                 if (repair)
7395                         status = try_to_fix_bad_block(root, buf, status);
7396                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7397                         ret = -EIO;
7398                         fprintf(stderr, "bad block %llu\n",
7399                                 (unsigned long long)buf->start);
7400                 } else {
7401                         /*
7402                          * Signal to callers we need to start the scan over
7403                          * again since we'll have cowed blocks.
7404                          */
7405                         ret = -EAGAIN;
7406                 }
7407         } else {
7408                 rec->content_checked = 1;
7409                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7410                         rec->owner_ref_checked = 1;
7411                 else {
7412                         ret = check_owner_ref(root, rec, buf);
7413                         if (!ret)
7414                                 rec->owner_ref_checked = 1;
7415                 }
7416         }
7417         if (!ret)
7418                 maybe_free_extent_rec(extent_cache, rec);
7419         return ret;
7420 }
7421
#if 0
/*
 * Disabled list-based lookup: walk rec->backrefs for a tree backref matching
 * @parent (full backrefs, when @parent is non-zero) or @root (otherwise).
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
7451
7452 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7453                                                 u64 parent, u64 root)
7454 {
7455         struct tree_backref *ref = malloc(sizeof(*ref));
7456
7457         if (!ref)
7458                 return NULL;
7459         memset(&ref->node, 0, sizeof(ref->node));
7460         if (parent > 0) {
7461                 ref->parent = parent;
7462                 ref->node.full_backref = 1;
7463         } else {
7464                 ref->root = root;
7465                 ref->node.full_backref = 0;
7466         }
7467
7468         return ref;
7469 }
7470
#if 0
/*
 * Disabled list-based lookup: walk rec->backrefs for a data backref matching
 * @parent (full backrefs, when @parent is non-zero) or the
 * (@root, @owner, @offset) triple.  With @found_ref set, an already-found
 * backref whose bytes or disk_bytenr differ is skipped.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;
		back = to_data_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}
		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
7509
7510 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7511                                                 u64 parent, u64 root,
7512                                                 u64 owner, u64 offset,
7513                                                 u64 max_size)
7514 {
7515         struct data_backref *ref = malloc(sizeof(*ref));
7516
7517         if (!ref)
7518                 return NULL;
7519         memset(&ref->node, 0, sizeof(ref->node));
7520         ref->node.is_data = 1;
7521
7522         if (parent > 0) {
7523                 ref->parent = parent;
7524                 ref->owner = 0;
7525                 ref->offset = 0;
7526                 ref->node.full_backref = 1;
7527         } else {
7528                 ref->root = root;
7529                 ref->owner = owner;
7530                 ref->offset = offset;
7531                 ref->node.full_backref = 0;
7532         }
7533         ref->bytes = max_size;
7534         ref->found_ref = 0;
7535         ref->num_refs = 0;
7536         if (max_size > rec->max_size)
7537                 rec->max_size = max_size;
7538         return ref;
7539 }
7540
7541 /* Check if the type of extent matches with its chunk */
7542 static void check_extent_type(struct extent_record *rec)
7543 {
7544         struct btrfs_block_group_cache *bg_cache;
7545
7546         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7547         if (!bg_cache)
7548                 return;
7549
7550         /* data extent, check chunk directly*/
7551         if (!rec->metadata) {
7552                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7553                         rec->wrong_chunk_type = 1;
7554                 return;
7555         }
7556
7557         /* metadata extent, check the obvious case first */
7558         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7559                                  BTRFS_BLOCK_GROUP_METADATA))) {
7560                 rec->wrong_chunk_type = 1;
7561                 return;
7562         }
7563
7564         /*
7565          * Check SYSTEM extent, as it's also marked as metadata, we can only
7566          * make sure it's a SYSTEM extent by its backref
7567          */
7568         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7569                 struct extent_backref *node;
7570                 struct tree_backref *tback;
7571                 u64 bg_type;
7572
7573                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7574                 if (node->is_data) {
7575                         /* tree block shouldn't have data backref */
7576                         rec->wrong_chunk_type = 1;
7577                         return;
7578                 }
7579                 tback = container_of(node, struct tree_backref, node);
7580
7581                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7582                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7583                 else
7584                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7585                 if (!(bg_cache->flags & bg_type))
7586                         rec->wrong_chunk_type = 1;
7587         }
7588 }
7589
7590 /*
7591  * Allocate a new extent record, fill default values from @tmpl and insert int
7592  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7593  * the cache, otherwise it fails.
7594  */
7595 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7596                 struct extent_record *tmpl)
7597 {
7598         struct extent_record *rec;
7599         int ret = 0;
7600
7601         BUG_ON(tmpl->max_size == 0);
7602         rec = malloc(sizeof(*rec));
7603         if (!rec)
7604                 return -ENOMEM;
7605         rec->start = tmpl->start;
7606         rec->max_size = tmpl->max_size;
7607         rec->nr = max(tmpl->nr, tmpl->max_size);
7608         rec->found_rec = tmpl->found_rec;
7609         rec->content_checked = tmpl->content_checked;
7610         rec->owner_ref_checked = tmpl->owner_ref_checked;
7611         rec->num_duplicates = 0;
7612         rec->metadata = tmpl->metadata;
7613         rec->flag_block_full_backref = FLAG_UNSET;
7614         rec->bad_full_backref = 0;
7615         rec->crossing_stripes = 0;
7616         rec->wrong_chunk_type = 0;
7617         rec->is_root = tmpl->is_root;
7618         rec->refs = tmpl->refs;
7619         rec->extent_item_refs = tmpl->extent_item_refs;
7620         rec->parent_generation = tmpl->parent_generation;
7621         INIT_LIST_HEAD(&rec->backrefs);
7622         INIT_LIST_HEAD(&rec->dups);
7623         INIT_LIST_HEAD(&rec->list);
7624         rec->backref_tree = RB_ROOT;
7625         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7626         rec->cache.start = tmpl->start;
7627         rec->cache.size = tmpl->nr;
7628         ret = insert_cache_extent(extent_cache, &rec->cache);
7629         if (ret) {
7630                 free(rec);
7631                 return ret;
7632         }
7633         bytes_used += rec->nr;
7634
7635         if (tmpl->metadata)
7636                 rec->crossing_stripes = check_crossing_stripes(global_info,
7637                                 rec->start, global_info->nodesize);
7638         check_extent_type(rec);
7639         return ret;
7640 }
7641
7642 /*
7643  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7644  * some are hints:
7645  * - refs              - if found, increase refs
7646  * - is_root           - if found, set
7647  * - content_checked   - if found, set
7648  * - owner_ref_checked - if found, set
7649  *
7650  * If not found, create a new one, initialize and insert.
7651  */
7652 static int add_extent_rec(struct cache_tree *extent_cache,
7653                 struct extent_record *tmpl)
7654 {
7655         struct extent_record *rec;
7656         struct cache_extent *cache;
7657         int ret = 0;
7658         int dup = 0;
7659
7660         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7661         if (cache) {
7662                 rec = container_of(cache, struct extent_record, cache);
7663                 if (tmpl->refs)
7664                         rec->refs++;
7665                 if (rec->nr == 1)
7666                         rec->nr = max(tmpl->nr, tmpl->max_size);
7667
7668                 /*
7669                  * We need to make sure to reset nr to whatever the extent
7670                  * record says was the real size, this way we can compare it to
7671                  * the backrefs.
7672                  */
7673                 if (tmpl->found_rec) {
7674                         if (tmpl->start != rec->start || rec->found_rec) {
7675                                 struct extent_record *tmp;
7676
7677                                 dup = 1;
7678                                 if (list_empty(&rec->list))
7679                                         list_add_tail(&rec->list,
7680                                                       &duplicate_extents);
7681
7682                                 /*
7683                                  * We have to do this song and dance in case we
7684                                  * find an extent record that falls inside of
7685                                  * our current extent record but does not have
7686                                  * the same objectid.
7687                                  */
7688                                 tmp = malloc(sizeof(*tmp));
7689                                 if (!tmp)
7690                                         return -ENOMEM;
7691                                 tmp->start = tmpl->start;
7692                                 tmp->max_size = tmpl->max_size;
7693                                 tmp->nr = tmpl->nr;
7694                                 tmp->found_rec = 1;
7695                                 tmp->metadata = tmpl->metadata;
7696                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7697                                 INIT_LIST_HEAD(&tmp->list);
7698                                 list_add_tail(&tmp->list, &rec->dups);
7699                                 rec->num_duplicates++;
7700                         } else {
7701                                 rec->nr = tmpl->nr;
7702                                 rec->found_rec = 1;
7703                         }
7704                 }
7705
7706                 if (tmpl->extent_item_refs && !dup) {
7707                         if (rec->extent_item_refs) {
7708                                 fprintf(stderr, "block %llu rec "
7709                                         "extent_item_refs %llu, passed %llu\n",
7710                                         (unsigned long long)tmpl->start,
7711                                         (unsigned long long)
7712                                                         rec->extent_item_refs,
7713                                         (unsigned long long)tmpl->extent_item_refs);
7714                         }
7715                         rec->extent_item_refs = tmpl->extent_item_refs;
7716                 }
7717                 if (tmpl->is_root)
7718                         rec->is_root = 1;
7719                 if (tmpl->content_checked)
7720                         rec->content_checked = 1;
7721                 if (tmpl->owner_ref_checked)
7722                         rec->owner_ref_checked = 1;
7723                 memcpy(&rec->parent_key, &tmpl->parent_key,
7724                                 sizeof(tmpl->parent_key));
7725                 if (tmpl->parent_generation)
7726                         rec->parent_generation = tmpl->parent_generation;
7727                 if (rec->max_size < tmpl->max_size)
7728                         rec->max_size = tmpl->max_size;
7729
7730                 /*
7731                  * A metadata extent can't cross stripe_len boundary, otherwise
7732                  * kernel scrub won't be able to handle it.
7733                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7734                  * it.
7735                  */
7736                 if (tmpl->metadata)
7737                         rec->crossing_stripes = check_crossing_stripes(
7738                                         global_info, rec->start,
7739                                         global_info->nodesize);
7740                 check_extent_type(rec);
7741                 maybe_free_extent_rec(extent_cache, rec);
7742                 return ret;
7743         }
7744
7745         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7746
7747         return ret;
7748 }
7749
7750 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7751                             u64 parent, u64 root, int found_ref)
7752 {
7753         struct extent_record *rec;
7754         struct tree_backref *back;
7755         struct cache_extent *cache;
7756         int ret;
7757         bool insert = false;
7758
7759         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7760         if (!cache) {
7761                 struct extent_record tmpl;
7762
7763                 memset(&tmpl, 0, sizeof(tmpl));
7764                 tmpl.start = bytenr;
7765                 tmpl.nr = 1;
7766                 tmpl.metadata = 1;
7767                 tmpl.max_size = 1;
7768
7769                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7770                 if (ret)
7771                         return ret;
7772
7773                 /* really a bug in cache_extent implement now */
7774                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7775                 if (!cache)
7776                         return -ENOENT;
7777         }
7778
7779         rec = container_of(cache, struct extent_record, cache);
7780         if (rec->start != bytenr) {
7781                 /*
7782                  * Several cause, from unaligned bytenr to over lapping extents
7783                  */
7784                 return -EEXIST;
7785         }
7786
7787         back = find_tree_backref(rec, parent, root);
7788         if (!back) {
7789                 back = alloc_tree_backref(rec, parent, root);
7790                 if (!back)
7791                         return -ENOMEM;
7792                 insert = true;
7793         }
7794
7795         if (found_ref) {
7796                 if (back->node.found_ref) {
7797                         fprintf(stderr, "Extent back ref already exists "
7798                                 "for %llu parent %llu root %llu \n",
7799                                 (unsigned long long)bytenr,
7800                                 (unsigned long long)parent,
7801                                 (unsigned long long)root);
7802                 }
7803                 back->node.found_ref = 1;
7804         } else {
7805                 if (back->node.found_extent_tree) {
7806                         fprintf(stderr, "Extent back ref already exists "
7807                                 "for %llu parent %llu root %llu \n",
7808                                 (unsigned long long)bytenr,
7809                                 (unsigned long long)parent,
7810                                 (unsigned long long)root);
7811                 }
7812                 back->node.found_extent_tree = 1;
7813         }
7814         if (insert)
7815                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7816                         compare_extent_backref));
7817         check_extent_type(rec);
7818         maybe_free_extent_rec(extent_cache, rec);
7819         return 0;
7820 }
7821
/*
 * Record a data back reference for the extent that starts at @bytenr.
 *
 * When no extent record covers @bytenr, a one byte placeholder record with
 * max_size @max_size is inserted first.  The backref identified by
 * (@parent, @root, @owner, @offset) is looked up, or allocated when missing.
 *
 * With @found_ref set, @num_refs must be 1; the backref's found_ref counter
 * and the record's refs counter are both incremented and the backref is
 * updated to carry @max_size / @bytenr.  Without @found_ref, the backref's
 * num_refs is set to @num_refs and node.found_extent_tree is flagged.
 *
 * Returns 0 on success or the error of add_extent_rec_nolookup().  The
 * function does not return on internal inconsistencies: it calls abort()
 * when the placeholder cannot be found again and trips BUG_ON() on backref
 * allocation failure, on @num_refs != 1 with @found_ref, and on a size
 * mismatch with an already found ref.
 */
static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
                            u64 parent, u64 root, u64 owner, u64 offset,
                            u32 num_refs, int found_ref, u64 max_size)
{
        struct extent_record *rec;
        struct data_backref *back;
        struct cache_extent *cache;
        int ret;
        bool insert = false;

        cache = lookup_cache_extent(extent_cache, bytenr, 1);
        if (!cache) {
                struct extent_record tmpl;

                /* No record yet: insert a placeholder starting at bytenr */
                memset(&tmpl, 0, sizeof(tmpl));
                tmpl.start = bytenr;
                tmpl.nr = 1;
                tmpl.max_size = max_size;

                ret = add_extent_rec_nolookup(extent_cache, &tmpl);
                if (ret)
                        return ret;

                /* The record was just inserted, so a miss here is fatal */
                cache = lookup_cache_extent(extent_cache, bytenr, 1);
                if (!cache)
                        abort();
        }

        rec = container_of(cache, struct extent_record, cache);
        /* max_size only ever grows */
        if (rec->max_size < max_size)
                rec->max_size = max_size;

        /*
         * If found_ref is set then max_size is the real size and must match the
         * existing refs.  So if we have already found a ref then we need to
         * make sure that this ref matches the existing one, otherwise we need
         * to add a new backref so we can notice that the backrefs don't match
         * and we need to figure out who is telling the truth.  This is to
         * account for that awful fsync bug I introduced where we'd end up with
         * a btrfs_file_extent_item that would have its length include multiple
         * prealloc extents or point inside of a prealloc extent.
         */
        back = find_data_backref(rec, parent, root, owner, offset, found_ref,
                                 bytenr, max_size);
        if (!back) {
                back = alloc_data_backref(rec, parent, root, owner, offset,
                                          max_size);
                BUG_ON(!back);
                /* Freshly allocated backrefs are linked into the tree below */
                insert = true;
        }

        if (found_ref) {
                BUG_ON(num_refs != 1);
                if (back->node.found_ref)
                        BUG_ON(back->bytes != max_size);
                back->node.found_ref = 1;
                back->found_ref += 1;
                if (back->bytes != max_size || back->disk_bytenr != bytenr) {
                        back->bytes = max_size;
                        back->disk_bytenr = bytenr;

                        /* Need to reinsert if not already in the tree */
                        if (!insert) {
                                rb_erase(&back->node.node, &rec->backref_tree);
                                insert = true;
                        }
                }
                rec->refs += 1;
                rec->content_checked = 1;
                rec->owner_ref_checked = 1;
        } else {
                /* Seeing the same extent tree backref twice is only reported */
                if (back->node.found_extent_tree) {
                        fprintf(stderr, "Extent back ref already exists "
                                "for %llu parent %llu root %llu "
                                "owner %llu offset %llu num_refs %lu\n",
                                (unsigned long long)bytenr,
                                (unsigned long long)parent,
                                (unsigned long long)root,
                                (unsigned long long)owner,
                                (unsigned long long)offset,
                                (unsigned long)num_refs);
                }
                back->num_refs = num_refs;
                back->node.found_extent_tree = 1;
        }
        if (insert)
                WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
                        compare_extent_backref));

        maybe_free_extent_rec(extent_cache, rec);
        return 0;
}
7914
7915 static int add_pending(struct cache_tree *pending,
7916                        struct cache_tree *seen, u64 bytenr, u32 size)
7917 {
7918         int ret;
7919         ret = add_cache_extent(seen, bytenr, size);
7920         if (ret)
7921                 return ret;
7922         add_cache_extent(pending, bytenr, size);
7923         return 0;
7924 }
7925
7926 static int pick_next_pending(struct cache_tree *pending,
7927                         struct cache_tree *reada,
7928                         struct cache_tree *nodes,
7929                         u64 last, struct block_info *bits, int bits_nr,
7930                         int *reada_bits)
7931 {
7932         unsigned long node_start = last;
7933         struct cache_extent *cache;
7934         int ret;
7935
7936         cache = search_cache_extent(reada, 0);
7937         if (cache) {
7938                 bits[0].start = cache->start;
7939                 bits[0].size = cache->size;
7940                 *reada_bits = 1;
7941                 return 1;
7942         }
7943         *reada_bits = 0;
7944         if (node_start > 32768)
7945                 node_start -= 32768;
7946
7947         cache = search_cache_extent(nodes, node_start);
7948         if (!cache)
7949                 cache = search_cache_extent(nodes, 0);
7950
7951         if (!cache) {
7952                  cache = search_cache_extent(pending, 0);
7953                  if (!cache)
7954                          return 0;
7955                  ret = 0;
7956                  do {
7957                          bits[ret].start = cache->start;
7958                          bits[ret].size = cache->size;
7959                          cache = next_cache_extent(cache);
7960                          ret++;
7961                  } while (cache && ret < bits_nr);
7962                  return ret;
7963         }
7964
7965         ret = 0;
7966         do {
7967                 bits[ret].start = cache->start;
7968                 bits[ret].size = cache->size;
7969                 cache = next_cache_extent(cache);
7970                 ret++;
7971         } while (cache && ret < bits_nr);
7972
7973         if (bits_nr - ret > 8) {
7974                 u64 lookup = bits[0].start + bits[0].size;
7975                 struct cache_extent *next;
7976                 next = search_cache_extent(pending, lookup);
7977                 while(next) {
7978                         if (next->start - lookup > 32768)
7979                                 break;
7980                         bits[ret].start = next->start;
7981                         bits[ret].size = next->size;
7982                         lookup = next->start + next->size;
7983                         ret++;
7984                         if (ret == bits_nr)
7985                                 break;
7986                         next = next_cache_extent(next);
7987                         if (!next)
7988                                 break;
7989                 }
7990         }
7991         return ret;
7992 }
7993
7994 static void free_chunk_record(struct cache_extent *cache)
7995 {
7996         struct chunk_record *rec;
7997
7998         rec = container_of(cache, struct chunk_record, cache);
7999         list_del_init(&rec->list);
8000         list_del_init(&rec->dextents);
8001         free(rec);
8002 }
8003
/*
 * Release every chunk record stored in @chunk_cache, using
 * free_chunk_record() as the per-extent destructor.
 */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
8008
8009 static void free_device_record(struct rb_node *node)
8010 {
8011         struct device_record *rec;
8012
8013         rec = container_of(node, struct device_record, node);
8014         free(rec);
8015 }
8016
/*
 * Instantiate the tree-freeing helper for the device cache, with
 * free_device_record() as the per-node destructor.
 * NOTE(review): FREE_RB_BASED_TREE is defined outside this file section;
 * presumably it expands to a free_device_cache_tree() definition -- confirm
 * against its declaration.
 */
FREE_RB_BASED_TREE(device_cache, free_device_record);
8018
8019 int insert_block_group_record(struct block_group_tree *tree,
8020                               struct block_group_record *bg_rec)
8021 {
8022         int ret;
8023
8024         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8025         if (ret)
8026                 return ret;
8027
8028         list_add_tail(&bg_rec->list, &tree->block_groups);
8029         return 0;
8030 }
8031
8032 static void free_block_group_record(struct cache_extent *cache)
8033 {
8034         struct block_group_record *rec;
8035
8036         rec = container_of(cache, struct block_group_record, cache);
8037         list_del_init(&rec->list);
8038         free(rec);
8039 }
8040
/*
 * Release every block group record stored in @tree, using
 * free_block_group_record() as the per-extent destructor.
 */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
8045
8046 int insert_device_extent_record(struct device_extent_tree *tree,
8047                                 struct device_extent_record *de_rec)
8048 {
8049         int ret;
8050
8051         /*
8052          * Device extent is a bit different from the other extents, because
8053          * the extents which belong to the different devices may have the
8054          * same start and size, so we need use the special extent cache
8055          * search/insert functions.
8056          */
8057         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8058         if (ret)
8059                 return ret;
8060
8061         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8062         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
8063         return 0;
8064 }
8065
8066 static void free_device_extent_record(struct cache_extent *cache)
8067 {
8068         struct device_extent_record *rec;
8069
8070         rec = container_of(cache, struct device_extent_record, cache);
8071         if (!list_empty(&rec->chunk_list))
8072                 list_del_init(&rec->chunk_list);
8073         if (!list_empty(&rec->device_list))
8074                 list_del_init(&rec->device_list);
8075         free(rec);
8076 }
8077
/*
 * Release every device extent record stored in @tree, using
 * free_device_extent_record() as the per-extent destructor.
 */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
8082
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * Refs whose objectid is below BTRFS_FIRST_FREE_OBJECTID are recorded as tree
 * backrefs, all others as data backrefs carrying the item's ref count.  In
 * both cases the key's objectid is used as bytenr and its offset as parent.
 *
 * Returns whatever add_tree_backref() / add_data_backref() returns.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
8103
8104 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8105                                             struct btrfs_key *key,
8106                                             int slot)
8107 {
8108         struct btrfs_chunk *ptr;
8109         struct chunk_record *rec;
8110         int num_stripes, i;
8111
8112         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8113         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8114
8115         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8116         if (!rec) {
8117                 fprintf(stderr, "memory allocation failed\n");
8118                 exit(-1);
8119         }
8120
8121         INIT_LIST_HEAD(&rec->list);
8122         INIT_LIST_HEAD(&rec->dextents);
8123         rec->bg_rec = NULL;
8124
8125         rec->cache.start = key->offset;
8126         rec->cache.size = btrfs_chunk_length(leaf, ptr);
8127
8128         rec->generation = btrfs_header_generation(leaf);
8129
8130         rec->objectid = key->objectid;
8131         rec->type = key->type;
8132         rec->offset = key->offset;
8133
8134         rec->length = rec->cache.size;
8135         rec->owner = btrfs_chunk_owner(leaf, ptr);
8136         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8137         rec->type_flags = btrfs_chunk_type(leaf, ptr);
8138         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8139         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8140         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8141         rec->num_stripes = num_stripes;
8142         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8143
8144         for (i = 0; i < rec->num_stripes; ++i) {
8145                 rec->stripes[i].devid =
8146                         btrfs_stripe_devid_nr(leaf, ptr, i);
8147                 rec->stripes[i].offset =
8148                         btrfs_stripe_offset_nr(leaf, ptr, i);
8149                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8150                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
8151                                 BTRFS_UUID_SIZE);
8152         }
8153
8154         return rec;
8155 }
8156
8157 static int process_chunk_item(struct cache_tree *chunk_cache,
8158                               struct btrfs_key *key, struct extent_buffer *eb,
8159                               int slot)
8160 {
8161         struct chunk_record *rec;
8162         struct btrfs_chunk *chunk;
8163         int ret = 0;
8164
8165         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8166         /*
8167          * Do extra check for this chunk item,
8168          *
8169          * It's still possible one can craft a leaf with CHUNK_ITEM, with
8170          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8171          * and owner<->key_type check.
8172          */
8173         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8174                                       key->offset);
8175         if (ret < 0) {
8176                 error("chunk(%llu, %llu) is not valid, ignore it",
8177                       key->offset, btrfs_chunk_length(eb, chunk));
8178                 return 0;
8179         }
8180         rec = btrfs_new_chunk_record(eb, key, slot);
8181         ret = insert_cache_extent(chunk_cache, &rec->cache);
8182         if (ret) {
8183                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8184                         rec->offset, rec->length);
8185                 free(rec);
8186         }
8187
8188         return ret;
8189 }
8190
8191 static int process_device_item(struct rb_root *dev_cache,
8192                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8193 {
8194         struct btrfs_dev_item *ptr;
8195         struct device_record *rec;
8196         int ret = 0;
8197
8198         ptr = btrfs_item_ptr(eb,
8199                 slot, struct btrfs_dev_item);
8200
8201         rec = malloc(sizeof(*rec));
8202         if (!rec) {
8203                 fprintf(stderr, "memory allocation failed\n");
8204                 return -ENOMEM;
8205         }
8206
8207         rec->devid = key->offset;
8208         rec->generation = btrfs_header_generation(eb);
8209
8210         rec->objectid = key->objectid;
8211         rec->type = key->type;
8212         rec->offset = key->offset;
8213
8214         rec->devid = btrfs_device_id(eb, ptr);
8215         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8216         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8217
8218         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8219         if (ret) {
8220                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
8221                 free(rec);
8222         }
8223
8224         return ret;
8225 }
8226
8227 struct block_group_record *
8228 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8229                              int slot)
8230 {
8231         struct btrfs_block_group_item *ptr;
8232         struct block_group_record *rec;
8233
8234         rec = calloc(1, sizeof(*rec));
8235         if (!rec) {
8236                 fprintf(stderr, "memory allocation failed\n");
8237                 exit(-1);
8238         }
8239
8240         rec->cache.start = key->objectid;
8241         rec->cache.size = key->offset;
8242
8243         rec->generation = btrfs_header_generation(leaf);
8244
8245         rec->objectid = key->objectid;
8246         rec->type = key->type;
8247         rec->offset = key->offset;
8248
8249         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8250         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8251
8252         INIT_LIST_HEAD(&rec->list);
8253
8254         return rec;
8255 }
8256
8257 static int process_block_group_item(struct block_group_tree *block_group_cache,
8258                                     struct btrfs_key *key,
8259                                     struct extent_buffer *eb, int slot)
8260 {
8261         struct block_group_record *rec;
8262         int ret = 0;
8263
8264         rec = btrfs_new_block_group_record(eb, key, slot);
8265         ret = insert_block_group_record(block_group_cache, rec);
8266         if (ret) {
8267                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8268                         rec->objectid, rec->offset);
8269                 free(rec);
8270         }
8271
8272         return ret;
8273 }
8274
8275 struct device_extent_record *
8276 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8277                                struct btrfs_key *key, int slot)
8278 {
8279         struct device_extent_record *rec;
8280         struct btrfs_dev_extent *ptr;
8281
8282         rec = calloc(1, sizeof(*rec));
8283         if (!rec) {
8284                 fprintf(stderr, "memory allocation failed\n");
8285                 exit(-1);
8286         }
8287
8288         rec->cache.objectid = key->objectid;
8289         rec->cache.start = key->offset;
8290
8291         rec->generation = btrfs_header_generation(leaf);
8292
8293         rec->objectid = key->objectid;
8294         rec->type = key->type;
8295         rec->offset = key->offset;
8296
8297         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8298         rec->chunk_objecteid =
8299                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8300         rec->chunk_offset =
8301                 btrfs_dev_extent_chunk_offset(leaf, ptr);
8302         rec->length = btrfs_dev_extent_length(leaf, ptr);
8303         rec->cache.size = rec->length;
8304
8305         INIT_LIST_HEAD(&rec->chunk_list);
8306         INIT_LIST_HEAD(&rec->device_list);
8307
8308         return rec;
8309 }
8310
8311 static int
8312 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8313                            struct btrfs_key *key, struct extent_buffer *eb,
8314                            int slot)
8315 {
8316         struct device_extent_record *rec;
8317         int ret;
8318
8319         rec = btrfs_new_device_extent_record(eb, key, slot);
8320         ret = insert_device_extent_record(dev_extent_cache, rec);
8321         if (ret) {
8322                 fprintf(stderr,
8323                         "Device extent[%llu, %llu, %llu] existed.\n",
8324                         rec->objectid, rec->offset, rec->length);
8325                 free(rec);
8326         }
8327
8328         return ret;
8329 }
8330
/*
 * Parse the extent item (EXTENT_ITEM or METADATA_ITEM) at @slot of @eb and
 * feed it into @extent_cache.
 *
 * An extent record is added for the item itself, followed by one backref per
 * inline reference stored behind the item.
 *
 * Returns -EIO when the bytenr is not sector aligned, when a tree block
 * extent is not exactly nodesize long, or when a data extent length is not
 * sector aligned.  For items smaller than struct btrfs_extent_item (v0
 * format) the result of add_extent_rec() is returned.  In every other case 0
 * is returned, including when an unknown inline ref type is hit or when
 * adding a backref fails.
 */
static int process_extent_item(struct btrfs_root *root,
                               struct cache_tree *extent_cache,
                               struct extent_buffer *eb, int slot)
{
        struct btrfs_extent_item *ei;
        struct btrfs_extent_inline_ref *iref;
        struct btrfs_extent_data_ref *dref;
        struct btrfs_shared_data_ref *sref;
        struct btrfs_key key;
        struct extent_record tmpl;
        unsigned long end;
        unsigned long ptr;
        int ret;
        int type;
        u32 item_size = btrfs_item_size_nr(eb, slot);
        u64 refs = 0;
        u64 offset;
        u64 num_bytes;
        int metadata = 0;

        btrfs_item_key_to_cpu(eb, &key, slot);

        /*
         * METADATA_ITEM keys do not carry the length in key.offset, so the
         * nodesize is used; for other keys key.offset is the extent length.
         */
        if (key.type == BTRFS_METADATA_ITEM_KEY) {
                metadata = 1;
                num_bytes = root->fs_info->nodesize;
        } else {
                num_bytes = key.offset;
        }

        if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
                error("ignoring invalid extent, bytenr %llu is not aligned to %u",
                      key.objectid, root->fs_info->sectorsize);
                return -EIO;
        }
        /* Items too small for a btrfs_extent_item are handled as v0 items */
        if (item_size < sizeof(*ei)) {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                struct btrfs_extent_item_v0 *ei0;
                BUG_ON(item_size != sizeof(*ei0));
                ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
                refs = btrfs_extent_refs_v0(eb, ei0);
#else
                BUG();
#endif
                memset(&tmpl, 0, sizeof(tmpl));
                tmpl.start = key.objectid;
                tmpl.nr = num_bytes;
                tmpl.extent_item_refs = refs;
                tmpl.metadata = metadata;
                tmpl.found_rec = 1;
                tmpl.max_size = num_bytes;

                return add_extent_rec(extent_cache, &tmpl);
        }

        ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
        refs = btrfs_extent_refs(eb, ei);
        /* From here on the item's flags, not the key type, decide metadata */
        if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                metadata = 1;
        else
                metadata = 0;
        if (metadata && num_bytes != root->fs_info->nodesize) {
                error("ignore invalid metadata extent, length %llu does not equal to %u",
                      num_bytes, root->fs_info->nodesize);
                return -EIO;
        }
        if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
                error("ignore invalid data extent, length %llu is not aligned to %u",
                      num_bytes, root->fs_info->sectorsize);
                return -EIO;
        }

        memset(&tmpl, 0, sizeof(tmpl));
        tmpl.start = key.objectid;
        tmpl.nr = num_bytes;
        tmpl.extent_item_refs = refs;
        tmpl.metadata = metadata;
        tmpl.found_rec = 1;
        tmpl.max_size = num_bytes;
        /* NOTE(review): the result of add_extent_rec() is not checked here */
        add_extent_rec(extent_cache, &tmpl);

        /*
         * Inline refs start right behind the extent item; tree blocks stored
         * under an EXTENT_ITEM key have a btrfs_tree_block_info in between.
         */
        ptr = (unsigned long)(ei + 1);
        if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
            key.type == BTRFS_EXTENT_ITEM_KEY)
                ptr += sizeof(struct btrfs_tree_block_info);

        end = (unsigned long)ei + item_size;
        while (ptr < end) {
                iref = (struct btrfs_extent_inline_ref *)ptr;
                type = btrfs_extent_inline_ref_type(eb, iref);
                offset = btrfs_extent_inline_ref_offset(eb, iref);
                switch (type) {
                case BTRFS_TREE_BLOCK_REF_KEY:
                        /* The inline offset is passed as the root argument */
                        ret = add_tree_backref(extent_cache, key.objectid,
                                        0, offset, 0);
                        if (ret < 0)
                                error(
                        "add_tree_backref failed (extent items tree block): %s",
                                      strerror(-ret));
                        break;
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        /* The inline offset is passed as the parent argument */
                        ret = add_tree_backref(extent_cache, key.objectid,
                                        offset, 0, 0);
                        if (ret < 0)
                                error(
                        "add_tree_backref failed (extent items shared block): %s",
                                      strerror(-ret));
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY:
                        /* The data ref overlays the inline ref's offset field */
                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                        add_data_backref(extent_cache, key.objectid, 0,
                                        btrfs_extent_data_ref_root(eb, dref),
                                        btrfs_extent_data_ref_objectid(eb,
                                                                       dref),
                                        btrfs_extent_data_ref_offset(eb, dref),
                                        btrfs_extent_data_ref_count(eb, dref),
                                        0, num_bytes);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY:
                        /* The shared data ref follows the inline ref header */
                        sref = (struct btrfs_shared_data_ref *)(iref + 1);
                        add_data_backref(extent_cache, key.objectid, offset,
                                        0, 0, 0,
                                        btrfs_shared_data_ref_count(eb, sref),
                                        0, num_bytes);
                        break;
                default:
                        /* Unknown type: its size is unknown, so stop parsing */
                        fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
                                key.objectid, key.type, num_bytes);
                        goto out;
                }
                ptr += btrfs_extent_inline_ref_size(type);
        }
        /* The last inline ref should end exactly at the end of the item */
        WARN_ON(ptr > end);
out:
        return 0;
}
8466
/*
 * Check that the free space cache of block group @cache holds an entry that
 * exactly matches the range [@offset, @offset + @bytes), after removing the
 * parts of that range that btrfs_rmap_block() reports for the super block
 * mirror offsets.
 *
 * For every super block mirror the reported logical stripes are compared with
 * the range: the range is accepted right away if a stripe covers it
 * completely, trimmed if a stripe overlaps its start or end, and split (with
 * a recursive check of the left part) if a stripe lies in its middle.
 *
 * On a match the free space entry is unlinked from the cache's
 * free_space_ctl and freed.
 *
 * Returns 0 on success, -EINVAL if no entry exists or if its offset or size
 * differ, or the error of btrfs_rmap_block() / the recursive call.
 */
static int check_cache_range(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache,
                             u64 offset, u64 bytes)
{
        struct btrfs_free_space *entry;
        u64 *logical;
        u64 bytenr;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                ret = btrfs_rmap_block(root->fs_info,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                if (ret)
                        return ret;

                /* Walk the returned logical addresses from last to first */
                while (nr--) {
                        /* Stripe ends before the range: no overlap */
                        if (logical[nr] + stripe_len <= offset)
                                continue;
                        /* Stripe starts after the range: no overlap */
                        if (offset + bytes <= logical[nr])
                                continue;
                        if (logical[nr] == offset) {
                                /* Stripe covers the whole range: nothing left */
                                if (stripe_len >= bytes) {
                                        free(logical);
                                        return 0;
                                }
                                /* Drop the stripe from the front of the range */
                                bytes -= stripe_len;
                                offset += stripe_len;
                        } else if (logical[nr] < offset) {
                                /* Stripe starts earlier and covers the range */
                                if (logical[nr] + stripe_len >=
                                    offset + bytes) {
                                        free(logical);
                                        return 0;
                                }
                                /* Keep only the part behind the stripe */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        } else {
                                /*
                                 * Could be tricky, the super may land in the
                                 * middle of the area we're checking.  First
                                 * check the easiest case, it's at the end.
                                 */
                                if (logical[nr] + stripe_len >=
                                    bytes + offset) {
                                        bytes = logical[nr] - offset;
                                        continue;
                                }

                                /* Check the left side */
                                ret = check_cache_range(root, cache,
                                                        offset,
                                                        logical[nr] - offset);
                                if (ret) {
                                        free(logical);
                                        return ret;
                                }

                                /* Now we continue with the right side */
                                bytes = (offset + bytes) -
                                        (logical[nr] + stripe_len);
                                offset = logical[nr] + stripe_len;
                        }
                }

                free(logical);
        }

        /* What is left of the range must match one free space entry exactly */
        entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
        if (!entry) {
                fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
                        offset, offset+bytes);
                return -EINVAL;
        }

        if (entry->offset != offset) {
                fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
                        entry->offset);
                return -EINVAL;
        }

        if (entry->bytes != bytes) {
                fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
                        bytes, entry->bytes, offset);
                return -EINVAL;
        }

        /* Consume the entry so that it is not matched a second time */
        unlink_free_space(cache->free_space_ctl, entry);
        free(entry);
        return 0;
}
8560
8561 static int verify_space_cache(struct btrfs_root *root,
8562                               struct btrfs_block_group_cache *cache)
8563 {
8564         struct btrfs_path path;
8565         struct extent_buffer *leaf;
8566         struct btrfs_key key;
8567         u64 last;
8568         int ret = 0;
8569
8570         root = root->fs_info->extent_root;
8571
8572         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8573
8574         btrfs_init_path(&path);
8575         key.objectid = last;
8576         key.offset = 0;
8577         key.type = BTRFS_EXTENT_ITEM_KEY;
8578         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8579         if (ret < 0)
8580                 goto out;
8581         ret = 0;
8582         while (1) {
8583                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8584                         ret = btrfs_next_leaf(root, &path);
8585                         if (ret < 0)
8586                                 goto out;
8587                         if (ret > 0) {
8588                                 ret = 0;
8589                                 break;
8590                         }
8591                 }
8592                 leaf = path.nodes[0];
8593                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8594                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8595                         break;
8596                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8597                     key.type != BTRFS_METADATA_ITEM_KEY) {
8598                         path.slots[0]++;
8599                         continue;
8600                 }
8601
8602                 if (last == key.objectid) {
8603                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8604                                 last = key.objectid + key.offset;
8605                         else
8606                                 last = key.objectid + root->fs_info->nodesize;
8607                         path.slots[0]++;
8608                         continue;
8609                 }
8610
8611                 ret = check_cache_range(root, cache, last,
8612                                         key.objectid - last);
8613                 if (ret)
8614                         break;
8615                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8616                         last = key.objectid + key.offset;
8617                 else
8618                         last = key.objectid + root->fs_info->nodesize;
8619                 path.slots[0]++;
8620         }
8621
8622         if (last < cache->key.objectid + cache->key.offset)
8623                 ret = check_cache_range(root, cache, last,
8624                                         cache->key.objectid +
8625                                         cache->key.offset - last);
8626
8627 out:
8628         btrfs_release_path(&path);
8629
8630         if (!ret &&
8631             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8632                 fprintf(stderr, "There are still entries left in the space "
8633                         "cache\n");
8634                 ret = -EINVAL;
8635         }
8636
8637         return ret;
8638 }
8639
8640 static int check_space_cache(struct btrfs_root *root)
8641 {
8642         struct btrfs_block_group_cache *cache;
8643         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8644         int ret;
8645         int error = 0;
8646
8647         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8648             btrfs_super_generation(root->fs_info->super_copy) !=
8649             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8650                 printf("cache and super generation don't match, space cache "
8651                        "will be invalidated\n");
8652                 return 0;
8653         }
8654
8655         if (ctx.progress_enabled) {
8656                 ctx.tp = TASK_FREE_SPACE;
8657                 task_start(ctx.info);
8658         }
8659
8660         while (1) {
8661                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8662                 if (!cache)
8663                         break;
8664
8665                 start = cache->key.objectid + cache->key.offset;
8666                 if (!cache->free_space_ctl) {
8667                         if (btrfs_init_free_space_ctl(cache,
8668                                                 root->fs_info->sectorsize)) {
8669                                 ret = -ENOMEM;
8670                                 break;
8671                         }
8672                 } else {
8673                         btrfs_remove_free_space_cache(cache);
8674                 }
8675
8676                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8677                         ret = exclude_super_stripes(root, cache);
8678                         if (ret) {
8679                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8680                                         strerror(-ret));
8681                                 error++;
8682                                 continue;
8683                         }
8684                         ret = load_free_space_tree(root->fs_info, cache);
8685                         free_excluded_extents(root, cache);
8686                         if (ret < 0) {
8687                                 fprintf(stderr, "could not load free space tree: %s\n",
8688                                         strerror(-ret));
8689                                 error++;
8690                                 continue;
8691                         }
8692                         error += ret;
8693                 } else {
8694                         ret = load_free_space_cache(root->fs_info, cache);
8695                         if (!ret)
8696                                 continue;
8697                 }
8698
8699                 ret = verify_space_cache(root, cache);
8700                 if (ret) {
8701                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8702                                 cache->key.objectid);
8703                         error++;
8704                 }
8705         }
8706
8707         task_stop(ctx.info);
8708
8709         return error ? -EINVAL : 0;
8710 }
8711
8712 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8713                         u64 num_bytes, unsigned long leaf_offset,
8714                         struct extent_buffer *eb) {
8715
8716         struct btrfs_fs_info *fs_info = root->fs_info;
8717         u64 offset = 0;
8718         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8719         char *data;
8720         unsigned long csum_offset;
8721         u32 csum;
8722         u32 csum_expected;
8723         u64 read_len;
8724         u64 data_checked = 0;
8725         u64 tmp;
8726         int ret = 0;
8727         int mirror;
8728         int num_copies;
8729
8730         if (num_bytes % fs_info->sectorsize)
8731                 return -EINVAL;
8732
8733         data = malloc(num_bytes);
8734         if (!data)
8735                 return -ENOMEM;
8736
8737         while (offset < num_bytes) {
8738                 mirror = 0;
8739 again:
8740                 read_len = num_bytes - offset;
8741                 /* read as much space once a time */
8742                 ret = read_extent_data(fs_info, data + offset,
8743                                 bytenr + offset, &read_len, mirror);
8744                 if (ret)
8745                         goto out;
8746                 data_checked = 0;
8747                 /* verify every 4k data's checksum */
8748                 while (data_checked < read_len) {
8749                         csum = ~(u32)0;
8750                         tmp = offset + data_checked;
8751
8752                         csum = btrfs_csum_data((char *)data + tmp,
8753                                                csum, fs_info->sectorsize);
8754                         btrfs_csum_final(csum, (u8 *)&csum);
8755
8756                         csum_offset = leaf_offset +
8757                                  tmp / fs_info->sectorsize * csum_size;
8758                         read_extent_buffer(eb, (char *)&csum_expected,
8759                                            csum_offset, csum_size);
8760                         /* try another mirror */
8761                         if (csum != csum_expected) {
8762                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8763                                                 mirror, bytenr + tmp,
8764                                                 csum, csum_expected);
8765                                 num_copies = btrfs_num_copies(root->fs_info,
8766                                                 bytenr, num_bytes);
8767                                 if (mirror < num_copies - 1) {
8768                                         mirror += 1;
8769                                         goto again;
8770                                 }
8771                         }
8772                         data_checked += fs_info->sectorsize;
8773                 }
8774                 offset += read_len;
8775         }
8776 out:
8777         free(data);
8778         return ret;
8779 }
8780
8781 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8782                                u64 num_bytes)
8783 {
8784         struct btrfs_path path;
8785         struct extent_buffer *leaf;
8786         struct btrfs_key key;
8787         int ret;
8788
8789         btrfs_init_path(&path);
8790         key.objectid = bytenr;
8791         key.type = BTRFS_EXTENT_ITEM_KEY;
8792         key.offset = (u64)-1;
8793
8794 again:
8795         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8796                                 0, 0);
8797         if (ret < 0) {
8798                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8799                 btrfs_release_path(&path);
8800                 return ret;
8801         } else if (ret) {
8802                 if (path.slots[0] > 0) {
8803                         path.slots[0]--;
8804                 } else {
8805                         ret = btrfs_prev_leaf(root, &path);
8806                         if (ret < 0) {
8807                                 goto out;
8808                         } else if (ret > 0) {
8809                                 ret = 0;
8810                                 goto out;
8811                         }
8812                 }
8813         }
8814
8815         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8816
8817         /*
8818          * Block group items come before extent items if they have the same
8819          * bytenr, so walk back one more just in case.  Dear future traveller,
8820          * first congrats on mastering time travel.  Now if it's not too much
8821          * trouble could you go back to 2006 and tell Chris to make the
8822          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8823          * EXTENT_ITEM_KEY please?
8824          */
8825         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8826                 if (path.slots[0] > 0) {
8827                         path.slots[0]--;
8828                 } else {
8829                         ret = btrfs_prev_leaf(root, &path);
8830                         if (ret < 0) {
8831                                 goto out;
8832                         } else if (ret > 0) {
8833                                 ret = 0;
8834                                 goto out;
8835                         }
8836                 }
8837                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8838         }
8839
8840         while (num_bytes) {
8841                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8842                         ret = btrfs_next_leaf(root, &path);
8843                         if (ret < 0) {
8844                                 fprintf(stderr, "Error going to next leaf "
8845                                         "%d\n", ret);
8846                                 btrfs_release_path(&path);
8847                                 return ret;
8848                         } else if (ret) {
8849                                 break;
8850                         }
8851                 }
8852                 leaf = path.nodes[0];
8853                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8854                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8855                         path.slots[0]++;
8856                         continue;
8857                 }
8858                 if (key.objectid + key.offset < bytenr) {
8859                         path.slots[0]++;
8860                         continue;
8861                 }
8862                 if (key.objectid > bytenr + num_bytes)
8863                         break;
8864
8865                 if (key.objectid == bytenr) {
8866                         if (key.offset >= num_bytes) {
8867                                 num_bytes = 0;
8868                                 break;
8869                         }
8870                         num_bytes -= key.offset;
8871                         bytenr += key.offset;
8872                 } else if (key.objectid < bytenr) {
8873                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8874                                 num_bytes = 0;
8875                                 break;
8876                         }
8877                         num_bytes = (bytenr + num_bytes) -
8878                                 (key.objectid + key.offset);
8879                         bytenr = key.objectid + key.offset;
8880                 } else {
8881                         if (key.objectid + key.offset < bytenr + num_bytes) {
8882                                 u64 new_start = key.objectid + key.offset;
8883                                 u64 new_bytes = bytenr + num_bytes - new_start;
8884
8885                                 /*
8886                                  * Weird case, the extent is in the middle of
8887                                  * our range, we'll have to search one side
8888                                  * and then the other.  Not sure if this happens
8889                                  * in real life, but no harm in coding it up
8890                                  * anyway just in case.
8891                                  */
8892                                 btrfs_release_path(&path);
8893                                 ret = check_extent_exists(root, new_start,
8894                                                           new_bytes);
8895                                 if (ret) {
8896                                         fprintf(stderr, "Right section didn't "
8897                                                 "have a record\n");
8898                                         break;
8899                                 }
8900                                 num_bytes = key.objectid - bytenr;
8901                                 goto again;
8902                         }
8903                         num_bytes = key.objectid - bytenr;
8904                 }
8905                 path.slots[0]++;
8906         }
8907         ret = 0;
8908
8909 out:
8910         if (num_bytes && !ret) {
8911                 fprintf(stderr, "There are no extents for csum range "
8912                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8913                 ret = 1;
8914         }
8915
8916         btrfs_release_path(&path);
8917         return ret;
8918 }
8919
8920 static int check_csums(struct btrfs_root *root)
8921 {
8922         struct btrfs_path path;
8923         struct extent_buffer *leaf;
8924         struct btrfs_key key;
8925         u64 offset = 0, num_bytes = 0;
8926         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8927         int errors = 0;
8928         int ret;
8929         u64 data_len;
8930         unsigned long leaf_offset;
8931
8932         root = root->fs_info->csum_root;
8933         if (!extent_buffer_uptodate(root->node)) {
8934                 fprintf(stderr, "No valid csum tree found\n");
8935                 return -ENOENT;
8936         }
8937
8938         btrfs_init_path(&path);
8939         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8940         key.type = BTRFS_EXTENT_CSUM_KEY;
8941         key.offset = 0;
8942         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8943         if (ret < 0) {
8944                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8945                 btrfs_release_path(&path);
8946                 return ret;
8947         }
8948
8949         if (ret > 0 && path.slots[0])
8950                 path.slots[0]--;
8951         ret = 0;
8952
8953         while (1) {
8954                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8955                         ret = btrfs_next_leaf(root, &path);
8956                         if (ret < 0) {
8957                                 fprintf(stderr, "Error going to next leaf "
8958                                         "%d\n", ret);
8959                                 break;
8960                         }
8961                         if (ret)
8962                                 break;
8963                 }
8964                 leaf = path.nodes[0];
8965
8966                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8967                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8968                         path.slots[0]++;
8969                         continue;
8970                 }
8971
8972                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8973                               csum_size) * root->fs_info->sectorsize;
8974                 if (!check_data_csum)
8975                         goto skip_csum_check;
8976                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8977                 ret = check_extent_csums(root, key.offset, data_len,
8978                                          leaf_offset, leaf);
8979                 if (ret)
8980                         break;
8981 skip_csum_check:
8982                 if (!num_bytes) {
8983                         offset = key.offset;
8984                 } else if (key.offset != offset + num_bytes) {
8985                         ret = check_extent_exists(root, offset, num_bytes);
8986                         if (ret) {
8987                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8988                                         "there is no extent record\n",
8989                                         offset, offset+num_bytes);
8990                                 errors++;
8991                         }
8992                         offset = key.offset;
8993                         num_bytes = 0;
8994                 }
8995                 num_bytes += data_len;
8996                 path.slots[0]++;
8997         }
8998
8999         btrfs_release_path(&path);
9000         return errors;
9001 }
9002
9003 static int is_dropped_key(struct btrfs_key *key,
9004                           struct btrfs_key *drop_key) {
9005         if (key->objectid < drop_key->objectid)
9006                 return 1;
9007         else if (key->objectid == drop_key->objectid) {
9008                 if (key->type < drop_key->type)
9009                         return 1;
9010                 else if (key->type == drop_key->type) {
9011                         if (key->offset < drop_key->offset)
9012                                 return 1;
9013                 }
9014         }
9015         return 0;
9016 }
9017
9018 /*
9019  * Here are the rules for FULL_BACKREF.
9020  *
9021  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9022  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9023  *      FULL_BACKREF set.
9024  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
9025  *    if it happened after the relocation occurred since we'll have dropped the
9026  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9027  *    have no real way to know for sure.
9028  *
9029  * We process the blocks one root at a time, and we start from the lowest root
9030  * objectid and go to the highest.  So we can just lookup the owner backref for
9031  * the record and if we don't find it then we know it doesn't exist and we have
9032  * a FULL BACKREF.
9033  *
9034  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
9035  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9036  * be set or not and then we can check later once we've gathered all the refs.
9037  */
9038 static int calc_extent_flag(struct cache_tree *extent_cache,
9039                            struct extent_buffer *buf,
9040                            struct root_item_record *ri,
9041                            u64 *flags)
9042 {
9043         struct extent_record *rec;
9044         struct cache_extent *cache;
9045         struct tree_backref *tback;
9046         u64 owner = 0;
9047
9048         cache = lookup_cache_extent(extent_cache, buf->start, 1);
9049         /* we have added this extent before */
9050         if (!cache)
9051                 return -ENOENT;
9052
9053         rec = container_of(cache, struct extent_record, cache);
9054
9055         /*
9056          * Except file/reloc tree, we can not have
9057          * FULL BACKREF MODE
9058          */
9059         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9060                 goto normal;
9061         /*
9062          * root node
9063          */
9064         if (buf->start == ri->bytenr)
9065                 goto normal;
9066
9067         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9068                 goto full_backref;
9069
9070         owner = btrfs_header_owner(buf);
9071         if (owner == ri->objectid)
9072                 goto normal;
9073
9074         tback = find_tree_backref(rec, 0, owner);
9075         if (!tback)
9076                 goto full_backref;
9077 normal:
9078         *flags = 0;
9079         if (rec->flag_block_full_backref != FLAG_UNSET &&
9080             rec->flag_block_full_backref != 0)
9081                 rec->bad_full_backref = 1;
9082         return 0;
9083 full_backref:
9084         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9085         if (rec->flag_block_full_backref != FLAG_UNSET &&
9086             rec->flag_block_full_backref != 1)
9087                 rec->bad_full_backref = 1;
9088         return 0;
9089 }
9090
9091 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9092 {
9093         fprintf(stderr, "Invalid key type(");
9094         print_key_type(stderr, 0, key_type);
9095         fprintf(stderr, ") found in root(");
9096         print_objectid(stderr, rootid, 0);
9097         fprintf(stderr, ")\n");
9098 }
9099
9100 /*
9101  * Check if the key is valid with its extent buffer.
9102  *
9103  * This is a early check in case invalid key exists in a extent buffer
9104  * This is not comprehensive yet, but should prevent wrong key/item passed
9105  * further
9106  */
9107 static int check_type_with_root(u64 rootid, u8 key_type)
9108 {
9109         switch (key_type) {
9110         /* Only valid in chunk tree */
9111         case BTRFS_DEV_ITEM_KEY:
9112         case BTRFS_CHUNK_ITEM_KEY:
9113                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9114                         goto err;
9115                 break;
9116         /* valid in csum and log tree */
9117         case BTRFS_CSUM_TREE_OBJECTID:
9118                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9119                       is_fstree(rootid)))
9120                         goto err;
9121                 break;
9122         case BTRFS_EXTENT_ITEM_KEY:
9123         case BTRFS_METADATA_ITEM_KEY:
9124         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9125                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9126                         goto err;
9127                 break;
9128         case BTRFS_ROOT_ITEM_KEY:
9129                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9130                         goto err;
9131                 break;
9132         case BTRFS_DEV_EXTENT_KEY:
9133                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9134                         goto err;
9135                 break;
9136         }
9137         return 0;
9138 err:
9139         report_mismatch_key_root(key_type, rootid);
9140         return -EINVAL;
9141 }
9142
9143 static int run_next_block(struct btrfs_root *root,
9144                           struct block_info *bits,
9145                           int bits_nr,
9146                           u64 *last,
9147                           struct cache_tree *pending,
9148                           struct cache_tree *seen,
9149                           struct cache_tree *reada,
9150                           struct cache_tree *nodes,
9151                           struct cache_tree *extent_cache,
9152                           struct cache_tree *chunk_cache,
9153                           struct rb_root *dev_cache,
9154                           struct block_group_tree *block_group_cache,
9155                           struct device_extent_tree *dev_extent_cache,
9156                           struct root_item_record *ri)
9157 {
9158         struct btrfs_fs_info *fs_info = root->fs_info;
9159         struct extent_buffer *buf;
9160         struct extent_record *rec = NULL;
9161         u64 bytenr;
9162         u32 size;
9163         u64 parent;
9164         u64 owner;
9165         u64 flags;
9166         u64 ptr;
9167         u64 gen = 0;
9168         int ret = 0;
9169         int i;
9170         int nritems;
9171         struct btrfs_key key;
9172         struct cache_extent *cache;
9173         int reada_bits;
9174
9175         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9176                                     bits_nr, &reada_bits);
9177         if (nritems == 0)
9178                 return 1;
9179
9180         if (!reada_bits) {
9181                 for(i = 0; i < nritems; i++) {
9182                         ret = add_cache_extent(reada, bits[i].start,
9183                                                bits[i].size);
9184                         if (ret == -EEXIST)
9185                                 continue;
9186
9187                         /* fixme, get the parent transid */
9188                         readahead_tree_block(fs_info, bits[i].start, 0);
9189                 }
9190         }
9191         *last = bits[0].start;
9192         bytenr = bits[0].start;
9193         size = bits[0].size;
9194
9195         cache = lookup_cache_extent(pending, bytenr, size);
9196         if (cache) {
9197                 remove_cache_extent(pending, cache);
9198                 free(cache);
9199         }
9200         cache = lookup_cache_extent(reada, bytenr, size);
9201         if (cache) {
9202                 remove_cache_extent(reada, cache);
9203                 free(cache);
9204         }
9205         cache = lookup_cache_extent(nodes, bytenr, size);
9206         if (cache) {
9207                 remove_cache_extent(nodes, cache);
9208                 free(cache);
9209         }
9210         cache = lookup_cache_extent(extent_cache, bytenr, size);
9211         if (cache) {
9212                 rec = container_of(cache, struct extent_record, cache);
9213                 gen = rec->parent_generation;
9214         }
9215
9216         /* fixme, get the real parent transid */
9217         buf = read_tree_block(root->fs_info, bytenr, gen);
9218         if (!extent_buffer_uptodate(buf)) {
9219                 record_bad_block_io(root->fs_info,
9220                                     extent_cache, bytenr, size);
9221                 goto out;
9222         }
9223
9224         nritems = btrfs_header_nritems(buf);
9225
9226         flags = 0;
9227         if (!init_extent_tree) {
9228                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9229                                        btrfs_header_level(buf), 1, NULL,
9230                                        &flags);
9231                 if (ret < 0) {
9232                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9233                         if (ret < 0) {
9234                                 fprintf(stderr, "Couldn't calc extent flags\n");
9235                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9236                         }
9237                 }
9238         } else {
9239                 flags = 0;
9240                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9241                 if (ret < 0) {
9242                         fprintf(stderr, "Couldn't calc extent flags\n");
9243                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9244                 }
9245         }
9246
9247         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9248                 if (ri != NULL &&
9249                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9250                     ri->objectid == btrfs_header_owner(buf)) {
9251                         /*
9252                          * Ok we got to this block from it's original owner and
9253                          * we have FULL_BACKREF set.  Relocation can leave
9254                          * converted blocks over so this is altogether possible,
9255                          * however it's not possible if the generation > the
9256                          * last snapshot, so check for this case.
9257                          */
9258                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9259                             btrfs_header_generation(buf) > ri->last_snapshot) {
9260                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9261                                 rec->bad_full_backref = 1;
9262                         }
9263                 }
9264         } else {
9265                 if (ri != NULL &&
9266                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9267                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9268                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9269                         rec->bad_full_backref = 1;
9270                 }
9271         }
9272
9273         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9274                 rec->flag_block_full_backref = 1;
9275                 parent = bytenr;
9276                 owner = 0;
9277         } else {
9278                 rec->flag_block_full_backref = 0;
9279                 parent = 0;
9280                 owner = btrfs_header_owner(buf);
9281         }
9282
9283         ret = check_block(root, extent_cache, buf, flags);
9284         if (ret)
9285                 goto out;
9286
9287         if (btrfs_is_leaf(buf)) {
9288                 btree_space_waste += btrfs_leaf_free_space(root, buf);
9289                 for (i = 0; i < nritems; i++) {
9290                         struct btrfs_file_extent_item *fi;
9291                         btrfs_item_key_to_cpu(buf, &key, i);
9292                         /*
9293                          * Check key type against the leaf owner.
9294                          * Could filter quite a lot of early error if
9295                          * owner is correct
9296                          */
9297                         if (check_type_with_root(btrfs_header_owner(buf),
9298                                                  key.type)) {
9299                                 fprintf(stderr, "ignoring invalid key\n");
9300                                 continue;
9301                         }
9302                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9303                                 process_extent_item(root, extent_cache, buf,
9304                                                     i);
9305                                 continue;
9306                         }
9307                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9308                                 process_extent_item(root, extent_cache, buf,
9309                                                     i);
9310                                 continue;
9311                         }
9312                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9313                                 total_csum_bytes +=
9314                                         btrfs_item_size_nr(buf, i);
9315                                 continue;
9316                         }
9317                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9318                                 process_chunk_item(chunk_cache, &key, buf, i);
9319                                 continue;
9320                         }
9321                         if (key.type == BTRFS_DEV_ITEM_KEY) {
9322                                 process_device_item(dev_cache, &key, buf, i);
9323                                 continue;
9324                         }
9325                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9326                                 process_block_group_item(block_group_cache,
9327                                         &key, buf, i);
9328                                 continue;
9329                         }
9330                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
9331                                 process_device_extent_item(dev_extent_cache,
9332                                         &key, buf, i);
9333                                 continue;
9334
9335                         }
9336                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9337 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9338                                 process_extent_ref_v0(extent_cache, buf, i);
9339 #else
9340                                 BUG();
9341 #endif
9342                                 continue;
9343                         }
9344
9345                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9346                                 ret = add_tree_backref(extent_cache,
9347                                                 key.objectid, 0, key.offset, 0);
9348                                 if (ret < 0)
9349                                         error(
9350                                 "add_tree_backref failed (leaf tree block): %s",
9351                                               strerror(-ret));
9352                                 continue;
9353                         }
9354                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9355                                 ret = add_tree_backref(extent_cache,
9356                                                 key.objectid, key.offset, 0, 0);
9357                                 if (ret < 0)
9358                                         error(
9359                                 "add_tree_backref failed (leaf shared block): %s",
9360                                               strerror(-ret));
9361                                 continue;
9362                         }
9363                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9364                                 struct btrfs_extent_data_ref *ref;
9365                                 ref = btrfs_item_ptr(buf, i,
9366                                                 struct btrfs_extent_data_ref);
9367                                 add_data_backref(extent_cache,
9368                                         key.objectid, 0,
9369                                         btrfs_extent_data_ref_root(buf, ref),
9370                                         btrfs_extent_data_ref_objectid(buf,
9371                                                                        ref),
9372                                         btrfs_extent_data_ref_offset(buf, ref),
9373                                         btrfs_extent_data_ref_count(buf, ref),
9374                                         0, root->fs_info->sectorsize);
9375                                 continue;
9376                         }
9377                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9378                                 struct btrfs_shared_data_ref *ref;
9379                                 ref = btrfs_item_ptr(buf, i,
9380                                                 struct btrfs_shared_data_ref);
9381                                 add_data_backref(extent_cache,
9382                                         key.objectid, key.offset, 0, 0, 0,
9383                                         btrfs_shared_data_ref_count(buf, ref),
9384                                         0, root->fs_info->sectorsize);
9385                                 continue;
9386                         }
9387                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9388                                 struct bad_item *bad;
9389
9390                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9391                                         continue;
9392                                 if (!owner)
9393                                         continue;
9394                                 bad = malloc(sizeof(struct bad_item));
9395                                 if (!bad)
9396                                         continue;
9397                                 INIT_LIST_HEAD(&bad->list);
9398                                 memcpy(&bad->key, &key,
9399                                        sizeof(struct btrfs_key));
9400                                 bad->root_id = owner;
9401                                 list_add_tail(&bad->list, &delete_items);
9402                                 continue;
9403                         }
9404                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9405                                 continue;
9406                         fi = btrfs_item_ptr(buf, i,
9407                                             struct btrfs_file_extent_item);
9408                         if (btrfs_file_extent_type(buf, fi) ==
9409                             BTRFS_FILE_EXTENT_INLINE)
9410                                 continue;
9411                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9412                                 continue;
9413
9414                         data_bytes_allocated +=
9415                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9416                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9417                                 abort();
9418                         }
9419                         data_bytes_referenced +=
9420                                 btrfs_file_extent_num_bytes(buf, fi);
9421                         add_data_backref(extent_cache,
9422                                 btrfs_file_extent_disk_bytenr(buf, fi),
9423                                 parent, owner, key.objectid, key.offset -
9424                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9425                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9426                 }
9427         } else {
9428                 int level;
9429                 struct btrfs_key first_key;
9430
9431                 first_key.objectid = 0;
9432
9433                 if (nritems > 0)
9434                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9435                 level = btrfs_header_level(buf);
9436                 for (i = 0; i < nritems; i++) {
9437                         struct extent_record tmpl;
9438
9439                         ptr = btrfs_node_blockptr(buf, i);
9440                         size = root->fs_info->nodesize;
9441                         btrfs_node_key_to_cpu(buf, &key, i);
9442                         if (ri != NULL) {
9443                                 if ((level == ri->drop_level)
9444                                     && is_dropped_key(&key, &ri->drop_key)) {
9445                                         continue;
9446                                 }
9447                         }
9448
9449                         memset(&tmpl, 0, sizeof(tmpl));
9450                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9451                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9452                         tmpl.start = ptr;
9453                         tmpl.nr = size;
9454                         tmpl.refs = 1;
9455                         tmpl.metadata = 1;
9456                         tmpl.max_size = size;
9457                         ret = add_extent_rec(extent_cache, &tmpl);
9458                         if (ret < 0)
9459                                 goto out;
9460
9461                         ret = add_tree_backref(extent_cache, ptr, parent,
9462                                         owner, 1);
9463                         if (ret < 0) {
9464                                 error(
9465                                 "add_tree_backref failed (non-leaf block): %s",
9466                                       strerror(-ret));
9467                                 continue;
9468                         }
9469
9470                         if (level > 1) {
9471                                 add_pending(nodes, seen, ptr, size);
9472                         } else {
9473                                 add_pending(pending, seen, ptr, size);
9474                         }
9475                 }
9476                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9477                                       nritems) * sizeof(struct btrfs_key_ptr);
9478         }
9479         total_btree_bytes += buf->len;
9480         if (fs_root_objectid(btrfs_header_owner(buf)))
9481                 total_fs_tree_bytes += buf->len;
9482         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9483                 total_extent_tree_bytes += buf->len;
9484 out:
9485         free_extent_buffer(buf);
9486         return ret;
9487 }
9488
9489 static int add_root_to_pending(struct extent_buffer *buf,
9490                                struct cache_tree *extent_cache,
9491                                struct cache_tree *pending,
9492                                struct cache_tree *seen,
9493                                struct cache_tree *nodes,
9494                                u64 objectid)
9495 {
9496         struct extent_record tmpl;
9497         int ret;
9498
9499         if (btrfs_header_level(buf) > 0)
9500                 add_pending(nodes, seen, buf->start, buf->len);
9501         else
9502                 add_pending(pending, seen, buf->start, buf->len);
9503
9504         memset(&tmpl, 0, sizeof(tmpl));
9505         tmpl.start = buf->start;
9506         tmpl.nr = buf->len;
9507         tmpl.is_root = 1;
9508         tmpl.refs = 1;
9509         tmpl.metadata = 1;
9510         tmpl.max_size = buf->len;
9511         add_extent_rec(extent_cache, &tmpl);
9512
9513         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9514             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9515                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9516                                 0, 1);
9517         else
9518                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9519                                 1);
9520         return ret;
9521 }
9522
/*
 * As we fix the tree, we might be deleting blocks that we're tracking for
 * repair.  This hook makes sure we remove any backrefs for blocks as we are
 * fixing them.
 *
 * The extent record covering [@bytenr, @bytenr + @num_bytes) is looked up in
 * root->fs_info->fsck_extent_cache; if there is none, nothing is done.  The
 * reference is treated as a data reference when @owner is at least
 * BTRFS_FIRST_FREE_OBJECTID, otherwise as a tree block reference.  The
 * matching backref's counters/flags and the record's refs and
 * extent_item_refs are reduced, and maybe_free_extent_rec() is called on the
 * record afterwards.
 *
 * @trans is not used by this function.  The return value is always 0.
 */
static int free_extent_hook(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root,
                            u64 bytenr, u64 num_bytes, u64 parent,
                            u64 root_objectid, u64 owner, u64 offset,
                            int refs_to_drop)
{
        struct extent_record *rec;
        struct cache_extent *cache;
        int is_data;
        struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;

        is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
        cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
        if (!cache)
                return 0;

        rec = container_of(cache, struct extent_record, cache);
        if (is_data) {
                struct data_backref *back;
                back = find_data_backref(rec, parent, root_objectid, owner,
                                         offset, 1, bytenr, num_bytes);
                /* no matching backref: skips maybe_free_extent_rec() too */
                if (!back)
                        goto out;
                /*
                 * back->found_ref / back->num_refs are the counters on the
                 * data backref; back->node.found_ref /
                 * back->node.found_extent_tree are the flags on the embedded
                 * node.  The counters are reduced only while the matching
                 * flag is set.
                 *
                 * NOTE(review): the subtractions are not clamped against
                 * refs_to_drop; only rec->refs / rec->extent_item_refs are
                 * checked, and only for being non-zero - confirm callers
                 * never drop more references than were counted.
                 */
                if (back->node.found_ref) {
                        back->found_ref -= refs_to_drop;
                        if (rec->refs)
                                rec->refs -= refs_to_drop;
                }
                if (back->node.found_extent_tree) {
                        back->num_refs -= refs_to_drop;
                        if (rec->extent_item_refs)
                                rec->extent_item_refs -= refs_to_drop;
                }
                /* clear a flag once its counter has dropped to zero */
                if (back->found_ref == 0)
                        back->node.found_ref = 0;
                if (back->num_refs == 0)
                        back->node.found_extent_tree = 0;

                /*
                 * The backref is erased only when found_extent_tree is clear
                 * while found_ref is still set.
                 * NOTE(review): a test for both flags being clear may have
                 * been intended - confirm before changing.
                 */
                if (!back->node.found_extent_tree && back->node.found_ref) {
                        rb_erase(&back->node.node, &rec->backref_tree);
                        free(back);
                }
        } else {
                struct tree_backref *back;
                back = find_tree_backref(rec, parent, root_objectid);
                /* no matching backref: skips maybe_free_extent_rec() too */
                if (!back)
                        goto out;
                /* a tree backref accounts for a single reference per flag */
                if (back->node.found_ref) {
                        if (rec->refs)
                                rec->refs--;
                        back->node.found_ref = 0;
                }
                if (back->node.found_extent_tree) {
                        if (rec->extent_item_refs)
                                rec->extent_item_refs--;
                        back->node.found_extent_tree = 0;
                }
                /*
                 * NOTE(review): found_ref is always 0 at this point (it was
                 * either already clear or has just been cleared above), so
                 * this condition cannot be true and the tree backref is never
                 * erased here - confirm whether !found_ref was intended.
                 */
                if (!back->node.found_extent_tree && back->node.found_ref) {
                        rb_erase(&back->node.node, &rec->backref_tree);
                        free(back);
                }
        }
        maybe_free_extent_rec(extent_cache, rec);
out:
        return 0;
}
9593
9594 static int delete_extent_records(struct btrfs_trans_handle *trans,
9595                                  struct btrfs_root *root,
9596                                  struct btrfs_path *path,
9597                                  u64 bytenr)
9598 {
9599         struct btrfs_key key;
9600         struct btrfs_key found_key;
9601         struct extent_buffer *leaf;
9602         int ret;
9603         int slot;
9604
9605
9606         key.objectid = bytenr;
9607         key.type = (u8)-1;
9608         key.offset = (u64)-1;
9609
9610         while(1) {
9611                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9612                                         &key, path, 0, 1);
9613                 if (ret < 0)
9614                         break;
9615
9616                 if (ret > 0) {
9617                         ret = 0;
9618                         if (path->slots[0] == 0)
9619                                 break;
9620                         path->slots[0]--;
9621                 }
9622                 ret = 0;
9623
9624                 leaf = path->nodes[0];
9625                 slot = path->slots[0];
9626
9627                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9628                 if (found_key.objectid != bytenr)
9629                         break;
9630
9631                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9632                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9633                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9634                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9635                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9636                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9637                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9638                         btrfs_release_path(path);
9639                         if (found_key.type == 0) {
9640                                 if (found_key.offset == 0)
9641                                         break;
9642                                 key.offset = found_key.offset - 1;
9643                                 key.type = found_key.type;
9644                         }
9645                         key.type = found_key.type - 1;
9646                         key.offset = (u64)-1;
9647                         continue;
9648                 }
9649
9650                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9651                         found_key.objectid, found_key.type, found_key.offset);
9652
9653                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9654                 if (ret)
9655                         break;
9656                 btrfs_release_path(path);
9657
9658                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9659                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9660                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9661                                 found_key.offset : root->fs_info->nodesize;
9662
9663                         ret = btrfs_update_block_group(trans, root, bytenr,
9664                                                        bytes, 0, 0);
9665                         if (ret)
9666                                 break;
9667                 }
9668         }
9669
9670         btrfs_release_path(path);
9671         return ret;
9672 }
9673
9674 /*
9675  * for a single backref, this will allocate a new extent
9676  * and add the backref to it.
9677  */
9678 static int record_extent(struct btrfs_trans_handle *trans,
9679                          struct btrfs_fs_info *info,
9680                          struct btrfs_path *path,
9681                          struct extent_record *rec,
9682                          struct extent_backref *back,
9683                          int allocated, u64 flags)
9684 {
9685         int ret = 0;
9686         struct btrfs_root *extent_root = info->extent_root;
9687         struct extent_buffer *leaf;
9688         struct btrfs_key ins_key;
9689         struct btrfs_extent_item *ei;
9690         struct data_backref *dback;
9691         struct btrfs_tree_block_info *bi;
9692
9693         if (!back->is_data)
9694                 rec->max_size = max_t(u64, rec->max_size,
9695                                     info->nodesize);
9696
9697         if (!allocated) {
9698                 u32 item_size = sizeof(*ei);
9699
9700                 if (!back->is_data)
9701                         item_size += sizeof(*bi);
9702
9703                 ins_key.objectid = rec->start;
9704                 ins_key.offset = rec->max_size;
9705                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9706
9707                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9708                                         &ins_key, item_size);
9709                 if (ret)
9710                         goto fail;
9711
9712                 leaf = path->nodes[0];
9713                 ei = btrfs_item_ptr(leaf, path->slots[0],
9714                                     struct btrfs_extent_item);
9715
9716                 btrfs_set_extent_refs(leaf, ei, 0);
9717                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9718
9719                 if (back->is_data) {
9720                         btrfs_set_extent_flags(leaf, ei,
9721                                                BTRFS_EXTENT_FLAG_DATA);
9722                 } else {
9723                         struct btrfs_disk_key copy_key;;
9724
9725                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9726                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9727                                              sizeof(*bi));
9728
9729                         btrfs_set_disk_key_objectid(&copy_key,
9730                                                     rec->info_objectid);
9731                         btrfs_set_disk_key_type(&copy_key, 0);
9732                         btrfs_set_disk_key_offset(&copy_key, 0);
9733
9734                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9735                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9736
9737                         btrfs_set_extent_flags(leaf, ei,
9738                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9739                 }
9740
9741                 btrfs_mark_buffer_dirty(leaf);
9742                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9743                                                rec->max_size, 1, 0);
9744                 if (ret)
9745                         goto fail;
9746                 btrfs_release_path(path);
9747         }
9748
9749         if (back->is_data) {
9750                 u64 parent;
9751                 int i;
9752
9753                 dback = to_data_backref(back);
9754                 if (back->full_backref)
9755                         parent = dback->parent;
9756                 else
9757                         parent = 0;
9758
9759                 for (i = 0; i < dback->found_ref; i++) {
9760                         /* if parent != 0, we're doing a full backref
9761                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9762                          * just makes the backref allocator create a data
9763                          * backref
9764                          */
9765                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9766                                                    rec->start, rec->max_size,
9767                                                    parent,
9768                                                    dback->root,
9769                                                    parent ?
9770                                                    BTRFS_FIRST_FREE_OBJECTID :
9771                                                    dback->owner,
9772                                                    dback->offset);
9773                         if (ret)
9774                                 break;
9775                 }
9776                 fprintf(stderr, "adding new data backref"
9777                                 " on %llu %s %llu owner %llu"
9778                                 " offset %llu found %d\n",
9779                                 (unsigned long long)rec->start,
9780                                 back->full_backref ?
9781                                 "parent" : "root",
9782                                 back->full_backref ?
9783                                 (unsigned long long)parent :
9784                                 (unsigned long long)dback->root,
9785                                 (unsigned long long)dback->owner,
9786                                 (unsigned long long)dback->offset,
9787                                 dback->found_ref);
9788         } else {
9789                 u64 parent;
9790                 struct tree_backref *tback;
9791
9792                 tback = to_tree_backref(back);
9793                 if (back->full_backref)
9794                         parent = tback->parent;
9795                 else
9796                         parent = 0;
9797
9798                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9799                                            rec->start, rec->max_size,
9800                                            parent, tback->root, 0, 0);
9801                 fprintf(stderr, "adding new tree backref on "
9802                         "start %llu len %llu parent %llu root %llu\n",
9803                         rec->start, rec->max_size, parent, tback->root);
9804         }
9805 fail:
9806         btrfs_release_path(path);
9807         return ret;
9808 }
9809
9810 static struct extent_entry *find_entry(struct list_head *entries,
9811                                        u64 bytenr, u64 bytes)
9812 {
9813         struct extent_entry *entry = NULL;
9814
9815         list_for_each_entry(entry, entries, list) {
9816                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9817                         return entry;
9818         }
9819
9820         return NULL;
9821 }
9822
9823 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9824 {
9825         struct extent_entry *entry, *best = NULL, *prev = NULL;
9826
9827         list_for_each_entry(entry, entries, list) {
9828                 /*
9829                  * If there are as many broken entries as entries then we know
9830                  * not to trust this particular entry.
9831                  */
9832                 if (entry->broken == entry->count)
9833                         continue;
9834
9835                 /*
9836                  * Special case, when there are only two entries and 'best' is
9837                  * the first one
9838                  */
9839                 if (!prev) {
9840                         best = entry;
9841                         prev = entry;
9842                         continue;
9843                 }
9844
9845                 /*
9846                  * If our current entry == best then we can't be sure our best
9847                  * is really the best, so we need to keep searching.
9848                  */
9849                 if (best && best->count == entry->count) {
9850                         prev = entry;
9851                         best = NULL;
9852                         continue;
9853                 }
9854
9855                 /* Prev == entry, not good enough, have to keep searching */
9856                 if (!prev->broken && prev->count == entry->count)
9857                         continue;
9858
9859                 if (!best)
9860                         best = (prev->count > entry->count) ? prev : entry;
9861                 else if (best->count < entry->count)
9862                         best = entry;
9863                 prev = entry;
9864         }
9865
9866         return best;
9867 }
9868
9869 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9870                       struct data_backref *dback, struct extent_entry *entry)
9871 {
9872         struct btrfs_trans_handle *trans;
9873         struct btrfs_root *root;
9874         struct btrfs_file_extent_item *fi;
9875         struct extent_buffer *leaf;
9876         struct btrfs_key key;
9877         u64 bytenr, bytes;
9878         int ret, err;
9879
9880         key.objectid = dback->root;
9881         key.type = BTRFS_ROOT_ITEM_KEY;
9882         key.offset = (u64)-1;
9883         root = btrfs_read_fs_root(info, &key);
9884         if (IS_ERR(root)) {
9885                 fprintf(stderr, "Couldn't find root for our ref\n");
9886                 return -EINVAL;
9887         }
9888
9889         /*
9890          * The backref points to the original offset of the extent if it was
9891          * split, so we need to search down to the offset we have and then walk
9892          * forward until we find the backref we're looking for.
9893          */
9894         key.objectid = dback->owner;
9895         key.type = BTRFS_EXTENT_DATA_KEY;
9896         key.offset = dback->offset;
9897         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9898         if (ret < 0) {
9899                 fprintf(stderr, "Error looking up ref %d\n", ret);
9900                 return ret;
9901         }
9902
9903         while (1) {
9904                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9905                         ret = btrfs_next_leaf(root, path);
9906                         if (ret) {
9907                                 fprintf(stderr, "Couldn't find our ref, next\n");
9908                                 return -EINVAL;
9909                         }
9910                 }
9911                 leaf = path->nodes[0];
9912                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9913                 if (key.objectid != dback->owner ||
9914                     key.type != BTRFS_EXTENT_DATA_KEY) {
9915                         fprintf(stderr, "Couldn't find our ref, search\n");
9916                         return -EINVAL;
9917                 }
9918                 fi = btrfs_item_ptr(leaf, path->slots[0],
9919                                     struct btrfs_file_extent_item);
9920                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9921                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9922
9923                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9924                         break;
9925                 path->slots[0]++;
9926         }
9927
9928         btrfs_release_path(path);
9929
9930         trans = btrfs_start_transaction(root, 1);
9931         if (IS_ERR(trans))
9932                 return PTR_ERR(trans);
9933
9934         /*
9935          * Ok we have the key of the file extent we want to fix, now we can cow
9936          * down to the thing and fix it.
9937          */
9938         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9939         if (ret < 0) {
9940                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9941                         key.objectid, key.type, key.offset, ret);
9942                 goto out;
9943         }
9944         if (ret > 0) {
9945                 fprintf(stderr, "Well that's odd, we just found this key "
9946                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9947                         key.offset);
9948                 ret = -EINVAL;
9949                 goto out;
9950         }
9951         leaf = path->nodes[0];
9952         fi = btrfs_item_ptr(leaf, path->slots[0],
9953                             struct btrfs_file_extent_item);
9954
9955         if (btrfs_file_extent_compression(leaf, fi) &&
9956             dback->disk_bytenr != entry->bytenr) {
9957                 fprintf(stderr, "Ref doesn't match the record start and is "
9958                         "compressed, please take a btrfs-image of this file "
9959                         "system and send it to a btrfs developer so they can "
9960                         "complete this functionality for bytenr %Lu\n",
9961                         dback->disk_bytenr);
9962                 ret = -EINVAL;
9963                 goto out;
9964         }
9965
9966         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9967                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9968         } else if (dback->disk_bytenr > entry->bytenr) {
9969                 u64 off_diff, offset;
9970
9971                 off_diff = dback->disk_bytenr - entry->bytenr;
9972                 offset = btrfs_file_extent_offset(leaf, fi);
9973                 if (dback->disk_bytenr + offset +
9974                     btrfs_file_extent_num_bytes(leaf, fi) >
9975                     entry->bytenr + entry->bytes) {
9976                         fprintf(stderr, "Ref is past the entry end, please "
9977                                 "take a btrfs-image of this file system and "
9978                                 "send it to a btrfs developer, ref %Lu\n",
9979                                 dback->disk_bytenr);
9980                         ret = -EINVAL;
9981                         goto out;
9982                 }
9983                 offset += off_diff;
9984                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9985                 btrfs_set_file_extent_offset(leaf, fi, offset);
9986         } else if (dback->disk_bytenr < entry->bytenr) {
9987                 u64 offset;
9988
9989                 offset = btrfs_file_extent_offset(leaf, fi);
9990                 if (dback->disk_bytenr + offset < entry->bytenr) {
9991                         fprintf(stderr, "Ref is before the entry start, please"
9992                                 " take a btrfs-image of this file system and "
9993                                 "send it to a btrfs developer, ref %Lu\n",
9994                                 dback->disk_bytenr);
9995                         ret = -EINVAL;
9996                         goto out;
9997                 }
9998
9999                 offset += dback->disk_bytenr;
10000                 offset -= entry->bytenr;
10001                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
10002                 btrfs_set_file_extent_offset(leaf, fi, offset);
10003         }
10004
10005         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10006
10007         /*
10008          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10009          * only do this if we aren't using compression, otherwise it's a
10010          * trickier case.
10011          */
10012         if (!btrfs_file_extent_compression(leaf, fi))
10013                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10014         else
10015                 printf("ram bytes may be wrong?\n");
10016         btrfs_mark_buffer_dirty(leaf);
10017 out:
10018         err = btrfs_commit_transaction(trans, root);
10019         btrfs_release_path(path);
10020         return ret ? ret : err;
10021 }
10022
10023 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10024                            struct extent_record *rec)
10025 {
10026         struct extent_backref *back, *tmp;
10027         struct data_backref *dback;
10028         struct extent_entry *entry, *best = NULL;
10029         LIST_HEAD(entries);
10030         int nr_entries = 0;
10031         int broken_entries = 0;
10032         int ret = 0;
10033         short mismatch = 0;
10034
10035         /*
10036          * Metadata is easy and the backrefs should always agree on bytenr and
10037          * size, if not we've got bigger issues.
10038          */
10039         if (rec->metadata)
10040                 return 0;
10041
10042         rbtree_postorder_for_each_entry_safe(back, tmp,
10043                                              &rec->backref_tree, node) {
10044                 if (back->full_backref || !back->is_data)
10045                         continue;
10046
10047                 dback = to_data_backref(back);
10048
10049                 /*
10050                  * We only pay attention to backrefs that we found a real
10051                  * backref for.
10052                  */
10053                 if (dback->found_ref == 0)
10054                         continue;
10055
10056                 /*
10057                  * For now we only catch when the bytes don't match, not the
10058                  * bytenr.  We can easily do this at the same time, but I want
10059                  * to have a fs image to test on before we just add repair
10060                  * functionality willy-nilly so we know we won't screw up the
10061                  * repair.
10062                  */
10063
10064                 entry = find_entry(&entries, dback->disk_bytenr,
10065                                    dback->bytes);
10066                 if (!entry) {
10067                         entry = malloc(sizeof(struct extent_entry));
10068                         if (!entry) {
10069                                 ret = -ENOMEM;
10070                                 goto out;
10071                         }
10072                         memset(entry, 0, sizeof(*entry));
10073                         entry->bytenr = dback->disk_bytenr;
10074                         entry->bytes = dback->bytes;
10075                         list_add_tail(&entry->list, &entries);
10076                         nr_entries++;
10077                 }
10078
10079                 /*
10080                  * If we only have on entry we may think the entries agree when
10081                  * in reality they don't so we have to do some extra checking.
10082                  */
10083                 if (dback->disk_bytenr != rec->start ||
10084                     dback->bytes != rec->nr || back->broken)
10085                         mismatch = 1;
10086
10087                 if (back->broken) {
10088                         entry->broken++;
10089                         broken_entries++;
10090                 }
10091
10092                 entry->count++;
10093         }
10094
10095         /* Yay all the backrefs agree, carry on good sir */
10096         if (nr_entries <= 1 && !mismatch)
10097                 goto out;
10098
10099         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10100                 "%Lu\n", rec->start);
10101
10102         /*
10103          * First we want to see if the backrefs can agree amongst themselves who
10104          * is right, so figure out which one of the entries has the highest
10105          * count.
10106          */
10107         best = find_most_right_entry(&entries);
10108
10109         /*
10110          * Ok so we may have an even split between what the backrefs think, so
10111          * this is where we use the extent ref to see what it thinks.
10112          */
10113         if (!best) {
10114                 entry = find_entry(&entries, rec->start, rec->nr);
10115                 if (!entry && (!broken_entries || !rec->found_rec)) {
10116                         fprintf(stderr, "Backrefs don't agree with each other "
10117                                 "and extent record doesn't agree with anybody,"
10118                                 " so we can't fix bytenr %Lu bytes %Lu\n",
10119                                 rec->start, rec->nr);
10120                         ret = -EINVAL;
10121                         goto out;
10122                 } else if (!entry) {
10123                         /*
10124                          * Ok our backrefs were broken, we'll assume this is the
10125                          * correct value and add an entry for this range.
10126                          */
10127                         entry = malloc(sizeof(struct extent_entry));
10128                         if (!entry) {
10129                                 ret = -ENOMEM;
10130                                 goto out;
10131                         }
10132                         memset(entry, 0, sizeof(*entry));
10133                         entry->bytenr = rec->start;
10134                         entry->bytes = rec->nr;
10135                         list_add_tail(&entry->list, &entries);
10136                         nr_entries++;
10137                 }
10138                 entry->count++;
10139                 best = find_most_right_entry(&entries);
10140                 if (!best) {
10141                         fprintf(stderr, "Backrefs and extent record evenly "
10142                                 "split on who is right, this is going to "
10143                                 "require user input to fix bytenr %Lu bytes "
10144                                 "%Lu\n", rec->start, rec->nr);
10145                         ret = -EINVAL;
10146                         goto out;
10147                 }
10148         }
10149
10150         /*
10151          * I don't think this can happen currently as we'll abort() if we catch
10152          * this case higher up, but in case somebody removes that we still can't
10153          * deal with it properly here yet, so just bail out of that's the case.
10154          */
10155         if (best->bytenr != rec->start) {
10156                 fprintf(stderr, "Extent start and backref starts don't match, "
10157                         "please use btrfs-image on this file system and send "
10158                         "it to a btrfs developer so they can make fsck fix "
10159                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
10160                         rec->start, rec->nr);
10161                 ret = -EINVAL;
10162                 goto out;
10163         }
10164
10165         /*
10166          * Ok great we all agreed on an extent record, let's go find the real
10167          * references and fix up the ones that don't match.
10168          */
10169         rbtree_postorder_for_each_entry_safe(back, tmp,
10170                                              &rec->backref_tree, node) {
10171                 if (back->full_backref || !back->is_data)
10172                         continue;
10173
10174                 dback = to_data_backref(back);
10175
10176                 /*
10177                  * Still ignoring backrefs that don't have a real ref attached
10178                  * to them.
10179                  */
10180                 if (dback->found_ref == 0)
10181                         continue;
10182
10183                 if (dback->bytes == best->bytes &&
10184                     dback->disk_bytenr == best->bytenr)
10185                         continue;
10186
10187                 ret = repair_ref(info, path, dback, best);
10188                 if (ret)
10189                         goto out;
10190         }
10191
10192         /*
10193          * Ok we messed with the actual refs, which means we need to drop our
10194          * entire cache and go back and rescan.  I know this is a huge pain and
10195          * adds a lot of extra work, but it's the only way to be safe.  Once all
10196          * the backrefs agree we may not need to do anything to the extent
10197          * record itself.
10198          */
10199         ret = -EAGAIN;
10200 out:
10201         while (!list_empty(&entries)) {
10202                 entry = list_entry(entries.next, struct extent_entry, list);
10203                 list_del_init(&entry->list);
10204                 free(entry);
10205         }
10206         return ret;
10207 }
10208
10209 static int process_duplicates(struct cache_tree *extent_cache,
10210                               struct extent_record *rec)
10211 {
10212         struct extent_record *good, *tmp;
10213         struct cache_extent *cache;
10214         int ret;
10215
10216         /*
10217          * If we found a extent record for this extent then return, or if we
10218          * have more than one duplicate we are likely going to need to delete
10219          * something.
10220          */
10221         if (rec->found_rec || rec->num_duplicates > 1)
10222                 return 0;
10223
10224         /* Shouldn't happen but just in case */
10225         BUG_ON(!rec->num_duplicates);
10226
10227         /*
10228          * So this happens if we end up with a backref that doesn't match the
10229          * actual extent entry.  So either the backref is bad or the extent
10230          * entry is bad.  Either way we want to have the extent_record actually
10231          * reflect what we found in the extent_tree, so we need to take the
10232          * duplicate out and use that as the extent_record since the only way we
10233          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10234          */
10235         remove_cache_extent(extent_cache, &rec->cache);
10236
10237         good = to_extent_record(rec->dups.next);
10238         list_del_init(&good->list);
10239         INIT_LIST_HEAD(&good->backrefs);
10240         INIT_LIST_HEAD(&good->dups);
10241         good->cache.start = good->start;
10242         good->cache.size = good->nr;
10243         good->content_checked = 0;
10244         good->owner_ref_checked = 0;
10245         good->num_duplicates = 0;
10246         good->refs = rec->refs;
10247         list_splice_init(&rec->backrefs, &good->backrefs);
10248         while (1) {
10249                 cache = lookup_cache_extent(extent_cache, good->start,
10250                                             good->nr);
10251                 if (!cache)
10252                         break;
10253                 tmp = container_of(cache, struct extent_record, cache);
10254
10255                 /*
10256                  * If we find another overlapping extent and it's found_rec is
10257                  * set then it's a duplicate and we need to try and delete
10258                  * something.
10259                  */
10260                 if (tmp->found_rec || tmp->num_duplicates > 0) {
10261                         if (list_empty(&good->list))
10262                                 list_add_tail(&good->list,
10263                                               &duplicate_extents);
10264                         good->num_duplicates += tmp->num_duplicates + 1;
10265                         list_splice_init(&tmp->dups, &good->dups);
10266                         list_del_init(&tmp->list);
10267                         list_add_tail(&tmp->list, &good->dups);
10268                         remove_cache_extent(extent_cache, &tmp->cache);
10269                         continue;
10270                 }
10271
10272                 /*
10273                  * Ok we have another non extent item backed extent rec, so lets
10274                  * just add it to this extent and carry on like we did above.
10275                  */
10276                 good->refs += tmp->refs;
10277                 list_splice_init(&tmp->backrefs, &good->backrefs);
10278                 remove_cache_extent(extent_cache, &tmp->cache);
10279                 free(tmp);
10280         }
10281         ret = insert_cache_extent(extent_cache, &good->cache);
10282         BUG_ON(ret);
10283         free(rec);
10284         return good->num_duplicates ? 0 : 1;
10285 }
10286
10287 static int delete_duplicate_records(struct btrfs_root *root,
10288                                     struct extent_record *rec)
10289 {
10290         struct btrfs_trans_handle *trans;
10291         LIST_HEAD(delete_list);
10292         struct btrfs_path path;
10293         struct extent_record *tmp, *good, *n;
10294         int nr_del = 0;
10295         int ret = 0, err;
10296         struct btrfs_key key;
10297
10298         btrfs_init_path(&path);
10299
10300         good = rec;
10301         /* Find the record that covers all of the duplicates. */
10302         list_for_each_entry(tmp, &rec->dups, list) {
10303                 if (good->start < tmp->start)
10304                         continue;
10305                 if (good->nr > tmp->nr)
10306                         continue;
10307
10308                 if (tmp->start + tmp->nr < good->start + good->nr) {
10309                         fprintf(stderr, "Ok we have overlapping extents that "
10310                                 "aren't completely covered by each other, this "
10311                                 "is going to require more careful thought.  "
10312                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10313                                 tmp->start, tmp->nr, good->start, good->nr);
10314                         abort();
10315                 }
10316                 good = tmp;
10317         }
10318
10319         if (good != rec)
10320                 list_add_tail(&rec->list, &delete_list);
10321
10322         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10323                 if (tmp == good)
10324                         continue;
10325                 list_move_tail(&tmp->list, &delete_list);
10326         }
10327
10328         root = root->fs_info->extent_root;
10329         trans = btrfs_start_transaction(root, 1);
10330         if (IS_ERR(trans)) {
10331                 ret = PTR_ERR(trans);
10332                 goto out;
10333         }
10334
10335         list_for_each_entry(tmp, &delete_list, list) {
10336                 if (tmp->found_rec == 0)
10337                         continue;
10338                 key.objectid = tmp->start;
10339                 key.type = BTRFS_EXTENT_ITEM_KEY;
10340                 key.offset = tmp->nr;
10341
10342                 /* Shouldn't happen but just in case */
10343                 if (tmp->metadata) {
10344                         fprintf(stderr, "Well this shouldn't happen, extent "
10345                                 "record overlaps but is metadata? "
10346                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10347                         abort();
10348                 }
10349
10350                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10351                 if (ret) {
10352                         if (ret > 0)
10353                                 ret = -EINVAL;
10354                         break;
10355                 }
10356                 ret = btrfs_del_item(trans, root, &path);
10357                 if (ret)
10358                         break;
10359                 btrfs_release_path(&path);
10360                 nr_del++;
10361         }
10362         err = btrfs_commit_transaction(trans, root);
10363         if (err && !ret)
10364                 ret = err;
10365 out:
10366         while (!list_empty(&delete_list)) {
10367                 tmp = to_extent_record(delete_list.next);
10368                 list_del_init(&tmp->list);
10369                 if (tmp == rec)
10370                         continue;
10371                 free(tmp);
10372         }
10373
10374         while (!list_empty(&rec->dups)) {
10375                 tmp = to_extent_record(rec->dups.next);
10376                 list_del_init(&tmp->list);
10377                 free(tmp);
10378         }
10379
10380         btrfs_release_path(&path);
10381
10382         if (!ret && !nr_del)
10383                 rec->num_duplicates = 0;
10384
10385         return ret ? ret : nr_del;
10386 }
10387
10388 static int find_possible_backrefs(struct btrfs_fs_info *info,
10389                                   struct btrfs_path *path,
10390                                   struct cache_tree *extent_cache,
10391                                   struct extent_record *rec)
10392 {
10393         struct btrfs_root *root;
10394         struct extent_backref *back, *tmp;
10395         struct data_backref *dback;
10396         struct cache_extent *cache;
10397         struct btrfs_file_extent_item *fi;
10398         struct btrfs_key key;
10399         u64 bytenr, bytes;
10400         int ret;
10401
10402         rbtree_postorder_for_each_entry_safe(back, tmp,
10403                                              &rec->backref_tree, node) {
10404                 /* Don't care about full backrefs (poor unloved backrefs) */
10405                 if (back->full_backref || !back->is_data)
10406                         continue;
10407
10408                 dback = to_data_backref(back);
10409
10410                 /* We found this one, we don't need to do a lookup */
10411                 if (dback->found_ref)
10412                         continue;
10413
10414                 key.objectid = dback->root;
10415                 key.type = BTRFS_ROOT_ITEM_KEY;
10416                 key.offset = (u64)-1;
10417
10418                 root = btrfs_read_fs_root(info, &key);
10419
10420                 /* No root, definitely a bad ref, skip */
10421                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10422                         continue;
10423                 /* Other err, exit */
10424                 if (IS_ERR(root))
10425                         return PTR_ERR(root);
10426
10427                 key.objectid = dback->owner;
10428                 key.type = BTRFS_EXTENT_DATA_KEY;
10429                 key.offset = dback->offset;
10430                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10431                 if (ret) {
10432                         btrfs_release_path(path);
10433                         if (ret < 0)
10434                                 return ret;
10435                         /* Didn't find it, we can carry on */
10436                         ret = 0;
10437                         continue;
10438                 }
10439
10440                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10441                                     struct btrfs_file_extent_item);
10442                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10443                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10444                 btrfs_release_path(path);
10445                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10446                 if (cache) {
10447                         struct extent_record *tmp;
10448                         tmp = container_of(cache, struct extent_record, cache);
10449
10450                         /*
10451                          * If we found an extent record for the bytenr for this
10452                          * particular backref then we can't add it to our
10453                          * current extent record.  We only want to add backrefs
10454                          * that don't have a corresponding extent item in the
10455                          * extent tree since they likely belong to this record
10456                          * and we need to fix it if it doesn't match bytenrs.
10457                          */
10458                         if  (tmp->found_rec)
10459                                 continue;
10460                 }
10461
10462                 dback->found_ref += 1;
10463                 dback->disk_bytenr = bytenr;
10464                 dback->bytes = bytes;
10465
10466                 /*
10467                  * Set this so the verify backref code knows not to trust the
10468                  * values in this backref.
10469                  */
10470                 back->broken = 1;
10471         }
10472
10473         return 0;
10474 }
10475
10476 /*
10477  * Record orphan data ref into corresponding root.
10478  *
10479  * Return 0 if the extent item contains data ref and recorded.
10480  * Return 1 if the extent item contains no useful data ref
10481  *   On that case, it may contains only shared_dataref or metadata backref
10482  *   or the file extent exists(this should be handled by the extent bytenr
10483  *   recovery routine)
10484  * Return <0 if something goes wrong.
10485  */
10486 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10487                                       struct extent_record *rec)
10488 {
10489         struct btrfs_key key;
10490         struct btrfs_root *dest_root;
10491         struct extent_backref *back, *tmp;
10492         struct data_backref *dback;
10493         struct orphan_data_extent *orphan;
10494         struct btrfs_path path;
10495         int recorded_data_ref = 0;
10496         int ret = 0;
10497
10498         if (rec->metadata)
10499                 return 1;
10500         btrfs_init_path(&path);
10501         rbtree_postorder_for_each_entry_safe(back, tmp,
10502                                              &rec->backref_tree, node) {
10503                 if (back->full_backref || !back->is_data ||
10504                     !back->found_extent_tree)
10505                         continue;
10506                 dback = to_data_backref(back);
10507                 if (dback->found_ref)
10508                         continue;
10509                 key.objectid = dback->root;
10510                 key.type = BTRFS_ROOT_ITEM_KEY;
10511                 key.offset = (u64)-1;
10512
10513                 dest_root = btrfs_read_fs_root(fs_info, &key);
10514
10515                 /* For non-exist root we just skip it */
10516                 if (IS_ERR(dest_root) || !dest_root)
10517                         continue;
10518
10519                 key.objectid = dback->owner;
10520                 key.type = BTRFS_EXTENT_DATA_KEY;
10521                 key.offset = dback->offset;
10522
10523                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10524                 btrfs_release_path(&path);
10525                 /*
10526                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10527                  * we need to record it for inode/file extent rebuild.
10528                  * For ret > 0, we record it only for file extent rebuild.
10529                  * For ret == 0, the file extent exists but only bytenr
10530                  * mismatch, let the original bytenr fix routine to handle,
10531                  * don't record it.
10532                  */
10533                 if (ret == 0)
10534                         continue;
10535                 ret = 0;
10536                 orphan = malloc(sizeof(*orphan));
10537                 if (!orphan) {
10538                         ret = -ENOMEM;
10539                         goto out;
10540                 }
10541                 INIT_LIST_HEAD(&orphan->list);
10542                 orphan->root = dback->root;
10543                 orphan->objectid = dback->owner;
10544                 orphan->offset = dback->offset;
10545                 orphan->disk_bytenr = rec->cache.start;
10546                 orphan->disk_len = rec->cache.size;
10547                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10548                 recorded_data_ref = 1;
10549         }
10550 out:
10551         btrfs_release_path(&path);
10552         if (!ret)
10553                 return !recorded_data_ref;
10554         else
10555                 return ret;
10556 }
10557
10558 /*
10559  * when an incorrect extent item is found, this will delete
10560  * all of the existing entries for it and recreate them
10561  * based on what the tree scan found.
10562  */
10563 static int fixup_extent_refs(struct btrfs_fs_info *info,
10564                              struct cache_tree *extent_cache,
10565                              struct extent_record *rec)
10566 {
10567         struct btrfs_trans_handle *trans = NULL;
10568         int ret;
10569         struct btrfs_path path;
10570         struct cache_extent *cache;
10571         struct extent_backref *back, *tmp;
10572         int allocated = 0;
10573         u64 flags = 0;
10574
10575         if (rec->flag_block_full_backref)
10576                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10577
10578         btrfs_init_path(&path);
10579         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10580                 /*
10581                  * Sometimes the backrefs themselves are so broken they don't
10582                  * get attached to any meaningful rec, so first go back and
10583                  * check any of our backrefs that we couldn't find and throw
10584                  * them into the list if we find the backref so that
10585                  * verify_backrefs can figure out what to do.
10586                  */
10587                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10588                 if (ret < 0)
10589                         goto out;
10590         }
10591
10592         /* step one, make sure all of the backrefs agree */
10593         ret = verify_backrefs(info, &path, rec);
10594         if (ret < 0)
10595                 goto out;
10596
10597         trans = btrfs_start_transaction(info->extent_root, 1);
10598         if (IS_ERR(trans)) {
10599                 ret = PTR_ERR(trans);
10600                 goto out;
10601         }
10602
10603         /* step two, delete all the existing records */
10604         ret = delete_extent_records(trans, info->extent_root, &path,
10605                                     rec->start);
10606
10607         if (ret < 0)
10608                 goto out;
10609
10610         /* was this block corrupt?  If so, don't add references to it */
10611         cache = lookup_cache_extent(info->corrupt_blocks,
10612                                     rec->start, rec->max_size);
10613         if (cache) {
10614                 ret = 0;
10615                 goto out;
10616         }
10617
10618         /* step three, recreate all the refs we did find */
10619         rbtree_postorder_for_each_entry_safe(back, tmp,
10620                                              &rec->backref_tree, node) {
10621                 /*
10622                  * if we didn't find any references, don't create a
10623                  * new extent record
10624                  */
10625                 if (!back->found_ref)
10626                         continue;
10627
10628                 rec->bad_full_backref = 0;
10629                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10630                 allocated = 1;
10631
10632                 if (ret)
10633                         goto out;
10634         }
10635 out:
10636         if (trans) {
10637                 int err = btrfs_commit_transaction(trans, info->extent_root);
10638                 if (!ret)
10639                         ret = err;
10640         }
10641
10642         if (!ret)
10643                 fprintf(stderr, "Repaired extent references for %llu\n",
10644                                 (unsigned long long)rec->start);
10645
10646         btrfs_release_path(&path);
10647         return ret;
10648 }
10649
10650 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10651                               struct extent_record *rec)
10652 {
10653         struct btrfs_trans_handle *trans;
10654         struct btrfs_root *root = fs_info->extent_root;
10655         struct btrfs_path path;
10656         struct btrfs_extent_item *ei;
10657         struct btrfs_key key;
10658         u64 flags;
10659         int ret = 0;
10660
10661         key.objectid = rec->start;
10662         if (rec->metadata) {
10663                 key.type = BTRFS_METADATA_ITEM_KEY;
10664                 key.offset = rec->info_level;
10665         } else {
10666                 key.type = BTRFS_EXTENT_ITEM_KEY;
10667                 key.offset = rec->max_size;
10668         }
10669
10670         trans = btrfs_start_transaction(root, 0);
10671         if (IS_ERR(trans))
10672                 return PTR_ERR(trans);
10673
10674         btrfs_init_path(&path);
10675         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10676         if (ret < 0) {
10677                 btrfs_release_path(&path);
10678                 btrfs_commit_transaction(trans, root);
10679                 return ret;
10680         } else if (ret) {
10681                 fprintf(stderr, "Didn't find extent for %llu\n",
10682                         (unsigned long long)rec->start);
10683                 btrfs_release_path(&path);
10684                 btrfs_commit_transaction(trans, root);
10685                 return -ENOENT;
10686         }
10687
10688         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10689                             struct btrfs_extent_item);
10690         flags = btrfs_extent_flags(path.nodes[0], ei);
10691         if (rec->flag_block_full_backref) {
10692                 fprintf(stderr, "setting full backref on %llu\n",
10693                         (unsigned long long)key.objectid);
10694                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10695         } else {
10696                 fprintf(stderr, "clearing full backref on %llu\n",
10697                         (unsigned long long)key.objectid);
10698                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10699         }
10700         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10701         btrfs_mark_buffer_dirty(path.nodes[0]);
10702         btrfs_release_path(&path);
10703         ret = btrfs_commit_transaction(trans, root);
10704         if (!ret)
10705                 fprintf(stderr, "Repaired extent flags for %llu\n",
10706                                 (unsigned long long)rec->start);
10707
10708         return ret;
10709 }
10710
10711 /* right now we only prune from the extent allocation tree */
10712 static int prune_one_block(struct btrfs_trans_handle *trans,
10713                            struct btrfs_fs_info *info,
10714                            struct btrfs_corrupt_block *corrupt)
10715 {
10716         int ret;
10717         struct btrfs_path path;
10718         struct extent_buffer *eb;
10719         u64 found;
10720         int slot;
10721         int nritems;
10722         int level = corrupt->level + 1;
10723
10724         btrfs_init_path(&path);
10725 again:
10726         /* we want to stop at the parent to our busted block */
10727         path.lowest_level = level;
10728
10729         ret = btrfs_search_slot(trans, info->extent_root,
10730                                 &corrupt->key, &path, -1, 1);
10731
10732         if (ret < 0)
10733                 goto out;
10734
10735         eb = path.nodes[level];
10736         if (!eb) {
10737                 ret = -ENOENT;
10738                 goto out;
10739         }
10740
10741         /*
10742          * hopefully the search gave us the block we want to prune,
10743          * lets try that first
10744          */
10745         slot = path.slots[level];
10746         found =  btrfs_node_blockptr(eb, slot);
10747         if (found == corrupt->cache.start)
10748                 goto del_ptr;
10749
10750         nritems = btrfs_header_nritems(eb);
10751
10752         /* the search failed, lets scan this node and hope we find it */
10753         for (slot = 0; slot < nritems; slot++) {
10754                 found =  btrfs_node_blockptr(eb, slot);
10755                 if (found == corrupt->cache.start)
10756                         goto del_ptr;
10757         }
10758         /*
10759          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10760          * to this block
10761          */
10762         if (eb == info->extent_root->node) {
10763                 ret = -ENOENT;
10764                 goto out;
10765         } else {
10766                 level++;
10767                 btrfs_release_path(&path);
10768                 goto again;
10769         }
10770
10771 del_ptr:
10772         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10773         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10774
10775 out:
10776         btrfs_release_path(&path);
10777         return ret;
10778 }
10779
10780 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10781 {
10782         struct btrfs_trans_handle *trans = NULL;
10783         struct cache_extent *cache;
10784         struct btrfs_corrupt_block *corrupt;
10785
10786         while (1) {
10787                 cache = search_cache_extent(info->corrupt_blocks, 0);
10788                 if (!cache)
10789                         break;
10790                 if (!trans) {
10791                         trans = btrfs_start_transaction(info->extent_root, 1);
10792                         if (IS_ERR(trans))
10793                                 return PTR_ERR(trans);
10794                 }
10795                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10796                 prune_one_block(trans, info, corrupt);
10797                 remove_cache_extent(info->corrupt_blocks, cache);
10798         }
10799         if (trans)
10800                 return btrfs_commit_transaction(trans, info->extent_root);
10801         return 0;
10802 }
10803
10804 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10805 {
10806         struct btrfs_block_group_cache *cache;
10807         u64 start, end;
10808         int ret;
10809
10810         while (1) {
10811                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10812                                             &start, &end, EXTENT_DIRTY);
10813                 if (ret)
10814                         break;
10815                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10816         }
10817
10818         start = 0;
10819         while (1) {
10820                 cache = btrfs_lookup_first_block_group(fs_info, start);
10821                 if (!cache)
10822                         break;
10823                 if (cache->cached)
10824                         cache->cached = 0;
10825                 start = cache->key.objectid + cache->key.offset;
10826         }
10827 }
10828
10829 static int check_extent_refs(struct btrfs_root *root,
10830                              struct cache_tree *extent_cache)
10831 {
10832         struct extent_record *rec;
10833         struct cache_extent *cache;
10834         int ret = 0;
10835         int had_dups = 0;
10836         int err = 0;
10837
10838         if (repair) {
10839                 /*
10840                  * if we're doing a repair, we have to make sure
10841                  * we don't allocate from the problem extents.
10842                  * In the worst case, this will be all the
10843                  * extents in the FS
10844                  */
10845                 cache = search_cache_extent(extent_cache, 0);
10846                 while(cache) {
10847                         rec = container_of(cache, struct extent_record, cache);
10848                         set_extent_dirty(root->fs_info->excluded_extents,
10849                                          rec->start,
10850                                          rec->start + rec->max_size - 1);
10851                         cache = next_cache_extent(cache);
10852                 }
10853
10854                 /* pin down all the corrupted blocks too */
10855                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10856                 while(cache) {
10857                         set_extent_dirty(root->fs_info->excluded_extents,
10858                                          cache->start,
10859                                          cache->start + cache->size - 1);
10860                         cache = next_cache_extent(cache);
10861                 }
10862                 prune_corrupt_blocks(root->fs_info);
10863                 reset_cached_block_groups(root->fs_info);
10864         }
10865
10866         reset_cached_block_groups(root->fs_info);
10867
10868         /*
10869          * We need to delete any duplicate entries we find first otherwise we
10870          * could mess up the extent tree when we have backrefs that actually
10871          * belong to a different extent item and not the weird duplicate one.
10872          */
10873         while (repair && !list_empty(&duplicate_extents)) {
10874                 rec = to_extent_record(duplicate_extents.next);
10875                 list_del_init(&rec->list);
10876
10877                 /* Sometimes we can find a backref before we find an actual
10878                  * extent, so we need to process it a little bit to see if there
10879                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10880                  * if this is a backref screwup.  If we need to delete stuff
10881                  * process_duplicates() will return 0, otherwise it will return
10882                  * 1 and we
10883                  */
10884                 if (process_duplicates(extent_cache, rec))
10885                         continue;
10886                 ret = delete_duplicate_records(root, rec);
10887                 if (ret < 0)
10888                         return ret;
10889                 /*
10890                  * delete_duplicate_records will return the number of entries
10891                  * deleted, so if it's greater than 0 then we know we actually
10892                  * did something and we need to remove.
10893                  */
10894                 if (ret)
10895                         had_dups = 1;
10896         }
10897
10898         if (had_dups)
10899                 return -EAGAIN;
10900
10901         while(1) {
10902                 int cur_err = 0;
10903                 int fix = 0;
10904
10905                 cache = search_cache_extent(extent_cache, 0);
10906                 if (!cache)
10907                         break;
10908                 rec = container_of(cache, struct extent_record, cache);
10909                 if (rec->num_duplicates) {
10910                         fprintf(stderr, "extent item %llu has multiple extent "
10911                                 "items\n", (unsigned long long)rec->start);
10912                         cur_err = 1;
10913                 }
10914
10915                 if (rec->refs != rec->extent_item_refs) {
10916                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10917                                 (unsigned long long)rec->start,
10918                                 (unsigned long long)rec->nr);
10919                         fprintf(stderr, "extent item %llu, found %llu\n",
10920                                 (unsigned long long)rec->extent_item_refs,
10921                                 (unsigned long long)rec->refs);
10922                         ret = record_orphan_data_extents(root->fs_info, rec);
10923                         if (ret < 0)
10924                                 goto repair_abort;
10925                         fix = ret;
10926                         cur_err = 1;
10927                 }
10928                 if (all_backpointers_checked(rec, 1)) {
10929                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10930                                 (unsigned long long)rec->start,
10931                                 (unsigned long long)rec->nr);
10932                         fix = 1;
10933                         cur_err = 1;
10934                 }
10935                 if (!rec->owner_ref_checked) {
10936                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10937                                 (unsigned long long)rec->start,
10938                                 (unsigned long long)rec->nr);
10939                         fix = 1;
10940                         cur_err = 1;
10941                 }
10942
10943                 if (repair && fix) {
10944                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10945                         if (ret)
10946                                 goto repair_abort;
10947                 }
10948
10949
10950                 if (rec->bad_full_backref) {
10951                         fprintf(stderr, "bad full backref, on [%llu]\n",
10952                                 (unsigned long long)rec->start);
10953                         if (repair) {
10954                                 ret = fixup_extent_flags(root->fs_info, rec);
10955                                 if (ret)
10956                                         goto repair_abort;
10957                                 fix = 1;
10958                         }
10959                         cur_err = 1;
10960                 }
10961                 /*
10962                  * Although it's not a extent ref's problem, we reuse this
10963                  * routine for error reporting.
10964                  * No repair function yet.
10965                  */
10966                 if (rec->crossing_stripes) {
10967                         fprintf(stderr,
10968                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10969                                 rec->start, rec->start + rec->max_size);
10970                         cur_err = 1;
10971                 }
10972
10973                 if (rec->wrong_chunk_type) {
10974                         fprintf(stderr,
10975                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10976                                 rec->start, rec->start + rec->max_size);
10977                         cur_err = 1;
10978                 }
10979
10980                 err = cur_err;
10981                 remove_cache_extent(extent_cache, cache);
10982                 free_all_extent_backrefs(rec);
10983                 if (!init_extent_tree && repair && (!cur_err || fix))
10984                         clear_extent_dirty(root->fs_info->excluded_extents,
10985                                            rec->start,
10986                                            rec->start + rec->max_size - 1);
10987                 free(rec);
10988         }
10989 repair_abort:
10990         if (repair) {
10991                 if (ret && ret != -EAGAIN) {
10992                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10993                         exit(1);
10994                 } else if (!ret) {
10995                         struct btrfs_trans_handle *trans;
10996
10997                         root = root->fs_info->extent_root;
10998                         trans = btrfs_start_transaction(root, 1);
10999                         if (IS_ERR(trans)) {
11000                                 ret = PTR_ERR(trans);
11001                                 goto repair_abort;
11002                         }
11003
11004                         ret = btrfs_fix_block_accounting(trans, root);
11005                         if (ret)
11006                                 goto repair_abort;
11007                         ret = btrfs_commit_transaction(trans, root);
11008                         if (ret)
11009                                 goto repair_abort;
11010                 }
11011                 return ret;
11012         }
11013
11014         if (err)
11015                 err = -EIO;
11016         return err;
11017 }
11018
11019 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11020 {
11021         u64 stripe_size;
11022
11023         if (type & BTRFS_BLOCK_GROUP_RAID0) {
11024                 stripe_size = length;
11025                 stripe_size /= num_stripes;
11026         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11027                 stripe_size = length * 2;
11028                 stripe_size /= num_stripes;
11029         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11030                 stripe_size = length;
11031                 stripe_size /= (num_stripes - 1);
11032         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11033                 stripe_size = length;
11034                 stripe_size /= (num_stripes - 2);
11035         } else {
11036                 stripe_size = length;
11037         }
11038         return stripe_size;
11039 }
11040
11041 /*
11042  * Check the chunk with its block group/dev list ref:
11043  * Return 0 if all refs seems valid.
11044  * Return 1 if part of refs seems valid, need later check for rebuild ref
11045  * like missing block group and needs to search extent tree to rebuild them.
11046  * Return -1 if essential refs are missing and unable to rebuild.
11047  */
11048 static int check_chunk_refs(struct chunk_record *chunk_rec,
11049                             struct block_group_tree *block_group_cache,
11050                             struct device_extent_tree *dev_extent_cache,
11051                             int silent)
11052 {
11053         struct cache_extent *block_group_item;
11054         struct block_group_record *block_group_rec;
11055         struct cache_extent *dev_extent_item;
11056         struct device_extent_record *dev_extent_rec;
11057         u64 devid;
11058         u64 offset;
11059         u64 length;
11060         int metadump_v2 = 0;
11061         int i;
11062         int ret = 0;
11063
11064         block_group_item = lookup_cache_extent(&block_group_cache->tree,
11065                                                chunk_rec->offset,
11066                                                chunk_rec->length);
11067         if (block_group_item) {
11068                 block_group_rec = container_of(block_group_item,
11069                                                struct block_group_record,
11070                                                cache);
11071                 if (chunk_rec->length != block_group_rec->offset ||
11072                     chunk_rec->offset != block_group_rec->objectid ||
11073                     (!metadump_v2 &&
11074                      chunk_rec->type_flags != block_group_rec->flags)) {
11075                         if (!silent)
11076                                 fprintf(stderr,
11077                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11078                                         chunk_rec->objectid,
11079                                         chunk_rec->type,
11080                                         chunk_rec->offset,
11081                                         chunk_rec->length,
11082                                         chunk_rec->offset,
11083                                         chunk_rec->type_flags,
11084                                         block_group_rec->objectid,
11085                                         block_group_rec->type,
11086                                         block_group_rec->offset,
11087                                         block_group_rec->offset,
11088                                         block_group_rec->objectid,
11089                                         block_group_rec->flags);
11090                         ret = -1;
11091                 } else {
11092                         list_del_init(&block_group_rec->list);
11093                         chunk_rec->bg_rec = block_group_rec;
11094                 }
11095         } else {
11096                 if (!silent)
11097                         fprintf(stderr,
11098                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11099                                 chunk_rec->objectid,
11100                                 chunk_rec->type,
11101                                 chunk_rec->offset,
11102                                 chunk_rec->length,
11103                                 chunk_rec->offset,
11104                                 chunk_rec->type_flags);
11105                 ret = 1;
11106         }
11107
11108         if (metadump_v2)
11109                 return ret;
11110
11111         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11112                                     chunk_rec->num_stripes);
11113         for (i = 0; i < chunk_rec->num_stripes; ++i) {
11114                 devid = chunk_rec->stripes[i].devid;
11115                 offset = chunk_rec->stripes[i].offset;
11116                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11117                                                        devid, offset, length);
11118                 if (dev_extent_item) {
11119                         dev_extent_rec = container_of(dev_extent_item,
11120                                                 struct device_extent_record,
11121                                                 cache);
11122                         if (dev_extent_rec->objectid != devid ||
11123                             dev_extent_rec->offset != offset ||
11124                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
11125                             dev_extent_rec->length != length) {
11126                                 if (!silent)
11127                                         fprintf(stderr,
11128                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11129                                                 chunk_rec->objectid,
11130                                                 chunk_rec->type,
11131                                                 chunk_rec->offset,
11132                                                 chunk_rec->stripes[i].devid,
11133                                                 chunk_rec->stripes[i].offset,
11134                                                 dev_extent_rec->objectid,
11135                                                 dev_extent_rec->offset,
11136                                                 dev_extent_rec->length);
11137                                 ret = -1;
11138                         } else {
11139                                 list_move(&dev_extent_rec->chunk_list,
11140                                           &chunk_rec->dextents);
11141                         }
11142                 } else {
11143                         if (!silent)
11144                                 fprintf(stderr,
11145                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11146                                         chunk_rec->objectid,
11147                                         chunk_rec->type,
11148                                         chunk_rec->offset,
11149                                         chunk_rec->stripes[i].devid,
11150                                         chunk_rec->stripes[i].offset);
11151                         ret = -1;
11152                 }
11153         }
11154         return ret;
11155 }
11156
11157 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
11158 int check_chunks(struct cache_tree *chunk_cache,
11159                  struct block_group_tree *block_group_cache,
11160                  struct device_extent_tree *dev_extent_cache,
11161                  struct list_head *good, struct list_head *bad,
11162                  struct list_head *rebuild, int silent)
11163 {
11164         struct cache_extent *chunk_item;
11165         struct chunk_record *chunk_rec;
11166         struct block_group_record *bg_rec;
11167         struct device_extent_record *dext_rec;
11168         int err;
11169         int ret = 0;
11170
11171         chunk_item = first_cache_extent(chunk_cache);
11172         while (chunk_item) {
11173                 chunk_rec = container_of(chunk_item, struct chunk_record,
11174                                          cache);
11175                 err = check_chunk_refs(chunk_rec, block_group_cache,
11176                                        dev_extent_cache, silent);
11177                 if (err < 0)
11178                         ret = err;
11179                 if (err == 0 && good)
11180                         list_add_tail(&chunk_rec->list, good);
11181                 if (err > 0 && rebuild)
11182                         list_add_tail(&chunk_rec->list, rebuild);
11183                 if (err < 0 && bad)
11184                         list_add_tail(&chunk_rec->list, bad);
11185                 chunk_item = next_cache_extent(chunk_item);
11186         }
11187
11188         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11189                 if (!silent)
11190                         fprintf(stderr,
11191                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11192                                 bg_rec->objectid,
11193                                 bg_rec->offset,
11194                                 bg_rec->flags);
11195                 if (!ret)
11196                         ret = 1;
11197         }
11198
11199         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11200                             chunk_list) {
11201                 if (!silent)
11202                         fprintf(stderr,
11203                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11204                                 dext_rec->objectid,
11205                                 dext_rec->offset,
11206                                 dext_rec->length);
11207                 if (!ret)
11208                         ret = 1;
11209         }
11210         return ret;
11211 }
11212
11213
11214 static int check_device_used(struct device_record *dev_rec,
11215                              struct device_extent_tree *dext_cache)
11216 {
11217         struct cache_extent *cache;
11218         struct device_extent_record *dev_extent_rec;
11219         u64 total_byte = 0;
11220
11221         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11222         while (cache) {
11223                 dev_extent_rec = container_of(cache,
11224                                               struct device_extent_record,
11225                                               cache);
11226                 if (dev_extent_rec->objectid != dev_rec->devid)
11227                         break;
11228
11229                 list_del_init(&dev_extent_rec->device_list);
11230                 total_byte += dev_extent_rec->length;
11231                 cache = next_cache_extent(cache);
11232         }
11233
11234         if (total_byte != dev_rec->byte_used) {
11235                 fprintf(stderr,
11236                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11237                         total_byte, dev_rec->byte_used, dev_rec->objectid,
11238                         dev_rec->type, dev_rec->offset);
11239                 return -1;
11240         } else {
11241                 return 0;
11242         }
11243 }
11244
11245 /*
11246  * Extra (optional) check for dev_item size to report possbile problem on a new
11247  * kernel.
11248  */
11249 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
11250 {
11251         if (!IS_ALIGNED(total_bytes, sectorsize)) {
11252                 warning(
11253 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
11254                         devid, total_bytes, sectorsize);
11255                 warning(
11256 "this is OK for older kernel, but may cause kernel warning for newer kernels");
11257                 warning("this can be fixed by 'btrfs rescue fix-device-size'");
11258         }
11259 }
11260
11261 /*
11262  * Unlike device size alignment check above, some super total_bytes check
11263  * failure can lead to mount failure for newer kernel.
11264  *
11265  * So this function will return the error for a fatal super total_bytes problem.
11266  */
11267 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
11268 {
11269         struct btrfs_device *dev;
11270         struct list_head *dev_list = &fs_info->fs_devices->devices;
11271         u64 total_bytes = 0;
11272         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
11273
11274         list_for_each_entry(dev, dev_list, dev_list)
11275                 total_bytes += dev->total_bytes;
11276
11277         /* Important check, which can cause unmountable fs */
11278         if (super_bytes < total_bytes) {
11279                 error("super total bytes %llu smaller than real device(s) size %llu",
11280                         super_bytes, total_bytes);
11281                 error("mounting this fs may fail for newer kernels");
11282                 error("this can be fixed by 'btrfs rescue fix-device-size'");
11283                 return false;
11284         }
11285
11286         /*
11287          * Optional check, just to make everything aligned and match with each
11288          * other.
11289          *
11290          * For a btrfs-image restored fs, we don't need to check it anyway.
11291          */
11292         if (btrfs_super_flags(fs_info->super_copy) &
11293             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
11294                 return true;
11295         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
11296             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
11297             super_bytes != total_bytes) {
11298                 warning("minor unaligned/mismatch device size detected");
11299                 warning(
11300                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
11301         }
11302         return true;
11303 }
11304
11305 /* check btrfs_dev_item -> btrfs_dev_extent */
11306 static int check_devices(struct rb_root *dev_cache,
11307                          struct device_extent_tree *dev_extent_cache)
11308 {
11309         struct rb_node *dev_node;
11310         struct device_record *dev_rec;
11311         struct device_extent_record *dext_rec;
11312         int err;
11313         int ret = 0;
11314
11315         dev_node = rb_first(dev_cache);
11316         while (dev_node) {
11317                 dev_rec = container_of(dev_node, struct device_record, node);
11318                 err = check_device_used(dev_rec, dev_extent_cache);
11319                 if (err)
11320                         ret = err;
11321
11322                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11323                                          global_info->sectorsize);
11324                 dev_node = rb_next(dev_node);
11325         }
11326         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11327                             device_list) {
11328                 fprintf(stderr,
11329                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11330                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
11331                 if (!ret)
11332                         ret = 1;
11333         }
11334         return ret;
11335 }
11336
11337 static int add_root_item_to_list(struct list_head *head,
11338                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11339                                   u8 level, u8 drop_level,
11340                                   struct btrfs_key *drop_key)
11341 {
11342
11343         struct root_item_record *ri_rec;
11344         ri_rec = malloc(sizeof(*ri_rec));
11345         if (!ri_rec)
11346                 return -ENOMEM;
11347         ri_rec->bytenr = bytenr;
11348         ri_rec->objectid = objectid;
11349         ri_rec->level = level;
11350         ri_rec->drop_level = drop_level;
11351         ri_rec->last_snapshot = last_snapshot;
11352         if (drop_key)
11353                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11354         list_add_tail(&ri_rec->list, head);
11355
11356         return 0;
11357 }
11358
11359 static void free_root_item_list(struct list_head *list)
11360 {
11361         struct root_item_record *ri_rec;
11362
11363         while (!list_empty(list)) {
11364                 ri_rec = list_first_entry(list, struct root_item_record,
11365                                           list);
11366                 list_del_init(&ri_rec->list);
11367                 free(ri_rec);
11368         }
11369 }
11370
11371 static int deal_root_from_list(struct list_head *list,
11372                                struct btrfs_root *root,
11373                                struct block_info *bits,
11374                                int bits_nr,
11375                                struct cache_tree *pending,
11376                                struct cache_tree *seen,
11377                                struct cache_tree *reada,
11378                                struct cache_tree *nodes,
11379                                struct cache_tree *extent_cache,
11380                                struct cache_tree *chunk_cache,
11381                                struct rb_root *dev_cache,
11382                                struct block_group_tree *block_group_cache,
11383                                struct device_extent_tree *dev_extent_cache)
11384 {
11385         int ret = 0;
11386         u64 last;
11387
11388         while (!list_empty(list)) {
11389                 struct root_item_record *rec;
11390                 struct extent_buffer *buf;
11391                 rec = list_entry(list->next,
11392                                  struct root_item_record, list);
11393                 last = 0;
11394                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11395                 if (!extent_buffer_uptodate(buf)) {
11396                         free_extent_buffer(buf);
11397                         ret = -EIO;
11398                         break;
11399                 }
11400                 ret = add_root_to_pending(buf, extent_cache, pending,
11401                                     seen, nodes, rec->objectid);
11402                 if (ret < 0)
11403                         break;
11404                 /*
11405                  * To rebuild extent tree, we need deal with snapshot
11406                  * one by one, otherwise we deal with node firstly which
11407                  * can maximize readahead.
11408                  */
11409                 while (1) {
11410                         ret = run_next_block(root, bits, bits_nr, &last,
11411                                              pending, seen, reada, nodes,
11412                                              extent_cache, chunk_cache,
11413                                              dev_cache, block_group_cache,
11414                                              dev_extent_cache, rec);
11415                         if (ret != 0)
11416                                 break;
11417                 }
11418                 free_extent_buffer(buf);
11419                 list_del(&rec->list);
11420                 free(rec);
11421                 if (ret < 0)
11422                         break;
11423         }
11424         while (ret >= 0) {
11425                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11426                                      reada, nodes, extent_cache, chunk_cache,
11427                                      dev_cache, block_group_cache,
11428                                      dev_extent_cache, NULL);
11429                 if (ret != 0) {
11430                         if (ret > 0)
11431                                 ret = 0;
11432                         break;
11433                 }
11434         }
11435         return ret;
11436 }
11437
11438 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11439 {
11440         struct rb_root dev_cache;
11441         struct cache_tree chunk_cache;
11442         struct block_group_tree block_group_cache;
11443         struct device_extent_tree dev_extent_cache;
11444         struct cache_tree extent_cache;
11445         struct cache_tree seen;
11446         struct cache_tree pending;
11447         struct cache_tree reada;
11448         struct cache_tree nodes;
11449         struct extent_io_tree excluded_extents;
11450         struct cache_tree corrupt_blocks;
11451         struct btrfs_path path;
11452         struct btrfs_key key;
11453         struct btrfs_key found_key;
11454         int ret, err = 0;
11455         struct block_info *bits;
11456         int bits_nr;
11457         struct extent_buffer *leaf;
11458         int slot;
11459         struct btrfs_root_item ri;
11460         struct list_head dropping_trees;
11461         struct list_head normal_trees;
11462         struct btrfs_root *root1;
11463         struct btrfs_root *root;
11464         u64 objectid;
11465         u8 level;
11466
11467         root = fs_info->fs_root;
11468         dev_cache = RB_ROOT;
11469         cache_tree_init(&chunk_cache);
11470         block_group_tree_init(&block_group_cache);
11471         device_extent_tree_init(&dev_extent_cache);
11472
11473         cache_tree_init(&extent_cache);
11474         cache_tree_init(&seen);
11475         cache_tree_init(&pending);
11476         cache_tree_init(&nodes);
11477         cache_tree_init(&reada);
11478         cache_tree_init(&corrupt_blocks);
11479         extent_io_tree_init(&excluded_extents);
11480         INIT_LIST_HEAD(&dropping_trees);
11481         INIT_LIST_HEAD(&normal_trees);
11482
11483         if (repair) {
11484                 fs_info->excluded_extents = &excluded_extents;
11485                 fs_info->fsck_extent_cache = &extent_cache;
11486                 fs_info->free_extent_hook = free_extent_hook;
11487                 fs_info->corrupt_blocks = &corrupt_blocks;
11488         }
11489
11490         bits_nr = 1024;
11491         bits = malloc(bits_nr * sizeof(struct block_info));
11492         if (!bits) {
11493                 perror("malloc");
11494                 exit(1);
11495         }
11496
11497         if (ctx.progress_enabled) {
11498                 ctx.tp = TASK_EXTENTS;
11499                 task_start(ctx.info);
11500         }
11501
11502 again:
11503         root1 = fs_info->tree_root;
11504         level = btrfs_header_level(root1->node);
11505         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11506                                     root1->node->start, 0, level, 0, NULL);
11507         if (ret < 0)
11508                 goto out;
11509         root1 = fs_info->chunk_root;
11510         level = btrfs_header_level(root1->node);
11511         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11512                                     root1->node->start, 0, level, 0, NULL);
11513         if (ret < 0)
11514                 goto out;
11515         btrfs_init_path(&path);
11516         key.offset = 0;
11517         key.objectid = 0;
11518         key.type = BTRFS_ROOT_ITEM_KEY;
11519         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11520         if (ret < 0)
11521                 goto out;
11522         while(1) {
11523                 leaf = path.nodes[0];
11524                 slot = path.slots[0];
11525                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11526                         ret = btrfs_next_leaf(root, &path);
11527                         if (ret != 0)
11528                                 break;
11529                         leaf = path.nodes[0];
11530                         slot = path.slots[0];
11531                 }
11532                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11533                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11534                         unsigned long offset;
11535                         u64 last_snapshot;
11536
11537                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11538                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11539                         last_snapshot = btrfs_root_last_snapshot(&ri);
11540                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11541                                 level = btrfs_root_level(&ri);
11542                                 ret = add_root_item_to_list(&normal_trees,
11543                                                 found_key.objectid,
11544                                                 btrfs_root_bytenr(&ri),
11545                                                 last_snapshot, level,
11546                                                 0, NULL);
11547                                 if (ret < 0)
11548                                         goto out;
11549                         } else {
11550                                 level = btrfs_root_level(&ri);
11551                                 objectid = found_key.objectid;
11552                                 btrfs_disk_key_to_cpu(&found_key,
11553                                                       &ri.drop_progress);
11554                                 ret = add_root_item_to_list(&dropping_trees,
11555                                                 objectid,
11556                                                 btrfs_root_bytenr(&ri),
11557                                                 last_snapshot, level,
11558                                                 ri.drop_level, &found_key);
11559                                 if (ret < 0)
11560                                         goto out;
11561                         }
11562                 }
11563                 path.slots[0]++;
11564         }
11565         btrfs_release_path(&path);
11566
11567         /*
11568          * check_block can return -EAGAIN if it fixes something, please keep
11569          * this in mind when dealing with return values from these functions, if
11570          * we get -EAGAIN we want to fall through and restart the loop.
11571          */
11572         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11573                                   &seen, &reada, &nodes, &extent_cache,
11574                                   &chunk_cache, &dev_cache, &block_group_cache,
11575                                   &dev_extent_cache);
11576         if (ret < 0) {
11577                 if (ret == -EAGAIN)
11578                         goto loop;
11579                 goto out;
11580         }
11581         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11582                                   &pending, &seen, &reada, &nodes,
11583                                   &extent_cache, &chunk_cache, &dev_cache,
11584                                   &block_group_cache, &dev_extent_cache);
11585         if (ret < 0) {
11586                 if (ret == -EAGAIN)
11587                         goto loop;
11588                 goto out;
11589         }
11590
11591         ret = check_chunks(&chunk_cache, &block_group_cache,
11592                            &dev_extent_cache, NULL, NULL, NULL, 0);
11593         if (ret) {
11594                 if (ret == -EAGAIN)
11595                         goto loop;
11596                 err = ret;
11597         }
11598
11599         ret = check_extent_refs(root, &extent_cache);
11600         if (ret < 0) {
11601                 if (ret == -EAGAIN)
11602                         goto loop;
11603                 goto out;
11604         }
11605
11606         ret = check_devices(&dev_cache, &dev_extent_cache);
11607         if (ret && err)
11608                 ret = err;
11609
11610 out:
11611         task_stop(ctx.info);
11612         if (repair) {
11613                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11614                 extent_io_tree_cleanup(&excluded_extents);
11615                 fs_info->fsck_extent_cache = NULL;
11616                 fs_info->free_extent_hook = NULL;
11617                 fs_info->corrupt_blocks = NULL;
11618                 fs_info->excluded_extents = NULL;
11619         }
11620         free(bits);
11621         free_chunk_cache_tree(&chunk_cache);
11622         free_device_cache_tree(&dev_cache);
11623         free_block_group_tree(&block_group_cache);
11624         free_device_extent_tree(&dev_extent_cache);
11625         free_extent_cache_tree(&seen);
11626         free_extent_cache_tree(&pending);
11627         free_extent_cache_tree(&reada);
11628         free_extent_cache_tree(&nodes);
11629         free_root_item_list(&normal_trees);
11630         free_root_item_list(&dropping_trees);
11631         return ret;
11632 loop:
11633         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11634         free_extent_cache_tree(&seen);
11635         free_extent_cache_tree(&pending);
11636         free_extent_cache_tree(&reada);
11637         free_extent_cache_tree(&nodes);
11638         free_chunk_cache_tree(&chunk_cache);
11639         free_block_group_tree(&block_group_cache);
11640         free_device_cache_tree(&dev_cache);
11641         free_device_extent_tree(&dev_extent_cache);
11642         free_extent_record_cache(&extent_cache);
11643         free_root_item_list(&normal_trees);
11644         free_root_item_list(&dropping_trees);
11645         extent_io_tree_cleanup(&excluded_extents);
11646         goto again;
11647 }
11648
11649 static int check_extent_inline_ref(struct extent_buffer *eb,
11650                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11651 {
11652         int ret;
11653         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11654
11655         switch (type) {
11656         case BTRFS_TREE_BLOCK_REF_KEY:
11657         case BTRFS_EXTENT_DATA_REF_KEY:
11658         case BTRFS_SHARED_BLOCK_REF_KEY:
11659         case BTRFS_SHARED_DATA_REF_KEY:
11660                 ret = 0;
11661                 break;
11662         default:
11663                 error("extent[%llu %u %llu] has unknown ref type: %d",
11664                       key->objectid, key->type, key->offset, type);
11665                 ret = UNKNOWN_TYPE;
11666                 break;
11667         }
11668
11669         return ret;
11670 }
11671
11672 /*
11673  * Check backrefs of a tree block given by @bytenr or @eb.
11674  *
11675  * @root:       the root containing the @bytenr or @eb
11676  * @eb:         tree block extent buffer, can be NULL
11677  * @bytenr:     bytenr of the tree block to search
11678  * @level:      tree level of the tree block
11679  * @owner:      owner of the tree block
11680  *
11681  * Return >0 for any error found and output error message
11682  * Return 0 for no error found
11683  */
11684 static int check_tree_block_ref(struct btrfs_root *root,
11685                                 struct extent_buffer *eb, u64 bytenr,
11686                                 int level, u64 owner, struct node_refs *nrefs)
11687 {
11688         struct btrfs_key key;
11689         struct btrfs_root *extent_root = root->fs_info->extent_root;
11690         struct btrfs_path path;
11691         struct btrfs_extent_item *ei;
11692         struct btrfs_extent_inline_ref *iref;
11693         struct extent_buffer *leaf;
11694         unsigned long end;
11695         unsigned long ptr;
11696         int slot;
11697         int skinny_level;
11698         int root_level = btrfs_header_level(root->node);
11699         int type;
11700         u32 nodesize = root->fs_info->nodesize;
11701         u32 item_size;
11702         u64 offset;
11703         int found_ref = 0;
11704         int err = 0;
11705         int ret;
11706         int strict = 1;
11707         int parent = 0;
11708
11709         btrfs_init_path(&path);
11710         key.objectid = bytenr;
11711         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11712                 key.type = BTRFS_METADATA_ITEM_KEY;
11713         else
11714                 key.type = BTRFS_EXTENT_ITEM_KEY;
11715         key.offset = (u64)-1;
11716
11717         /* Search for the backref in extent tree */
11718         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11719         if (ret < 0) {
11720                 err |= BACKREF_MISSING;
11721                 goto out;
11722         }
11723         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11724         if (ret) {
11725                 err |= BACKREF_MISSING;
11726                 goto out;
11727         }
11728
11729         leaf = path.nodes[0];
11730         slot = path.slots[0];
11731         btrfs_item_key_to_cpu(leaf, &key, slot);
11732
11733         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11734
11735         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11736                 skinny_level = (int)key.offset;
11737                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11738         } else {
11739                 struct btrfs_tree_block_info *info;
11740
11741                 info = (struct btrfs_tree_block_info *)(ei + 1);
11742                 skinny_level = btrfs_tree_block_level(leaf, info);
11743                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11744         }
11745
11746
11747         if (eb) {
11748                 u64 header_gen;
11749                 u64 extent_gen;
11750
11751                 /*
11752                  * Due to the feature of shared tree blocks, if the upper node
11753                  * is a fs root or shared node, the extent of checked node may
11754                  * not be updated until the next CoW.
11755                  */
11756                 if (nrefs)
11757                         strict = should_check_extent_strictly(root, nrefs,
11758                                         level);
11759                 if (!(btrfs_extent_flags(leaf, ei) &
11760                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11761                         error(
11762                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11763                                 key.objectid, nodesize,
11764                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11765                         err = BACKREF_MISMATCH;
11766                 }
11767                 header_gen = btrfs_header_generation(eb);
11768                 extent_gen = btrfs_extent_generation(leaf, ei);
11769                 if (header_gen != extent_gen) {
11770                         error(
11771         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11772                                 key.objectid, nodesize, header_gen,
11773                                 extent_gen);
11774                         err = BACKREF_MISMATCH;
11775                 }
11776                 if (level != skinny_level) {
11777                         error(
11778                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11779                                 key.objectid, nodesize, level, skinny_level);
11780                         err = BACKREF_MISMATCH;
11781                 }
11782                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11783                         error(
11784                         "extent[%llu %u] is referred by other roots than %llu",
11785                                 key.objectid, nodesize, root->objectid);
11786                         err = BACKREF_MISMATCH;
11787                 }
11788         }
11789
11790         /*
11791          * Iterate the extent/metadata item to find the exact backref
11792          */
11793         item_size = btrfs_item_size_nr(leaf, slot);
11794         ptr = (unsigned long)iref;
11795         end = (unsigned long)ei + item_size;
11796
11797         while (ptr < end) {
11798                 iref = (struct btrfs_extent_inline_ref *)ptr;
11799                 type = btrfs_extent_inline_ref_type(leaf, iref);
11800                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11801
11802                 ret = check_extent_inline_ref(leaf, &key, iref);
11803                 if (ret) {
11804                         err |= ret;
11805                         break;
11806                 }
11807                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11808                         if (offset == root->objectid)
11809                                 found_ref = 1;
11810                         if (!strict && owner == offset)
11811                                 found_ref = 1;
11812                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11813                         /*
11814                          * Backref of tree reloc root points to itself, no need
11815                          * to check backref any more.
11816                          *
11817                          * This may be an error of loop backref, but extent tree
11818                          * checker should have already handled it.
11819                          * Here we only need to avoid infinite iteration.
11820                          */
11821                         if (offset == bytenr) {
11822                                 found_ref = 1;
11823                         } else {
11824                                 /*
11825                                  * Check if the backref points to valid
11826                                  * referencer
11827                                  */
11828                                 found_ref = !check_tree_block_ref( root, NULL,
11829                                                 offset, level + 1, owner,
11830                                                 NULL);
11831                         }
11832                 }
11833
11834                 if (found_ref)
11835                         break;
11836                 ptr += btrfs_extent_inline_ref_size(type);
11837         }
11838
11839         /*
11840          * Inlined extent item doesn't have what we need, check
11841          * TREE_BLOCK_REF_KEY
11842          */
11843         if (!found_ref) {
11844                 btrfs_release_path(&path);
11845                 key.objectid = bytenr;
11846                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11847                 key.offset = root->objectid;
11848
11849                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11850                 if (!ret)
11851                         found_ref = 1;
11852         }
11853         /*
11854          * Finally check SHARED BLOCK REF, any found will be good
11855          * Here we're not doing comprehensive extent backref checking,
11856          * only need to ensure there is some extent referring to this
11857          * tree block.
11858          */
11859         if (!found_ref) {
11860                 btrfs_release_path(&path);
11861                 key.objectid = bytenr;
11862                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11863                 key.offset = (u64)-1;
11864
11865                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11866                 if (ret < 0) {
11867                         err |= BACKREF_MISSING;
11868                         goto out;
11869                 }
11870                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11871                 if (ret) {
11872                         err |= BACKREF_MISSING;
11873                         goto out;
11874                 }
11875                 found_ref = 1;
11876         }
11877         if (!found_ref)
11878                 err |= BACKREF_MISSING;
11879 out:
11880         btrfs_release_path(&path);
11881         if (nrefs && strict &&
11882             level < root_level && nrefs->full_backref[level + 1])
11883                 parent = nrefs->bytenr[level + 1];
11884         if (eb && (err & BACKREF_MISSING))
11885                 error(
11886         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11887                       bytenr, nodesize, owner, level,
11888                       parent ? "parent" : "root",
11889                       parent ? parent : root->objectid);
11890         return err;
11891 }
11892
11893 /*
11894  * If @err contains BACKREF_MISSING then add extent of the
11895  * file_extent_data_item.
11896  *
11897  * Returns error bits after reapir.
11898  */
11899 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11900                                    struct btrfs_root *root,
11901                                    struct btrfs_path *pathp,
11902                                    struct node_refs *nrefs,
11903                                    int err)
11904 {
11905         struct btrfs_file_extent_item *fi;
11906         struct btrfs_key fi_key;
11907         struct btrfs_key key;
11908         struct btrfs_extent_item *ei;
11909         struct btrfs_path path;
11910         struct btrfs_root *extent_root = root->fs_info->extent_root;
11911         struct extent_buffer *eb;
11912         u64 size;
11913         u64 disk_bytenr;
11914         u64 num_bytes;
11915         u64 parent;
11916         u64 offset;
11917         u64 extent_offset;
11918         u64 file_offset;
11919         int generation;
11920         int slot;
11921         int ret = 0;
11922
11923         eb = pathp->nodes[0];
11924         slot = pathp->slots[0];
11925         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11926         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11927
11928         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11929             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11930                 return err;
11931
11932         file_offset = fi_key.offset;
11933         generation = btrfs_file_extent_generation(eb, fi);
11934         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11935         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11936         extent_offset = btrfs_file_extent_offset(eb, fi);
11937         offset = file_offset - extent_offset;
11938
11939         /* now repair only adds backref */
11940         if ((err & BACKREF_MISSING) == 0)
11941                 return err;
11942
11943         /* search extent item */
11944         key.objectid = disk_bytenr;
11945         key.type = BTRFS_EXTENT_ITEM_KEY;
11946         key.offset = num_bytes;
11947
11948         btrfs_init_path(&path);
11949         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11950         if (ret < 0) {
11951                 ret = -EIO;
11952                 goto out;
11953         }
11954
11955         /* insert an extent item */
11956         if (ret > 0) {
11957                 key.objectid = disk_bytenr;
11958                 key.type = BTRFS_EXTENT_ITEM_KEY;
11959                 key.offset = num_bytes;
11960                 size = sizeof(*ei);
11961
11962                 btrfs_release_path(&path);
11963                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11964                                               size);
11965                 if (ret)
11966                         goto out;
11967                 eb = path.nodes[0];
11968                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11969
11970                 btrfs_set_extent_refs(eb, ei, 0);
11971                 btrfs_set_extent_generation(eb, ei, generation);
11972                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11973
11974                 btrfs_mark_buffer_dirty(eb);
11975                 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11976                                                num_bytes, 1, 0);
11977                 btrfs_release_path(&path);
11978         }
11979
11980         if (nrefs->full_backref[0])
11981                 parent = btrfs_header_bytenr(eb);
11982         else
11983                 parent = 0;
11984
11985         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11986                                    root->objectid,
11987                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11988                                    offset);
11989         if (ret) {
11990                 error(
11991                 "failed to increase extent data backref[%llu %llu] root %llu",
11992                       disk_bytenr, num_bytes, root->objectid);
11993                 goto out;
11994         } else {
11995                 printf("Add one extent data backref [%llu %llu]\n",
11996                        disk_bytenr, num_bytes);
11997         }
11998
11999         err &= ~BACKREF_MISSING;
12000 out:
12001         if (ret)
12002                 error("can't repair root %llu extent data item[%llu %llu]",
12003                       root->objectid, disk_bytenr, num_bytes);
12004         return err;
12005 }
12006
12007 /*
12008  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
12009  *
12010  * Return >0 any error found and output error message
12011  * Return 0 for no error found
12012  */
12013 static int check_extent_data_item(struct btrfs_root *root,
12014                                   struct btrfs_path *pathp,
12015                                   struct node_refs *nrefs,  int account_bytes)
12016 {
12017         struct btrfs_file_extent_item *fi;
12018         struct extent_buffer *eb = pathp->nodes[0];
12019         struct btrfs_path path;
12020         struct btrfs_root *extent_root = root->fs_info->extent_root;
12021         struct btrfs_key fi_key;
12022         struct btrfs_key dbref_key;
12023         struct extent_buffer *leaf;
12024         struct btrfs_extent_item *ei;
12025         struct btrfs_extent_inline_ref *iref;
12026         struct btrfs_extent_data_ref *dref;
12027         u64 owner;
12028         u64 disk_bytenr;
12029         u64 disk_num_bytes;
12030         u64 extent_num_bytes;
12031         u64 extent_flags;
12032         u64 offset;
12033         u32 item_size;
12034         unsigned long end;
12035         unsigned long ptr;
12036         int type;
12037         int found_dbackref = 0;
12038         int slot = pathp->slots[0];
12039         int err = 0;
12040         int ret;
12041         int strict;
12042
12043         btrfs_item_key_to_cpu(eb, &fi_key, slot);
12044         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
12045
12046         /* Nothing to check for hole and inline data extents */
12047         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
12048             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
12049                 return 0;
12050
12051         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
12052         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
12053         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
12054         offset = btrfs_file_extent_offset(eb, fi);
12055
12056         /* Check unaligned disk_num_bytes and num_bytes */
12057         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
12058                 error(
12059 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
12060                         fi_key.objectid, fi_key.offset, disk_num_bytes,
12061                         root->fs_info->sectorsize);
12062                 err |= BYTES_UNALIGNED;
12063         } else if (account_bytes) {
12064                 data_bytes_allocated += disk_num_bytes;
12065         }
12066         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
12067                 error(
12068 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
12069                         fi_key.objectid, fi_key.offset, extent_num_bytes,
12070                         root->fs_info->sectorsize);
12071                 err |= BYTES_UNALIGNED;
12072         } else if (account_bytes) {
12073                 data_bytes_referenced += extent_num_bytes;
12074         }
12075         owner = btrfs_header_owner(eb);
12076
12077         /* Check the extent item of the file extent in extent tree */
12078         btrfs_init_path(&path);
12079         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12080         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
12081         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
12082
12083         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
12084         if (ret)
12085                 goto out;
12086
12087         leaf = path.nodes[0];
12088         slot = path.slots[0];
12089         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12090
12091         extent_flags = btrfs_extent_flags(leaf, ei);
12092
12093         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12094                 error(
12095                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12096                     disk_bytenr, disk_num_bytes,
12097                     BTRFS_EXTENT_FLAG_DATA);
12098                 err |= BACKREF_MISMATCH;
12099         }
12100
12101         /* Check data backref inside that extent item */
12102         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
12103         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12104         ptr = (unsigned long)iref;
12105         end = (unsigned long)ei + item_size;
12106         strict = should_check_extent_strictly(root, nrefs, -1);
12107
12108         while (ptr < end) {
12109                 u64 ref_root;
12110                 u64 ref_objectid;
12111                 u64 ref_offset;
12112                 bool match = false;
12113
12114                 iref = (struct btrfs_extent_inline_ref *)ptr;
12115                 type = btrfs_extent_inline_ref_type(leaf, iref);
12116                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12117
12118                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
12119                 if (ret) {
12120                         err |= ret;
12121                         break;
12122                 }
12123                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12124                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
12125                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
12126                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
12127
12128                         if (ref_objectid == fi_key.objectid &&
12129                             ref_offset == fi_key.offset - offset)
12130                                 match = true;
12131                         if (ref_root == root->objectid && match)
12132                                 found_dbackref = 1;
12133                         else if (!strict && owner == ref_root && match)
12134                                 found_dbackref = 1;
12135                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
12136                         found_dbackref = !check_tree_block_ref(root, NULL,
12137                                 btrfs_extent_inline_ref_offset(leaf, iref),
12138                                 0, owner, NULL);
12139                 }
12140
12141                 if (found_dbackref)
12142                         break;
12143                 ptr += btrfs_extent_inline_ref_size(type);
12144         }
12145
12146         if (!found_dbackref) {
12147                 btrfs_release_path(&path);
12148
12149                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12150                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12151                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12152                 dbref_key.offset = hash_extent_data_ref(root->objectid,
12153                                 fi_key.objectid, fi_key.offset - offset);
12154
12155                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12156                                         &dbref_key, &path, 0, 0);
12157                 if (!ret) {
12158                         found_dbackref = 1;
12159                         goto out;
12160                 }
12161
12162                 btrfs_release_path(&path);
12163
12164                 /*
12165                  * Neither inlined nor EXTENT_DATA_REF found, try
12166                  * SHARED_DATA_REF as last chance.
12167                  */
12168                 dbref_key.objectid = disk_bytenr;
12169                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12170                 dbref_key.offset = eb->start;
12171
12172                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12173                                         &dbref_key, &path, 0, 0);
12174                 if (!ret) {
12175                         found_dbackref = 1;
12176                         goto out;
12177                 }
12178         }
12179
12180 out:
12181         if (!found_dbackref)
12182                 err |= BACKREF_MISSING;
12183         btrfs_release_path(&path);
12184         if (err & BACKREF_MISSING) {
12185                 error("data extent[%llu %llu] backref lost",
12186                       disk_bytenr, disk_num_bytes);
12187         }
12188         return err;
12189 }
12190
12191 /*
12192  * Get real tree block level for the case like shared block
12193  * Return >= 0 as tree level
12194  * Return <0 for error
12195  */
12196 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12197 {
12198         struct extent_buffer *eb;
12199         struct btrfs_path path;
12200         struct btrfs_key key;
12201         struct btrfs_extent_item *ei;
12202         u64 flags;
12203         u64 transid;
12204         u8 backref_level;
12205         u8 header_level;
12206         int ret;
12207
12208         /* Search extent tree for extent generation and level */
12209         key.objectid = bytenr;
12210         key.type = BTRFS_METADATA_ITEM_KEY;
12211         key.offset = (u64)-1;
12212
12213         btrfs_init_path(&path);
12214         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12215         if (ret < 0)
12216                 goto release_out;
12217         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12218         if (ret < 0)
12219                 goto release_out;
12220         if (ret > 0) {
12221                 ret = -ENOENT;
12222                 goto release_out;
12223         }
12224
12225         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12226         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12227                             struct btrfs_extent_item);
12228         flags = btrfs_extent_flags(path.nodes[0], ei);
12229         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12230                 ret = -ENOENT;
12231                 goto release_out;
12232         }
12233
12234         /* Get transid for later read_tree_block() check */
12235         transid = btrfs_extent_generation(path.nodes[0], ei);
12236
12237         /* Get backref level as one source */
12238         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12239                 backref_level = key.offset;
12240         } else {
12241                 struct btrfs_tree_block_info *info;
12242
12243                 info = (struct btrfs_tree_block_info *)(ei + 1);
12244                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12245         }
12246         btrfs_release_path(&path);
12247
12248         /* Get level from tree block as an alternative source */
12249         eb = read_tree_block(fs_info, bytenr, transid);
12250         if (!extent_buffer_uptodate(eb)) {
12251                 free_extent_buffer(eb);
12252                 return -EIO;
12253         }
12254         header_level = btrfs_header_level(eb);
12255         free_extent_buffer(eb);
12256
12257         if (header_level != backref_level)
12258                 return -EIO;
12259         return header_level;
12260
12261 release_out:
12262         btrfs_release_path(&path);
12263         return ret;
12264 }
12265
12266 /*
12267  * Check if a tree block backref is valid (points to a valid tree block)
12268  * if level == -1, level will be resolved
12269  * Return >0 for any error found and print error message
12270  */
12271 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12272                                     u64 bytenr, int level)
12273 {
12274         struct btrfs_root *root;
12275         struct btrfs_key key;
12276         struct btrfs_path path;
12277         struct extent_buffer *eb;
12278         struct extent_buffer *node;
12279         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12280         int err = 0;
12281         int ret;
12282
12283         /* Query level for level == -1 special case */
12284         if (level == -1)
12285                 level = query_tree_block_level(fs_info, bytenr);
12286         if (level < 0) {
12287                 err |= REFERENCER_MISSING;
12288                 goto out;
12289         }
12290
12291         key.objectid = root_id;
12292         key.type = BTRFS_ROOT_ITEM_KEY;
12293         key.offset = (u64)-1;
12294
12295         root = btrfs_read_fs_root(fs_info, &key);
12296         if (IS_ERR(root)) {
12297                 err |= REFERENCER_MISSING;
12298                 goto out;
12299         }
12300
12301         /* Read out the tree block to get item/node key */
12302         eb = read_tree_block(fs_info, bytenr, 0);
12303         if (!extent_buffer_uptodate(eb)) {
12304                 err |= REFERENCER_MISSING;
12305                 free_extent_buffer(eb);
12306                 goto out;
12307         }
12308
12309         /* Empty tree, no need to check key */
12310         if (!btrfs_header_nritems(eb) && !level) {
12311                 free_extent_buffer(eb);
12312                 goto out;
12313         }
12314
12315         if (level)
12316                 btrfs_node_key_to_cpu(eb, &key, 0);
12317         else
12318                 btrfs_item_key_to_cpu(eb, &key, 0);
12319
12320         free_extent_buffer(eb);
12321
12322         btrfs_init_path(&path);
12323         path.lowest_level = level;
12324         /* Search with the first key, to ensure we can reach it */
12325         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12326         if (ret < 0) {
12327                 err |= REFERENCER_MISSING;
12328                 goto release_out;
12329         }
12330
12331         node = path.nodes[level];
12332         if (btrfs_header_bytenr(node) != bytenr) {
12333                 error(
12334         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12335                         bytenr, nodesize, bytenr,
12336                         btrfs_header_bytenr(node));
12337                 err |= REFERENCER_MISMATCH;
12338         }
12339         if (btrfs_header_level(node) != level) {
12340                 error(
12341         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12342                         bytenr, nodesize, level,
12343                         btrfs_header_level(node));
12344                 err |= REFERENCER_MISMATCH;
12345         }
12346
12347 release_out:
12348         btrfs_release_path(&path);
12349 out:
12350         if (err & REFERENCER_MISSING) {
12351                 if (level < 0)
12352                         error("extent [%llu %d] lost referencer (owner: %llu)",
12353                                 bytenr, nodesize, root_id);
12354                 else
12355                         error(
12356                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12357                                 bytenr, nodesize, root_id, level);
12358         }
12359
12360         return err;
12361 }
12362
12363 /*
12364  * Check if tree block @eb is tree reloc root.
12365  * Return 0 if it's not or any problem happens
12366  * Return 1 if it's a tree reloc root
12367  */
12368 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12369                                  struct extent_buffer *eb)
12370 {
12371         struct btrfs_root *tree_reloc_root;
12372         struct btrfs_key key;
12373         u64 bytenr = btrfs_header_bytenr(eb);
12374         u64 owner = btrfs_header_owner(eb);
12375         int ret = 0;
12376
12377         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12378         key.offset = owner;
12379         key.type = BTRFS_ROOT_ITEM_KEY;
12380
12381         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12382         if (IS_ERR(tree_reloc_root))
12383                 return 0;
12384
12385         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12386                 ret = 1;
12387         btrfs_free_fs_root(tree_reloc_root);
12388         return ret;
12389 }
12390
12391 /*
12392  * Check referencer for shared block backref
12393  * If level == -1, this function will resolve the level.
12394  */
12395 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12396                                      u64 parent, u64 bytenr, int level)
12397 {
12398         struct extent_buffer *eb;
12399         u32 nr;
12400         int found_parent = 0;
12401         int i;
12402
12403         eb = read_tree_block(fs_info, parent, 0);
12404         if (!extent_buffer_uptodate(eb))
12405                 goto out;
12406
12407         if (level == -1)
12408                 level = query_tree_block_level(fs_info, bytenr);
12409         if (level < 0)
12410                 goto out;
12411
12412         /* It's possible it's a tree reloc root */
12413         if (parent == bytenr) {
12414                 if (is_tree_reloc_root(fs_info, eb))
12415                         found_parent = 1;
12416                 goto out;
12417         }
12418
12419         if (level + 1 != btrfs_header_level(eb))
12420                 goto out;
12421
12422         nr = btrfs_header_nritems(eb);
12423         for (i = 0; i < nr; i++) {
12424                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12425                         found_parent = 1;
12426                         break;
12427                 }
12428         }
12429 out:
12430         free_extent_buffer(eb);
12431         if (!found_parent) {
12432                 error(
12433         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12434                         bytenr, fs_info->nodesize, parent, level);
12435                 return REFERENCER_MISSING;
12436         }
12437         return 0;
12438 }
12439
12440 /*
12441  * Check referencer for normal (inlined) data ref
12442  * If len == 0, it will be resolved by searching in extent tree
12443  */
12444 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12445                                      u64 root_id, u64 objectid, u64 offset,
12446                                      u64 bytenr, u64 len, u32 count)
12447 {
12448         struct btrfs_root *root;
12449         struct btrfs_root *extent_root = fs_info->extent_root;
12450         struct btrfs_key key;
12451         struct btrfs_path path;
12452         struct extent_buffer *leaf;
12453         struct btrfs_file_extent_item *fi;
12454         u32 found_count = 0;
12455         int slot;
12456         int ret = 0;
12457
12458         if (!len) {
12459                 key.objectid = bytenr;
12460                 key.type = BTRFS_EXTENT_ITEM_KEY;
12461                 key.offset = (u64)-1;
12462
12463                 btrfs_init_path(&path);
12464                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12465                 if (ret < 0)
12466                         goto out;
12467                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12468                 if (ret)
12469                         goto out;
12470                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12471                 if (key.objectid != bytenr ||
12472                     key.type != BTRFS_EXTENT_ITEM_KEY)
12473                         goto out;
12474                 len = key.offset;
12475                 btrfs_release_path(&path);
12476         }
12477         key.objectid = root_id;
12478         key.type = BTRFS_ROOT_ITEM_KEY;
12479         key.offset = (u64)-1;
12480         btrfs_init_path(&path);
12481
12482         root = btrfs_read_fs_root(fs_info, &key);
12483         if (IS_ERR(root))
12484                 goto out;
12485
12486         key.objectid = objectid;
12487         key.type = BTRFS_EXTENT_DATA_KEY;
12488         /*
12489          * It can be nasty as data backref offset is
12490          * file offset - file extent offset, which is smaller or
12491          * equal to original backref offset.  The only special case is
12492          * overflow.  So we need to special check and do further search.
12493          */
12494         key.offset = offset & (1ULL << 63) ? 0 : offset;
12495
12496         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12497         if (ret < 0)
12498                 goto out;
12499
12500         /*
12501          * Search afterwards to get correct one
12502          * NOTE: As we must do a comprehensive check on the data backref to
12503          * make sure the dref count also matches, we must iterate all file
12504          * extents for that inode.
12505          */
12506         while (1) {
12507                 leaf = path.nodes[0];
12508                 slot = path.slots[0];
12509
12510                 if (slot >= btrfs_header_nritems(leaf) ||
12511                     btrfs_header_owner(leaf) != root_id)
12512                         goto next;
12513                 btrfs_item_key_to_cpu(leaf, &key, slot);
12514                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12515                         break;
12516                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12517                 /*
12518                  * Except normal disk bytenr and disk num bytes, we still
12519                  * need to do extra check on dbackref offset as
12520                  * dbackref offset = file_offset - file_extent_offset
12521                  */
12522                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12523                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12524                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12525                     offset)
12526                         found_count++;
12527
12528 next:
12529                 ret = btrfs_next_item(root, &path);
12530                 if (ret)
12531                         break;
12532         }
12533 out:
12534         btrfs_release_path(&path);
12535         if (found_count != count) {
12536                 error(
12537 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12538                         bytenr, len, root_id, objectid, offset, count, found_count);
12539                 return REFERENCER_MISSING;
12540         }
12541         return 0;
12542 }
12543
12544 /*
12545  * Check if the referencer of a shared data backref exists
12546  */
12547 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12548                                      u64 parent, u64 bytenr)
12549 {
12550         struct extent_buffer *eb;
12551         struct btrfs_key key;
12552         struct btrfs_file_extent_item *fi;
12553         u32 nr;
12554         int found_parent = 0;
12555         int i;
12556
12557         eb = read_tree_block(fs_info, parent, 0);
12558         if (!extent_buffer_uptodate(eb))
12559                 goto out;
12560
12561         nr = btrfs_header_nritems(eb);
12562         for (i = 0; i < nr; i++) {
12563                 btrfs_item_key_to_cpu(eb, &key, i);
12564                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12565                         continue;
12566
12567                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12568                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12569                         continue;
12570
12571                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12572                         found_parent = 1;
12573                         break;
12574                 }
12575         }
12576
12577 out:
12578         free_extent_buffer(eb);
12579         if (!found_parent) {
12580                 error("shared extent %llu referencer lost (parent: %llu)",
12581                         bytenr, parent);
12582                 return REFERENCER_MISSING;
12583         }
12584         return 0;
12585 }
12586
12587 /*
12588  * Only delete backref if REFERENCER_MISSING now
12589  *
12590  * Returns <0   the extent was deleted
12591  * Returns >0   the backref was deleted but extent still exists, returned value
12592  *               means error after repair
12593  * Returns  0   nothing happened
12594  */
12595 static int repair_extent_item(struct btrfs_trans_handle *trans,
12596                       struct btrfs_root *root, struct btrfs_path *path,
12597                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12598                       u64 owner, u64 offset, int err)
12599 {
12600         struct btrfs_key old_key;
12601         int freed = 0;
12602         int ret;
12603
12604         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12605
12606         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12607                 /* delete the backref */
12608                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12609                           num_bytes, parent, root_objectid, owner, offset);
12610                 if (!ret) {
12611                         freed = 1;
12612                         err &= ~REFERENCER_MISSING;
12613                         printf("Delete backref in extent [%llu %llu]\n",
12614                                bytenr, num_bytes);
12615                 } else {
12616                         error("fail to delete backref in extent [%llu %llu]",
12617                                bytenr, num_bytes);
12618                 }
12619         }
12620
12621         /* btrfs_free_extent may delete the extent */
12622         btrfs_release_path(path);
12623         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12624
12625         if (ret)
12626                 ret = -ENOENT;
12627         else if (freed)
12628                 ret = err;
12629         return ret;
12630 }
12631
12632 /*
12633  * This function will check a given extent item, including its backref and
12634  * itself (like crossing stripe boundary and type)
12635  *
12636  * Since we don't use extent_record anymore, introduce new error bit
12637  */
12638 static int check_extent_item(struct btrfs_trans_handle *trans,
12639                              struct btrfs_fs_info *fs_info,
12640                              struct btrfs_path *path)
12641 {
12642         struct btrfs_extent_item *ei;
12643         struct btrfs_extent_inline_ref *iref;
12644         struct btrfs_extent_data_ref *dref;
12645         struct extent_buffer *eb = path->nodes[0];
12646         unsigned long end;
12647         unsigned long ptr;
12648         int slot = path->slots[0];
12649         int type;
12650         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12651         u32 item_size = btrfs_item_size_nr(eb, slot);
12652         u64 flags;
12653         u64 offset;
12654         u64 parent;
12655         u64 num_bytes;
12656         u64 root_objectid;
12657         u64 owner;
12658         u64 owner_offset;
12659         int metadata = 0;
12660         int level;
12661         struct btrfs_key key;
12662         int ret;
12663         int err = 0;
12664
12665         btrfs_item_key_to_cpu(eb, &key, slot);
12666         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12667                 bytes_used += key.offset;
12668                 num_bytes = key.offset;
12669         } else {
12670                 bytes_used += nodesize;
12671                 num_bytes = nodesize;
12672         }
12673
12674         if (item_size < sizeof(*ei)) {
12675                 /*
12676                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12677                  * old thing when on disk format is still un-determined.
12678                  * No need to care about it anymore
12679                  */
12680                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12681                 return -ENOTTY;
12682         }
12683
12684         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12685         flags = btrfs_extent_flags(eb, ei);
12686
12687         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12688                 metadata = 1;
12689         if (metadata && check_crossing_stripes(global_info, key.objectid,
12690                                                eb->len)) {
12691                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12692                       key.objectid, key.objectid + nodesize);
12693                 err |= CROSSING_STRIPE_BOUNDARY;
12694         }
12695
12696         ptr = (unsigned long)(ei + 1);
12697
12698         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12699                 /* Old EXTENT_ITEM metadata */
12700                 struct btrfs_tree_block_info *info;
12701
12702                 info = (struct btrfs_tree_block_info *)ptr;
12703                 level = btrfs_tree_block_level(eb, info);
12704                 ptr += sizeof(struct btrfs_tree_block_info);
12705         } else {
12706                 /* New METADATA_ITEM */
12707                 level = key.offset;
12708         }
12709         end = (unsigned long)ei + item_size;
12710
12711 next:
12712         /* Reached extent item end normally */
12713         if (ptr == end)
12714                 goto out;
12715
12716         /* Beyond extent item end, wrong item size */
12717         if (ptr > end) {
12718                 err |= ITEM_SIZE_MISMATCH;
12719                 error("extent item at bytenr %llu slot %d has wrong size",
12720                         eb->start, slot);
12721                 goto out;
12722         }
12723
12724         parent = 0;
12725         root_objectid = 0;
12726         owner = 0;
12727         owner_offset = 0;
12728         /* Now check every backref in this extent item */
12729         iref = (struct btrfs_extent_inline_ref *)ptr;
12730         type = btrfs_extent_inline_ref_type(eb, iref);
12731         offset = btrfs_extent_inline_ref_offset(eb, iref);
12732         switch (type) {
12733         case BTRFS_TREE_BLOCK_REF_KEY:
12734                 root_objectid = offset;
12735                 owner = level;
12736                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12737                                                level);
12738                 err |= ret;
12739                 break;
12740         case BTRFS_SHARED_BLOCK_REF_KEY:
12741                 parent = offset;
12742                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12743                                                  level);
12744                 err |= ret;
12745                 break;
12746         case BTRFS_EXTENT_DATA_REF_KEY:
12747                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12748                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12749                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12750                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12751                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12752                                         owner_offset, key.objectid, key.offset,
12753                                         btrfs_extent_data_ref_count(eb, dref));
12754                 err |= ret;
12755                 break;
12756         case BTRFS_SHARED_DATA_REF_KEY:
12757                 parent = offset;
12758                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12759                 err |= ret;
12760                 break;
12761         default:
12762                 error("extent[%llu %d %llu] has unknown ref type: %d",
12763                         key.objectid, key.type, key.offset, type);
12764                 ret = UNKNOWN_TYPE;
12765                 err |= ret;
12766                 goto out;
12767         }
12768
12769         if (err && repair) {
12770                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12771                          key.objectid, num_bytes, parent, root_objectid,
12772                          owner, owner_offset, ret);
12773                 if (ret < 0)
12774                         goto out;
12775                 if (ret) {
12776                         goto next;
12777                         err = ret;
12778                 }
12779         }
12780
12781         ptr += btrfs_extent_inline_ref_size(type);
12782         goto next;
12783
12784 out:
12785         return err;
12786 }
12787
12788 /*
12789  * Check if a dev extent item is referred correctly by its chunk
12790  */
12791 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12792                                  struct extent_buffer *eb, int slot)
12793 {
12794         struct btrfs_root *chunk_root = fs_info->chunk_root;
12795         struct btrfs_dev_extent *ptr;
12796         struct btrfs_path path;
12797         struct btrfs_key chunk_key;
12798         struct btrfs_key devext_key;
12799         struct btrfs_chunk *chunk;
12800         struct extent_buffer *l;
12801         int num_stripes;
12802         u64 length;
12803         int i;
12804         int found_chunk = 0;
12805         int ret;
12806
12807         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12808         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12809         length = btrfs_dev_extent_length(eb, ptr);
12810
12811         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12812         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12813         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12814
12815         btrfs_init_path(&path);
12816         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12817         if (ret)
12818                 goto out;
12819
12820         l = path.nodes[0];
12821         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12822         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12823                                       chunk_key.offset);
12824         if (ret < 0)
12825                 goto out;
12826
12827         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12828                 goto out;
12829
12830         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12831         for (i = 0; i < num_stripes; i++) {
12832                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12833                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12834
12835                 if (devid == devext_key.objectid &&
12836                     offset == devext_key.offset) {
12837                         found_chunk = 1;
12838                         break;
12839                 }
12840         }
12841 out:
12842         btrfs_release_path(&path);
12843         if (!found_chunk) {
12844                 error(
12845                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12846                         devext_key.objectid, devext_key.offset, length);
12847                 return REFERENCER_MISSING;
12848         }
12849         return 0;
12850 }
12851
12852 /*
12853  * Check if the used space is correct with the dev item
12854  */
12855 static int check_dev_item(struct btrfs_fs_info *fs_info,
12856                           struct extent_buffer *eb, int slot)
12857 {
12858         struct btrfs_root *dev_root = fs_info->dev_root;
12859         struct btrfs_dev_item *dev_item;
12860         struct btrfs_path path;
12861         struct btrfs_key key;
12862         struct btrfs_dev_extent *ptr;
12863         u64 total_bytes;
12864         u64 dev_id;
12865         u64 used;
12866         u64 total = 0;
12867         int ret;
12868
12869         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12870         dev_id = btrfs_device_id(eb, dev_item);
12871         used = btrfs_device_bytes_used(eb, dev_item);
12872         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12873
12874         key.objectid = dev_id;
12875         key.type = BTRFS_DEV_EXTENT_KEY;
12876         key.offset = 0;
12877
12878         btrfs_init_path(&path);
12879         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12880         if (ret < 0) {
12881                 btrfs_item_key_to_cpu(eb, &key, slot);
12882                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12883                         key.objectid, key.type, key.offset);
12884                 btrfs_release_path(&path);
12885                 return REFERENCER_MISSING;
12886         }
12887
12888         /* Iterate dev_extents to calculate the used space of a device */
12889         while (1) {
12890                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12891                         goto next;
12892
12893                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12894                 if (key.objectid > dev_id)
12895                         break;
12896                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12897                         goto next;
12898
12899                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12900                                      struct btrfs_dev_extent);
12901                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12902 next:
12903                 ret = btrfs_next_item(dev_root, &path);
12904                 if (ret)
12905                         break;
12906         }
12907         btrfs_release_path(&path);
12908
12909         if (used != total) {
12910                 btrfs_item_key_to_cpu(eb, &key, slot);
12911                 error(
12912 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12913                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12914                         BTRFS_DEV_EXTENT_KEY, dev_id);
12915                 return ACCOUNTING_MISMATCH;
12916         }
12917         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12918
12919         return 0;
12920 }
12921
12922 /*
12923  * Check a block group item with its referener (chunk) and its used space
12924  * with extent/metadata item
12925  */
12926 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12927                                   struct extent_buffer *eb, int slot)
12928 {
12929         struct btrfs_root *extent_root = fs_info->extent_root;
12930         struct btrfs_root *chunk_root = fs_info->chunk_root;
12931         struct btrfs_block_group_item *bi;
12932         struct btrfs_block_group_item bg_item;
12933         struct btrfs_path path;
12934         struct btrfs_key bg_key;
12935         struct btrfs_key chunk_key;
12936         struct btrfs_key extent_key;
12937         struct btrfs_chunk *chunk;
12938         struct extent_buffer *leaf;
12939         struct btrfs_extent_item *ei;
12940         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12941         u64 flags;
12942         u64 bg_flags;
12943         u64 used;
12944         u64 total = 0;
12945         int ret;
12946         int err = 0;
12947
12948         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12949         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12950         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12951         used = btrfs_block_group_used(&bg_item);
12952         bg_flags = btrfs_block_group_flags(&bg_item);
12953
12954         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12955         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12956         chunk_key.offset = bg_key.objectid;
12957
12958         btrfs_init_path(&path);
12959         /* Search for the referencer chunk */
12960         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12961         if (ret) {
12962                 error(
12963                 "block group[%llu %llu] did not find the related chunk item",
12964                         bg_key.objectid, bg_key.offset);
12965                 err |= REFERENCER_MISSING;
12966         } else {
12967                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12968                                         struct btrfs_chunk);
12969                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12970                                                 bg_key.offset) {
12971                         error(
12972         "block group[%llu %llu] related chunk item length does not match",
12973                                 bg_key.objectid, bg_key.offset);
12974                         err |= REFERENCER_MISMATCH;
12975                 }
12976         }
12977         btrfs_release_path(&path);
12978
12979         /* Search from the block group bytenr */
12980         extent_key.objectid = bg_key.objectid;
12981         extent_key.type = 0;
12982         extent_key.offset = 0;
12983
12984         btrfs_init_path(&path);
12985         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12986         if (ret < 0)
12987                 goto out;
12988
12989         /* Iterate extent tree to account used space */
12990         while (1) {
12991                 leaf = path.nodes[0];
12992
12993                 /* Search slot can point to the last item beyond leaf nritems */
12994                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12995                         goto next;
12996
12997                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12998                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12999                         break;
13000
13001                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
13002                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
13003                         goto next;
13004                 if (extent_key.objectid < bg_key.objectid)
13005                         goto next;
13006
13007                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
13008                         total += nodesize;
13009                 else
13010                         total += extent_key.offset;
13011
13012                 ei = btrfs_item_ptr(leaf, path.slots[0],
13013                                     struct btrfs_extent_item);
13014                 flags = btrfs_extent_flags(leaf, ei);
13015                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
13016                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
13017                                 error(
13018                         "bad extent[%llu, %llu) type mismatch with chunk",
13019                                         extent_key.objectid,
13020                                         extent_key.objectid + extent_key.offset);
13021                                 err |= CHUNK_TYPE_MISMATCH;
13022                         }
13023                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
13024                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
13025                                     BTRFS_BLOCK_GROUP_METADATA))) {
13026                                 error(
13027                         "bad extent[%llu, %llu) type mismatch with chunk",
13028                                         extent_key.objectid,
13029                                         extent_key.objectid + nodesize);
13030                                 err |= CHUNK_TYPE_MISMATCH;
13031                         }
13032                 }
13033 next:
13034                 ret = btrfs_next_item(extent_root, &path);
13035                 if (ret)
13036                         break;
13037         }
13038
13039 out:
13040         btrfs_release_path(&path);
13041
13042         if (total != used) {
13043                 error(
13044                 "block group[%llu %llu] used %llu but extent items used %llu",
13045                         bg_key.objectid, bg_key.offset, used, total);
13046                 err |= BG_ACCOUNTING_ERROR;
13047         }
13048         return err;
13049 }
13050
13051 /*
13052  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
13053  * FIXME: We still need to repair error of dev_item.
13054  *
13055  * Returns error after repair.
13056  */
13057 static int repair_chunk_item(struct btrfs_trans_handle *trans,
13058                              struct btrfs_root *chunk_root,
13059                              struct btrfs_path *path, int err)
13060 {
13061         struct btrfs_chunk *chunk;
13062         struct btrfs_key chunk_key;
13063         struct extent_buffer *eb = path->nodes[0];
13064         u64 length;
13065         int slot = path->slots[0];
13066         u64 type;
13067         int ret = 0;
13068
13069         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13070         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
13071                 return err;
13072         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13073         type = btrfs_chunk_type(path->nodes[0], chunk);
13074         length = btrfs_chunk_length(eb, chunk);
13075
13076         if (err & REFERENCER_MISSING) {
13077                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
13078                      type, chunk_key.objectid, chunk_key.offset, length);
13079                 if (ret) {
13080                         error("fail to add block group item[%llu %llu]",
13081                               chunk_key.offset, length);
13082                         goto out;
13083                 } else {
13084                         err &= ~REFERENCER_MISSING;
13085                         printf("Added block group item[%llu %llu]\n",
13086                                chunk_key.offset, length);
13087                 }
13088         }
13089
13090 out:
13091         return err;
13092 }
13093
13094 /*
13095  * Check a chunk item.
13096  * Including checking all referred dev_extents and block group
13097  */
13098 static int check_chunk_item(struct btrfs_fs_info *fs_info,
13099                             struct extent_buffer *eb, int slot)
13100 {
13101         struct btrfs_root *extent_root = fs_info->extent_root;
13102         struct btrfs_root *dev_root = fs_info->dev_root;
13103         struct btrfs_path path;
13104         struct btrfs_key chunk_key;
13105         struct btrfs_key bg_key;
13106         struct btrfs_key devext_key;
13107         struct btrfs_chunk *chunk;
13108         struct extent_buffer *leaf;
13109         struct btrfs_block_group_item *bi;
13110         struct btrfs_block_group_item bg_item;
13111         struct btrfs_dev_extent *ptr;
13112         u64 length;
13113         u64 chunk_end;
13114         u64 stripe_len;
13115         u64 type;
13116         int num_stripes;
13117         u64 offset;
13118         u64 objectid;
13119         int i;
13120         int ret;
13121         int err = 0;
13122
13123         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13124         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13125         length = btrfs_chunk_length(eb, chunk);
13126         chunk_end = chunk_key.offset + length;
13127         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13128                                       chunk_key.offset);
13129         if (ret < 0) {
13130                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13131                         chunk_end);
13132                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13133                 goto out;
13134         }
13135         type = btrfs_chunk_type(eb, chunk);
13136
13137         bg_key.objectid = chunk_key.offset;
13138         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13139         bg_key.offset = length;
13140
13141         btrfs_init_path(&path);
13142         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13143         if (ret) {
13144                 error(
13145                 "chunk[%llu %llu) did not find the related block group item",
13146                         chunk_key.offset, chunk_end);
13147                 err |= REFERENCER_MISSING;
13148         } else{
13149                 leaf = path.nodes[0];
13150                 bi = btrfs_item_ptr(leaf, path.slots[0],
13151                                     struct btrfs_block_group_item);
13152                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13153                                    sizeof(bg_item));
13154                 if (btrfs_block_group_flags(&bg_item) != type) {
13155                         error(
13156 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13157                                 chunk_key.offset, chunk_end, type,
13158                                 btrfs_block_group_flags(&bg_item));
13159                         err |= REFERENCER_MISSING;
13160                 }
13161         }
13162
13163         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13164         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13165         for (i = 0; i < num_stripes; i++) {
13166                 btrfs_release_path(&path);
13167                 btrfs_init_path(&path);
13168                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13169                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13170                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13171
13172                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13173                                         0, 0);
13174                 if (ret)
13175                         goto not_match_dev;
13176
13177                 leaf = path.nodes[0];
13178                 ptr = btrfs_item_ptr(leaf, path.slots[0],
13179                                      struct btrfs_dev_extent);
13180                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13181                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13182                 if (objectid != chunk_key.objectid ||
13183                     offset != chunk_key.offset ||
13184                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13185                         goto not_match_dev;
13186                 continue;
13187 not_match_dev:
13188                 err |= BACKREF_MISSING;
13189                 error(
13190                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13191                         chunk_key.objectid, chunk_end, i);
13192                 continue;
13193         }
13194         btrfs_release_path(&path);
13195 out:
13196         return err;
13197 }
13198
13199 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13200                                    struct btrfs_root *root,
13201                                    struct btrfs_path *path)
13202 {
13203         struct btrfs_key key;
13204         int ret = 0;
13205
13206         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13207         btrfs_release_path(path);
13208         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13209         if (ret) {
13210                 ret = -ENOENT;
13211                 goto out;
13212         }
13213
13214         ret = btrfs_del_item(trans, root, path);
13215         if (ret)
13216                 goto out;
13217
13218         if (path->slots[0] == 0)
13219                 btrfs_prev_leaf(root, path);
13220         else
13221                 path->slots[0]--;
13222 out:
13223         if (ret)
13224                 error("failed to delete root %llu item[%llu, %u, %llu]",
13225                       root->objectid, key.objectid, key.type, key.offset);
13226         else
13227                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13228                        root->objectid, key.objectid, key.type, key.offset);
13229         return ret;
13230 }
13231
13232 /*
13233  * Main entry function to check known items and update related accounting info
13234  */
13235 static int check_leaf_items(struct btrfs_trans_handle *trans,
13236                             struct btrfs_root *root, struct btrfs_path *path,
13237                             struct node_refs *nrefs, int account_bytes)
13238 {
13239         struct btrfs_fs_info *fs_info = root->fs_info;
13240         struct btrfs_key key;
13241         struct extent_buffer *eb;
13242         int slot;
13243         int type;
13244         struct btrfs_extent_data_ref *dref;
13245         int ret = 0;
13246         int err = 0;
13247
13248 again:
13249         eb = path->nodes[0];
13250         slot = path->slots[0];
13251         if (slot >= btrfs_header_nritems(eb)) {
13252                 if (slot == 0) {
13253                         error("empty leaf [%llu %u] root %llu", eb->start,
13254                                 root->fs_info->nodesize, root->objectid);
13255                         err |= EIO;
13256                 }
13257                 goto out;
13258         }
13259
13260         btrfs_item_key_to_cpu(eb, &key, slot);
13261         type = key.type;
13262
13263         switch (type) {
13264         case BTRFS_EXTENT_DATA_KEY:
13265                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13266                 if (repair && ret)
13267                         ret = repair_extent_data_item(trans, root, path, nrefs,
13268                                                       ret);
13269                 err |= ret;
13270                 break;
13271         case BTRFS_BLOCK_GROUP_ITEM_KEY:
13272                 ret = check_block_group_item(fs_info, eb, slot);
13273                 if (repair &&
13274                     ret & REFERENCER_MISSING)
13275                         ret = delete_extent_tree_item(trans, root, path);
13276                 err |= ret;
13277                 break;
13278         case BTRFS_DEV_ITEM_KEY:
13279                 ret = check_dev_item(fs_info, eb, slot);
13280                 err |= ret;
13281                 break;
13282         case BTRFS_CHUNK_ITEM_KEY:
13283                 ret = check_chunk_item(fs_info, eb, slot);
13284                 if (repair && ret)
13285                         ret = repair_chunk_item(trans, root, path, ret);
13286                 err |= ret;
13287                 break;
13288         case BTRFS_DEV_EXTENT_KEY:
13289                 ret = check_dev_extent_item(fs_info, eb, slot);
13290                 err |= ret;
13291                 break;
13292         case BTRFS_EXTENT_ITEM_KEY:
13293         case BTRFS_METADATA_ITEM_KEY:
13294                 ret = check_extent_item(trans, fs_info, path);
13295                 err |= ret;
13296                 break;
13297         case BTRFS_EXTENT_CSUM_KEY:
13298                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
13299                 err |= ret;
13300                 break;
13301         case BTRFS_TREE_BLOCK_REF_KEY:
13302                 ret = check_tree_block_backref(fs_info, key.offset,
13303                                                key.objectid, -1);
13304                 if (repair &&
13305                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13306                         ret = delete_extent_tree_item(trans, root, path);
13307                 err |= ret;
13308                 break;
13309         case BTRFS_EXTENT_DATA_REF_KEY:
13310                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13311                 ret = check_extent_data_backref(fs_info,
13312                                 btrfs_extent_data_ref_root(eb, dref),
13313                                 btrfs_extent_data_ref_objectid(eb, dref),
13314                                 btrfs_extent_data_ref_offset(eb, dref),
13315                                 key.objectid, 0,
13316                                 btrfs_extent_data_ref_count(eb, dref));
13317                 if (repair &&
13318                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13319                         ret = delete_extent_tree_item(trans, root, path);
13320                 err |= ret;
13321                 break;
13322         case BTRFS_SHARED_BLOCK_REF_KEY:
13323                 ret = check_shared_block_backref(fs_info, key.offset,
13324                                                  key.objectid, -1);
13325                 if (repair &&
13326                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13327                         ret = delete_extent_tree_item(trans, root, path);
13328                 err |= ret;
13329                 break;
13330         case BTRFS_SHARED_DATA_REF_KEY:
13331                 ret = check_shared_data_backref(fs_info, key.offset,
13332                                                 key.objectid);
13333                 if (repair &&
13334                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13335                         ret = delete_extent_tree_item(trans, root, path);
13336                 err |= ret;
13337                 break;
13338         default:
13339                 break;
13340         }
13341
13342         ++path->slots[0];
13343         goto again;
13344 out:
13345         return err;
13346 }
13347
13348 /*
13349  * Low memory usage version check_chunks_and_extents.
13350  */
13351 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13352 {
13353         struct btrfs_trans_handle *trans = NULL;
13354         struct btrfs_path path;
13355         struct btrfs_key old_key;
13356         struct btrfs_key key;
13357         struct btrfs_root *root1;
13358         struct btrfs_root *root;
13359         struct btrfs_root *cur_root;
13360         int err = 0;
13361         int ret;
13362
13363         root = fs_info->fs_root;
13364
13365         if (repair) {
13366                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13367                 if (IS_ERR(trans)) {
13368                         error("failed to start transaction before check");
13369                         return PTR_ERR(trans);
13370                 }
13371         }
13372
13373         root1 = root->fs_info->chunk_root;
13374         ret = check_btrfs_root(trans, root1, 0, 1);
13375         err |= ret;
13376
13377         root1 = root->fs_info->tree_root;
13378         ret = check_btrfs_root(trans, root1, 0, 1);
13379         err |= ret;
13380
13381         btrfs_init_path(&path);
13382         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13383         key.offset = 0;
13384         key.type = BTRFS_ROOT_ITEM_KEY;
13385
13386         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13387         if (ret) {
13388                 error("cannot find extent tree in tree_root");
13389                 goto out;
13390         }
13391
13392         while (1) {
13393                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13394                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13395                         goto next;
13396                 old_key = key;
13397                 key.offset = (u64)-1;
13398
13399                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13400                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13401                                         &key);
13402                 else
13403                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13404                 if (IS_ERR(cur_root) || !cur_root) {
13405                         error("failed to read tree: %lld", key.objectid);
13406                         goto next;
13407                 }
13408
13409                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13410                 err |= ret;
13411
13412                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13413                         btrfs_free_fs_root(cur_root);
13414
13415                 btrfs_release_path(&path);
13416                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13417                                         &old_key, &path, 0, 0);
13418                 if (ret)
13419                         goto out;
13420 next:
13421                 ret = btrfs_next_item(root1, &path);
13422                 if (ret)
13423                         goto out;
13424         }
13425 out:
13426
13427         /* if repair, update block accounting */
13428         if (repair) {
13429                 ret = btrfs_fix_block_accounting(trans, root);
13430                 if (ret)
13431                         err |= ret;
13432                 else
13433                         err &= ~BG_ACCOUNTING_ERROR;
13434         }
13435
13436         if (trans)
13437                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13438
13439         btrfs_release_path(&path);
13440
13441         return err;
13442 }
13443
13444 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13445 {
13446         int ret;
13447
13448         if (!ctx.progress_enabled)
13449                 fprintf(stderr, "checking extents\n");
13450         if (check_mode == CHECK_MODE_LOWMEM)
13451                 ret = check_chunks_and_extents_v2(fs_info);
13452         else
13453                 ret = check_chunks_and_extents(fs_info);
13454
13455         /* Also repair device size related problems */
13456         if (repair && !ret) {
13457                 ret = btrfs_fix_device_and_super_size(fs_info);
13458                 if (ret > 0)
13459                         ret = 0;
13460         }
13461         return ret;
13462 }
13463
13464 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13465                            struct btrfs_root *root, int overwrite)
13466 {
13467         struct extent_buffer *c;
13468         struct extent_buffer *old = root->node;
13469         int level;
13470         int ret;
13471         struct btrfs_disk_key disk_key = {0,0,0};
13472
13473         level = 0;
13474
13475         if (overwrite) {
13476                 c = old;
13477                 extent_buffer_get(c);
13478                 goto init;
13479         }
13480         c = btrfs_alloc_free_block(trans, root,
13481                                    root->fs_info->nodesize,
13482                                    root->root_key.objectid,
13483                                    &disk_key, level, 0, 0);
13484         if (IS_ERR(c)) {
13485                 c = old;
13486                 extent_buffer_get(c);
13487                 overwrite = 1;
13488         }
13489 init:
13490         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13491         btrfs_set_header_level(c, level);
13492         btrfs_set_header_bytenr(c, c->start);
13493         btrfs_set_header_generation(c, trans->transid);
13494         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13495         btrfs_set_header_owner(c, root->root_key.objectid);
13496
13497         write_extent_buffer(c, root->fs_info->fsid,
13498                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13499
13500         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13501                             btrfs_header_chunk_tree_uuid(c),
13502                             BTRFS_UUID_SIZE);
13503
13504         btrfs_mark_buffer_dirty(c);
13505         /*
13506          * this case can happen in the following case:
13507          *
13508          * 1.overwrite previous root.
13509          *
13510          * 2.reinit reloc data root, this is because we skip pin
13511          * down reloc data tree before which means we can allocate
13512          * same block bytenr here.
13513          */
13514         if (old->start == c->start) {
13515                 btrfs_set_root_generation(&root->root_item,
13516                                           trans->transid);
13517                 root->root_item.level = btrfs_header_level(root->node);
13518                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13519                                         &root->root_key, &root->root_item);
13520                 if (ret) {
13521                         free_extent_buffer(c);
13522                         return ret;
13523                 }
13524         }
13525         free_extent_buffer(old);
13526         root->node = c;
13527         add_root_to_dirty_list(root);
13528         return 0;
13529 }
13530
13531 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13532                                 struct extent_buffer *eb, int tree_root)
13533 {
13534         struct extent_buffer *tmp;
13535         struct btrfs_root_item *ri;
13536         struct btrfs_key key;
13537         u64 bytenr;
13538         int level = btrfs_header_level(eb);
13539         int nritems;
13540         int ret;
13541         int i;
13542
13543         /*
13544          * If we have pinned this block before, don't pin it again.
13545          * This can not only avoid forever loop with broken filesystem
13546          * but also give us some speedups.
13547          */
13548         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13549                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13550                 return 0;
13551
13552         btrfs_pin_extent(fs_info, eb->start, eb->len);
13553
13554         nritems = btrfs_header_nritems(eb);
13555         for (i = 0; i < nritems; i++) {
13556                 if (level == 0) {
13557                         btrfs_item_key_to_cpu(eb, &key, i);
13558                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13559                                 continue;
13560                         /* Skip the extent root and reloc roots */
13561                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13562                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13563                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13564                                 continue;
13565                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13566                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13567
13568                         /*
13569                          * If at any point we start needing the real root we
13570                          * will have to build a stump root for the root we are
13571                          * in, but for now this doesn't actually use the root so
13572                          * just pass in extent_root.
13573                          */
13574                         tmp = read_tree_block(fs_info, bytenr, 0);
13575                         if (!extent_buffer_uptodate(tmp)) {
13576                                 fprintf(stderr, "Error reading root block\n");
13577                                 return -EIO;
13578                         }
13579                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13580                         free_extent_buffer(tmp);
13581                         if (ret)
13582                                 return ret;
13583                 } else {
13584                         bytenr = btrfs_node_blockptr(eb, i);
13585
13586                         /* If we aren't the tree root don't read the block */
13587                         if (level == 1 && !tree_root) {
13588                                 btrfs_pin_extent(fs_info, bytenr,
13589                                                 fs_info->nodesize);
13590                                 continue;
13591                         }
13592
13593                         tmp = read_tree_block(fs_info, bytenr, 0);
13594                         if (!extent_buffer_uptodate(tmp)) {
13595                                 fprintf(stderr, "Error reading tree block\n");
13596                                 return -EIO;
13597                         }
13598                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13599                         free_extent_buffer(tmp);
13600                         if (ret)
13601                                 return ret;
13602                 }
13603         }
13604
13605         return 0;
13606 }
13607
13608 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13609 {
13610         int ret;
13611
13612         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13613         if (ret)
13614                 return ret;
13615
13616         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13617 }
13618
13619 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13620 {
13621         struct btrfs_block_group_cache *cache;
13622         struct btrfs_path path;
13623         struct extent_buffer *leaf;
13624         struct btrfs_chunk *chunk;
13625         struct btrfs_key key;
13626         int ret;
13627         u64 start;
13628
13629         btrfs_init_path(&path);
13630         key.objectid = 0;
13631         key.type = BTRFS_CHUNK_ITEM_KEY;
13632         key.offset = 0;
13633         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13634         if (ret < 0) {
13635                 btrfs_release_path(&path);
13636                 return ret;
13637         }
13638
13639         /*
13640          * We do this in case the block groups were screwed up and had alloc
13641          * bits that aren't actually set on the chunks.  This happens with
13642          * restored images every time and could happen in real life I guess.
13643          */
13644         fs_info->avail_data_alloc_bits = 0;
13645         fs_info->avail_metadata_alloc_bits = 0;
13646         fs_info->avail_system_alloc_bits = 0;
13647
13648         /* First we need to create the in-memory block groups */
13649         while (1) {
13650                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13651                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13652                         if (ret < 0) {
13653                                 btrfs_release_path(&path);
13654                                 return ret;
13655                         }
13656                         if (ret) {
13657                                 ret = 0;
13658                                 break;
13659                         }
13660                 }
13661                 leaf = path.nodes[0];
13662                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13663                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13664                         path.slots[0]++;
13665                         continue;
13666                 }
13667
13668                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13669                 btrfs_add_block_group(fs_info, 0,
13670                                       btrfs_chunk_type(leaf, chunk),
13671                                       key.objectid, key.offset,
13672                                       btrfs_chunk_length(leaf, chunk));
13673                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13674                                  key.offset + btrfs_chunk_length(leaf, chunk));
13675                 path.slots[0]++;
13676         }
13677         start = 0;
13678         while (1) {
13679                 cache = btrfs_lookup_first_block_group(fs_info, start);
13680                 if (!cache)
13681                         break;
13682                 cache->cached = 1;
13683                 start = cache->key.objectid + cache->key.offset;
13684         }
13685
13686         btrfs_release_path(&path);
13687         return 0;
13688 }
13689
13690 static int reset_balance(struct btrfs_trans_handle *trans,
13691                          struct btrfs_fs_info *fs_info)
13692 {
13693         struct btrfs_root *root = fs_info->tree_root;
13694         struct btrfs_path path;
13695         struct extent_buffer *leaf;
13696         struct btrfs_key key;
13697         int del_slot, del_nr = 0;
13698         int ret;
13699         int found = 0;
13700
13701         btrfs_init_path(&path);
13702         key.objectid = BTRFS_BALANCE_OBJECTID;
13703         key.type = BTRFS_BALANCE_ITEM_KEY;
13704         key.offset = 0;
13705         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13706         if (ret) {
13707                 if (ret > 0)
13708                         ret = 0;
13709                 if (!ret)
13710                         goto reinit_data_reloc;
13711                 else
13712                         goto out;
13713         }
13714
13715         ret = btrfs_del_item(trans, root, &path);
13716         if (ret)
13717                 goto out;
13718         btrfs_release_path(&path);
13719
13720         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13721         key.type = BTRFS_ROOT_ITEM_KEY;
13722         key.offset = 0;
13723         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13724         if (ret < 0)
13725                 goto out;
13726         while (1) {
13727                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13728                         if (!found)
13729                                 break;
13730
13731                         if (del_nr) {
13732                                 ret = btrfs_del_items(trans, root, &path,
13733                                                       del_slot, del_nr);
13734                                 del_nr = 0;
13735                                 if (ret)
13736                                         goto out;
13737                         }
13738                         key.offset++;
13739                         btrfs_release_path(&path);
13740
13741                         found = 0;
13742                         ret = btrfs_search_slot(trans, root, &key, &path,
13743                                                 -1, 1);
13744                         if (ret < 0)
13745                                 goto out;
13746                         continue;
13747                 }
13748                 found = 1;
13749                 leaf = path.nodes[0];
13750                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13751                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13752                         break;
13753                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13754                         path.slots[0]++;
13755                         continue;
13756                 }
13757                 if (!del_nr) {
13758                         del_slot = path.slots[0];
13759                         del_nr = 1;
13760                 } else {
13761                         del_nr++;
13762                 }
13763                 path.slots[0]++;
13764         }
13765
13766         if (del_nr) {
13767                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13768                 if (ret)
13769                         goto out;
13770         }
13771         btrfs_release_path(&path);
13772
13773 reinit_data_reloc:
13774         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13775         key.type = BTRFS_ROOT_ITEM_KEY;
13776         key.offset = (u64)-1;
13777         root = btrfs_read_fs_root(fs_info, &key);
13778         if (IS_ERR(root)) {
13779                 fprintf(stderr, "Error reading data reloc tree\n");
13780                 ret = PTR_ERR(root);
13781                 goto out;
13782         }
13783         record_root_in_trans(trans, root);
13784         ret = btrfs_fsck_reinit_root(trans, root, 0);
13785         if (ret)
13786                 goto out;
13787         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13788 out:
13789         btrfs_release_path(&path);
13790         return ret;
13791 }
13792
13793 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13794                               struct btrfs_fs_info *fs_info)
13795 {
13796         u64 start = 0;
13797         int ret;
13798
13799         /*
13800          * The only reason we don't do this is because right now we're just
13801          * walking the trees we find and pinning down their bytes, we don't look
13802          * at any of the leaves.  In order to do mixed groups we'd have to check
13803          * the leaves of any fs roots and pin down the bytes for any file
13804          * extents we find.  Not hard but why do it if we don't have to?
13805          */
13806         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13807                 fprintf(stderr, "We don't support re-initing the extent tree "
13808                         "for mixed block groups yet, please notify a btrfs "
13809                         "developer you want to do this so they can add this "
13810                         "functionality.\n");
13811                 return -EINVAL;
13812         }
13813
13814         /*
13815          * first we need to walk all of the trees except the extent tree and pin
13816          * down the bytes that are in use so we don't overwrite any existing
13817          * metadata.
13818          */
13819         ret = pin_metadata_blocks(fs_info);
13820         if (ret) {
13821                 fprintf(stderr, "error pinning down used bytes\n");
13822                 return ret;
13823         }
13824
13825         /*
13826          * Need to drop all the block groups since we're going to recreate all
13827          * of them again.
13828          */
13829         btrfs_free_block_groups(fs_info);
13830         ret = reset_block_groups(fs_info);
13831         if (ret) {
13832                 fprintf(stderr, "error resetting the block groups\n");
13833                 return ret;
13834         }
13835
13836         /* Ok we can allocate now, reinit the extent root */
13837         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13838         if (ret) {
13839                 fprintf(stderr, "extent root initialization failed\n");
13840                 /*
13841                  * When the transaction code is updated we should end the
13842                  * transaction, but for now progs only knows about commit so
13843                  * just return an error.
13844                  */
13845                 return ret;
13846         }
13847
13848         /*
13849          * Now we have all the in-memory block groups setup so we can make
13850          * allocations properly, and the metadata we care about is safe since we
13851          * pinned all of it above.
13852          */
13853         while (1) {
13854                 struct btrfs_block_group_cache *cache;
13855
13856                 cache = btrfs_lookup_first_block_group(fs_info, start);
13857                 if (!cache)
13858                         break;
13859                 start = cache->key.objectid + cache->key.offset;
13860                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13861                                         &cache->key, &cache->item,
13862                                         sizeof(cache->item));
13863                 if (ret) {
13864                         fprintf(stderr, "Error adding block group\n");
13865                         return ret;
13866                 }
13867                 btrfs_extent_post_op(trans, fs_info->extent_root);
13868         }
13869
13870         ret = reset_balance(trans, fs_info);
13871         if (ret)
13872                 fprintf(stderr, "error resetting the pending balance\n");
13873
13874         return ret;
13875 }
13876
13877 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13878 {
13879         struct btrfs_path path;
13880         struct btrfs_trans_handle *trans;
13881         struct btrfs_key key;
13882         int ret;
13883
13884         printf("Recowing metadata block %llu\n", eb->start);
13885         key.objectid = btrfs_header_owner(eb);
13886         key.type = BTRFS_ROOT_ITEM_KEY;
13887         key.offset = (u64)-1;
13888
13889         root = btrfs_read_fs_root(root->fs_info, &key);
13890         if (IS_ERR(root)) {
13891                 fprintf(stderr, "Couldn't find owner root %llu\n",
13892                         key.objectid);
13893                 return PTR_ERR(root);
13894         }
13895
13896         trans = btrfs_start_transaction(root, 1);
13897         if (IS_ERR(trans))
13898                 return PTR_ERR(trans);
13899
13900         btrfs_init_path(&path);
13901         path.lowest_level = btrfs_header_level(eb);
13902         if (path.lowest_level)
13903                 btrfs_node_key_to_cpu(eb, &key, 0);
13904         else
13905                 btrfs_item_key_to_cpu(eb, &key, 0);
13906
13907         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13908         btrfs_commit_transaction(trans, root);
13909         btrfs_release_path(&path);
13910         return ret;
13911 }
13912
13913 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13914 {
13915         struct btrfs_path path;
13916         struct btrfs_trans_handle *trans;
13917         struct btrfs_key key;
13918         int ret;
13919
13920         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13921                bad->key.type, bad->key.offset);
13922         key.objectid = bad->root_id;
13923         key.type = BTRFS_ROOT_ITEM_KEY;
13924         key.offset = (u64)-1;
13925
13926         root = btrfs_read_fs_root(root->fs_info, &key);
13927         if (IS_ERR(root)) {
13928                 fprintf(stderr, "Couldn't find owner root %llu\n",
13929                         key.objectid);
13930                 return PTR_ERR(root);
13931         }
13932
13933         trans = btrfs_start_transaction(root, 1);
13934         if (IS_ERR(trans))
13935                 return PTR_ERR(trans);
13936
13937         btrfs_init_path(&path);
13938         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13939         if (ret) {
13940                 if (ret > 0)
13941                         ret = 0;
13942                 goto out;
13943         }
13944         ret = btrfs_del_item(trans, root, &path);
13945 out:
13946         btrfs_commit_transaction(trans, root);
13947         btrfs_release_path(&path);
13948         return ret;
13949 }
13950
13951 static int zero_log_tree(struct btrfs_root *root)
13952 {
13953         struct btrfs_trans_handle *trans;
13954         int ret;
13955
13956         trans = btrfs_start_transaction(root, 1);
13957         if (IS_ERR(trans)) {
13958                 ret = PTR_ERR(trans);
13959                 return ret;
13960         }
13961         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13962         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13963         ret = btrfs_commit_transaction(trans, root);
13964         return ret;
13965 }
13966
13967 static int populate_csum(struct btrfs_trans_handle *trans,
13968                          struct btrfs_root *csum_root, char *buf, u64 start,
13969                          u64 len)
13970 {
13971         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13972         u64 offset = 0;
13973         u64 sectorsize;
13974         int ret = 0;
13975
13976         while (offset < len) {
13977                 sectorsize = fs_info->sectorsize;
13978                 ret = read_extent_data(fs_info, buf, start + offset,
13979                                        &sectorsize, 0);
13980                 if (ret)
13981                         break;
13982                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13983                                             start + offset, buf, sectorsize);
13984                 if (ret)
13985                         break;
13986                 offset += sectorsize;
13987         }
13988         return ret;
13989 }
13990
13991 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13992                                       struct btrfs_root *csum_root,
13993                                       struct btrfs_root *cur_root)
13994 {
13995         struct btrfs_path path;
13996         struct btrfs_key key;
13997         struct extent_buffer *node;
13998         struct btrfs_file_extent_item *fi;
13999         char *buf = NULL;
14000         u64 start = 0;
14001         u64 len = 0;
14002         int slot = 0;
14003         int ret = 0;
14004
14005         buf = malloc(cur_root->fs_info->sectorsize);
14006         if (!buf)
14007                 return -ENOMEM;
14008
14009         btrfs_init_path(&path);
14010         key.objectid = 0;
14011         key.offset = 0;
14012         key.type = 0;
14013         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
14014         if (ret < 0)
14015                 goto out;
14016         /* Iterate all regular file extents and fill its csum */
14017         while (1) {
14018                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
14019
14020                 if (key.type != BTRFS_EXTENT_DATA_KEY)
14021                         goto next;
14022                 node = path.nodes[0];
14023                 slot = path.slots[0];
14024                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
14025                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
14026                         goto next;
14027                 start = btrfs_file_extent_disk_bytenr(node, fi);
14028                 len = btrfs_file_extent_disk_num_bytes(node, fi);
14029
14030                 ret = populate_csum(trans, csum_root, buf, start, len);
14031                 if (ret == -EEXIST)
14032                         ret = 0;
14033                 if (ret < 0)
14034                         goto out;
14035 next:
14036                 /*
14037                  * TODO: if next leaf is corrupted, jump to nearest next valid
14038                  * leaf.
14039                  */
14040                 ret = btrfs_next_item(cur_root, &path);
14041                 if (ret < 0)
14042                         goto out;
14043                 if (ret > 0) {
14044                         ret = 0;
14045                         goto out;
14046                 }
14047         }
14048
14049 out:
14050         btrfs_release_path(&path);
14051         free(buf);
14052         return ret;
14053 }
14054
14055 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
14056                                   struct btrfs_root *csum_root)
14057 {
14058         struct btrfs_fs_info *fs_info = csum_root->fs_info;
14059         struct btrfs_path path;
14060         struct btrfs_root *tree_root = fs_info->tree_root;
14061         struct btrfs_root *cur_root;
14062         struct extent_buffer *node;
14063         struct btrfs_key key;
14064         int slot = 0;
14065         int ret = 0;
14066
14067         btrfs_init_path(&path);
14068         key.objectid = BTRFS_FS_TREE_OBJECTID;
14069         key.offset = 0;
14070         key.type = BTRFS_ROOT_ITEM_KEY;
14071         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
14072         if (ret < 0)
14073                 goto out;
14074         if (ret > 0) {
14075                 ret = -ENOENT;
14076                 goto out;
14077         }
14078
14079         while (1) {
14080                 node = path.nodes[0];
14081                 slot = path.slots[0];
14082                 btrfs_item_key_to_cpu(node, &key, slot);
14083                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
14084                         goto out;
14085                 if (key.type != BTRFS_ROOT_ITEM_KEY)
14086                         goto next;
14087                 if (!is_fstree(key.objectid))
14088                         goto next;
14089                 key.offset = (u64)-1;
14090
14091                 cur_root = btrfs_read_fs_root(fs_info, &key);
14092                 if (IS_ERR(cur_root) || !cur_root) {
14093                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
14094                                 key.objectid);
14095                         goto out;
14096                 }
14097                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
14098                                 cur_root);
14099                 if (ret < 0)
14100                         goto out;
14101 next:
14102                 ret = btrfs_next_item(tree_root, &path);
14103                 if (ret > 0) {
14104                         ret = 0;
14105                         goto out;
14106                 }
14107                 if (ret < 0)
14108                         goto out;
14109         }
14110
14111 out:
14112         btrfs_release_path(&path);
14113         return ret;
14114 }
14115
14116 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14117                                       struct btrfs_root *csum_root)
14118 {
14119         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14120         struct btrfs_path path;
14121         struct btrfs_extent_item *ei;
14122         struct extent_buffer *leaf;
14123         char *buf;
14124         struct btrfs_key key;
14125         int ret;
14126
14127         btrfs_init_path(&path);
14128         key.objectid = 0;
14129         key.type = BTRFS_EXTENT_ITEM_KEY;
14130         key.offset = 0;
14131         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14132         if (ret < 0) {
14133                 btrfs_release_path(&path);
14134                 return ret;
14135         }
14136
14137         buf = malloc(csum_root->fs_info->sectorsize);
14138         if (!buf) {
14139                 btrfs_release_path(&path);
14140                 return -ENOMEM;
14141         }
14142
14143         while (1) {
14144                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14145                         ret = btrfs_next_leaf(extent_root, &path);
14146                         if (ret < 0)
14147                                 break;
14148                         if (ret) {
14149                                 ret = 0;
14150                                 break;
14151                         }
14152                 }
14153                 leaf = path.nodes[0];
14154
14155                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14156                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14157                         path.slots[0]++;
14158                         continue;
14159                 }
14160
14161                 ei = btrfs_item_ptr(leaf, path.slots[0],
14162                                     struct btrfs_extent_item);
14163                 if (!(btrfs_extent_flags(leaf, ei) &
14164                       BTRFS_EXTENT_FLAG_DATA)) {
14165                         path.slots[0]++;
14166                         continue;
14167                 }
14168
14169                 ret = populate_csum(trans, csum_root, buf, key.objectid,
14170                                     key.offset);
14171                 if (ret)
14172                         break;
14173                 path.slots[0]++;
14174         }
14175
14176         btrfs_release_path(&path);
14177         free(buf);
14178         return ret;
14179 }
14180
/*
 * Recompute all data checksums and store them in the csum tree.
 *
 * After an extent tree init the extent tree no longer describes the data
 * extents, so it cannot be used as the source.  In that case the caller sets
 * @search_fs_tree and the fs/subvolume trees are walked instead; otherwise
 * the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
14197
14198 static void free_roots_info_cache(void)
14199 {
14200         if (!roots_info_cache)
14201                 return;
14202
14203         while (!cache_tree_empty(roots_info_cache)) {
14204                 struct cache_extent *entry;
14205                 struct root_item_info *rii;
14206
14207                 entry = first_cache_extent(roots_info_cache);
14208                 if (!entry)
14209                         break;
14210                 remove_cache_extent(roots_info_cache, entry);
14211                 rii = container_of(entry, struct root_item_info, cache_extent);
14212                 free(rii);
14213         }
14214
14215         free(roots_info_cache);
14216         roots_info_cache = NULL;
14217 }
14218
14219 static int build_roots_info_cache(struct btrfs_fs_info *info)
14220 {
14221         int ret = 0;
14222         struct btrfs_key key;
14223         struct extent_buffer *leaf;
14224         struct btrfs_path path;
14225
14226         if (!roots_info_cache) {
14227                 roots_info_cache = malloc(sizeof(*roots_info_cache));
14228                 if (!roots_info_cache)
14229                         return -ENOMEM;
14230                 cache_tree_init(roots_info_cache);
14231         }
14232
14233         btrfs_init_path(&path);
14234         key.objectid = 0;
14235         key.type = BTRFS_EXTENT_ITEM_KEY;
14236         key.offset = 0;
14237         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14238         if (ret < 0)
14239                 goto out;
14240         leaf = path.nodes[0];
14241
14242         while (1) {
14243                 struct btrfs_key found_key;
14244                 struct btrfs_extent_item *ei;
14245                 struct btrfs_extent_inline_ref *iref;
14246                 int slot = path.slots[0];
14247                 int type;
14248                 u64 flags;
14249                 u64 root_id;
14250                 u8 level;
14251                 struct cache_extent *entry;
14252                 struct root_item_info *rii;
14253
14254                 if (slot >= btrfs_header_nritems(leaf)) {
14255                         ret = btrfs_next_leaf(info->extent_root, &path);
14256                         if (ret < 0) {
14257                                 break;
14258                         } else if (ret) {
14259                                 ret = 0;
14260                                 break;
14261                         }
14262                         leaf = path.nodes[0];
14263                         slot = path.slots[0];
14264                 }
14265
14266                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14267
14268                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14269                     found_key.type != BTRFS_METADATA_ITEM_KEY)
14270                         goto next;
14271
14272                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14273                 flags = btrfs_extent_flags(leaf, ei);
14274
14275                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14276                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
14277                         goto next;
14278
14279                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14280                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14281                         level = found_key.offset;
14282                 } else {
14283                         struct btrfs_tree_block_info *binfo;
14284
14285                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
14286                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14287                         level = btrfs_tree_block_level(leaf, binfo);
14288                 }
14289
14290                 /*
14291                  * For a root extent, it must be of the following type and the
14292                  * first (and only one) iref in the item.
14293                  */
14294                 type = btrfs_extent_inline_ref_type(leaf, iref);
14295                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14296                         goto next;
14297
14298                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14299                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14300                 if (!entry) {
14301                         rii = malloc(sizeof(struct root_item_info));
14302                         if (!rii) {
14303                                 ret = -ENOMEM;
14304                                 goto out;
14305                         }
14306                         rii->cache_extent.start = root_id;
14307                         rii->cache_extent.size = 1;
14308                         rii->level = (u8)-1;
14309                         entry = &rii->cache_extent;
14310                         ret = insert_cache_extent(roots_info_cache, entry);
14311                         ASSERT(ret == 0);
14312                 } else {
14313                         rii = container_of(entry, struct root_item_info,
14314                                            cache_extent);
14315                 }
14316
14317                 ASSERT(rii->cache_extent.start == root_id);
14318                 ASSERT(rii->cache_extent.size == 1);
14319
14320                 if (level > rii->level || rii->level == (u8)-1) {
14321                         rii->level = level;
14322                         rii->bytenr = found_key.objectid;
14323                         rii->gen = btrfs_extent_generation(leaf, ei);
14324                         rii->node_count = 1;
14325                 } else if (level == rii->level) {
14326                         rii->node_count++;
14327                 }
14328 next:
14329                 path.slots[0]++;
14330         }
14331
14332 out:
14333         btrfs_release_path(&path);
14334
14335         return ret;
14336 }
14337
14338 static int maybe_repair_root_item(struct btrfs_path *path,
14339                                   const struct btrfs_key *root_key,
14340                                   const int read_only_mode)
14341 {
14342         const u64 root_id = root_key->objectid;
14343         struct cache_extent *entry;
14344         struct root_item_info *rii;
14345         struct btrfs_root_item ri;
14346         unsigned long offset;
14347
14348         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14349         if (!entry) {
14350                 fprintf(stderr,
14351                         "Error: could not find extent items for root %llu\n",
14352                         root_key->objectid);
14353                 return -ENOENT;
14354         }
14355
14356         rii = container_of(entry, struct root_item_info, cache_extent);
14357         ASSERT(rii->cache_extent.start == root_id);
14358         ASSERT(rii->cache_extent.size == 1);
14359
14360         if (rii->node_count != 1) {
14361                 fprintf(stderr,
14362                         "Error: could not find btree root extent for root %llu\n",
14363                         root_id);
14364                 return -ENOENT;
14365         }
14366
14367         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14368         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14369
14370         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14371             btrfs_root_level(&ri) != rii->level ||
14372             btrfs_root_generation(&ri) != rii->gen) {
14373
14374                 /*
14375                  * If we're in repair mode but our caller told us to not update
14376                  * the root item, i.e. just check if it needs to be updated, don't
14377                  * print this message, since the caller will call us again shortly
14378                  * for the same root item without read only mode (the caller will
14379                  * open a transaction first).
14380                  */
14381                 if (!(read_only_mode && repair))
14382                         fprintf(stderr,
14383                                 "%sroot item for root %llu,"
14384                                 " current bytenr %llu, current gen %llu, current level %u,"
14385                                 " new bytenr %llu, new gen %llu, new level %u\n",
14386                                 (read_only_mode ? "" : "fixing "),
14387                                 root_id,
14388                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14389                                 btrfs_root_level(&ri),
14390                                 rii->bytenr, rii->gen, rii->level);
14391
14392                 if (btrfs_root_generation(&ri) > rii->gen) {
14393                         fprintf(stderr,
14394                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14395                                 root_id, btrfs_root_generation(&ri), rii->gen);
14396                         return -EINVAL;
14397                 }
14398
14399                 if (!read_only_mode) {
14400                         btrfs_set_root_bytenr(&ri, rii->bytenr);
14401                         btrfs_set_root_level(&ri, rii->level);
14402                         btrfs_set_root_generation(&ri, rii->gen);
14403                         write_extent_buffer(path->nodes[0], &ri,
14404                                             offset, sizeof(ri));
14405                 }
14406
14407                 return 1;
14408         }
14409
14410         return 0;
14411 }
14412
14413 /*
14414  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14415  * caused read-only snapshots to be corrupted if they were created at a moment
14416  * when the source subvolume/snapshot had orphan items. The issue was that the
14417  * on-disk root items became incorrect, referring to the pre orphan cleanup root
14418  * node instead of the post orphan cleanup root node.
14419  * So this function, and its callees, just detects and fixes those cases. Even
14420  * though the regression was for read-only snapshots, this function applies to
14421  * any snapshot/subvolume root.
14422  * This must be run before any other repair code - not doing it so, makes other
14423  * repair code delete or modify backrefs in the extent tree for example, which
14424  * will result in an inconsistent fs after repairing the root items.
14425  */
14426 static int repair_root_items(struct btrfs_fs_info *info)
14427 {
14428         struct btrfs_path path;
14429         struct btrfs_key key;
14430         struct extent_buffer *leaf;
14431         struct btrfs_trans_handle *trans = NULL;
14432         int ret = 0;
14433         int bad_roots = 0;
14434         int need_trans = 0;
14435
14436         btrfs_init_path(&path);
14437
14438         ret = build_roots_info_cache(info);
14439         if (ret)
14440                 goto out;
14441
14442         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14443         key.type = BTRFS_ROOT_ITEM_KEY;
14444         key.offset = 0;
14445
14446 again:
14447         /*
14448          * Avoid opening and committing transactions if a leaf doesn't have
14449          * any root items that need to be fixed, so that we avoid rotating
14450          * backup roots unnecessarily.
14451          */
14452         if (need_trans) {
14453                 trans = btrfs_start_transaction(info->tree_root, 1);
14454                 if (IS_ERR(trans)) {
14455                         ret = PTR_ERR(trans);
14456                         goto out;
14457                 }
14458         }
14459
14460         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14461                                 0, trans ? 1 : 0);
14462         if (ret < 0)
14463                 goto out;
14464         leaf = path.nodes[0];
14465
14466         while (1) {
14467                 struct btrfs_key found_key;
14468
14469                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14470                         int no_more_keys = find_next_key(&path, &key);
14471
14472                         btrfs_release_path(&path);
14473                         if (trans) {
14474                                 ret = btrfs_commit_transaction(trans,
14475                                                                info->tree_root);
14476                                 trans = NULL;
14477                                 if (ret < 0)
14478                                         goto out;
14479                         }
14480                         need_trans = 0;
14481                         if (no_more_keys)
14482                                 break;
14483                         goto again;
14484                 }
14485
14486                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14487
14488                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14489                         goto next;
14490                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14491                         goto next;
14492
14493                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14494                 if (ret < 0)
14495                         goto out;
14496                 if (ret) {
14497                         if (!trans && repair) {
14498                                 need_trans = 1;
14499                                 key = found_key;
14500                                 btrfs_release_path(&path);
14501                                 goto again;
14502                         }
14503                         bad_roots++;
14504                 }
14505 next:
14506                 path.slots[0]++;
14507         }
14508         ret = 0;
14509 out:
14510         free_roots_info_cache();
14511         btrfs_release_path(&path);
14512         if (trans)
14513                 btrfs_commit_transaction(trans, info->tree_root);
14514         if (ret < 0)
14515                 return ret;
14516
14517         return bad_roots;
14518 }
14519
14520 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14521 {
14522         struct btrfs_trans_handle *trans;
14523         struct btrfs_block_group_cache *bg_cache;
14524         u64 current = 0;
14525         int ret = 0;
14526
14527         /* Clear all free space cache inodes and its extent data */
14528         while (1) {
14529                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14530                 if (!bg_cache)
14531                         break;
14532                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14533                 if (ret < 0)
14534                         return ret;
14535                 current = bg_cache->key.objectid + bg_cache->key.offset;
14536         }
14537
14538         /* Don't forget to set cache_generation to -1 */
14539         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14540         if (IS_ERR(trans)) {
14541                 error("failed to update super block cache generation");
14542                 return PTR_ERR(trans);
14543         }
14544         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14545         btrfs_commit_transaction(trans, fs_info->tree_root);
14546
14547         return ret;
14548 }
14549
14550 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14551                 int clear_version)
14552 {
14553         int ret = 0;
14554
14555         if (clear_version == 1) {
14556                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14557                         error(
14558                 "free space cache v2 detected, use --clear-space-cache v2");
14559                         ret = 1;
14560                         goto close_out;
14561                 }
14562                 printf("Clearing free space cache\n");
14563                 ret = clear_free_space_cache(fs_info);
14564                 if (ret) {
14565                         error("failed to clear free space cache");
14566                         ret = 1;
14567                 } else {
14568                         printf("Free space cache cleared\n");
14569                 }
14570         } else if (clear_version == 2) {
14571                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14572                         printf("no free space cache v2 to clear\n");
14573                         ret = 0;
14574                         goto close_out;
14575                 }
14576                 printf("Clear free space cache v2\n");
14577                 ret = btrfs_clear_free_space_tree(fs_info);
14578                 if (ret) {
14579                         error("failed to clear free space cache v2: %d", ret);
14580                         ret = 1;
14581                 } else {
14582                         printf("free space cache v2 cleared\n");
14583                 }
14584         }
14585 close_out:
14586         return ret;
14587 }
14588
/*
 * Help text for "btrfs check": a NULL-terminated array of strings that
 * cmd_check() passes to usage().
 * NOTE(review): the layout looks like synopsis line, one-line summary,
 * long description, an empty string, then one entry per option line.
 * Confirm against usage() before reordering entries.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        /* Terminator marking the end of the array. */
        NULL
};
14620
14621 int cmd_check(int argc, char **argv)
14622 {
14623         struct cache_tree root_cache;
14624         struct btrfs_root *root;
14625         struct btrfs_fs_info *info;
14626         u64 bytenr = 0;
14627         u64 subvolid = 0;
14628         u64 tree_root_bytenr = 0;
14629         u64 chunk_root_bytenr = 0;
14630         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14631         int ret = 0;
14632         int err = 0;
14633         u64 num;
14634         int init_csum_tree = 0;
14635         int readonly = 0;
14636         int clear_space_cache = 0;
14637         int qgroup_report = 0;
14638         int qgroups_repaired = 0;
14639         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14640         int force = 0;
14641
14642         while(1) {
14643                 int c;
14644                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14645                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14646                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14647                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14648                         GETOPT_VAL_FORCE };
14649                 static const struct option long_options[] = {
14650                         { "super", required_argument, NULL, 's' },
14651                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14652                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14653                         { "init-csum-tree", no_argument, NULL,
14654                                 GETOPT_VAL_INIT_CSUM },
14655                         { "init-extent-tree", no_argument, NULL,
14656                                 GETOPT_VAL_INIT_EXTENT },
14657                         { "check-data-csum", no_argument, NULL,
14658                                 GETOPT_VAL_CHECK_CSUM },
14659                         { "backup", no_argument, NULL, 'b' },
14660                         { "subvol-extents", required_argument, NULL, 'E' },
14661                         { "qgroup-report", no_argument, NULL, 'Q' },
14662                         { "tree-root", required_argument, NULL, 'r' },
14663                         { "chunk-root", required_argument, NULL,
14664                                 GETOPT_VAL_CHUNK_TREE },
14665                         { "progress", no_argument, NULL, 'p' },
14666                         { "mode", required_argument, NULL,
14667                                 GETOPT_VAL_MODE },
14668                         { "clear-space-cache", required_argument, NULL,
14669                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14670                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14671                         { NULL, 0, NULL, 0}
14672                 };
14673
14674                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14675                 if (c < 0)
14676                         break;
14677                 switch(c) {
14678                         case 'a': /* ignored */ break;
14679                         case 'b':
14680                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14681                                 break;
14682                         case 's':
14683                                 num = arg_strtou64(optarg);
14684                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14685                                         error(
14686                                         "super mirror should be less than %d",
14687                                                 BTRFS_SUPER_MIRROR_MAX);
14688                                         exit(1);
14689                                 }
14690                                 bytenr = btrfs_sb_offset(((int)num));
14691                                 printf("using SB copy %llu, bytenr %llu\n", num,
14692                                        (unsigned long long)bytenr);
14693                                 break;
14694                         case 'Q':
14695                                 qgroup_report = 1;
14696                                 break;
14697                         case 'E':
14698                                 subvolid = arg_strtou64(optarg);
14699                                 break;
14700                         case 'r':
14701                                 tree_root_bytenr = arg_strtou64(optarg);
14702                                 break;
14703                         case GETOPT_VAL_CHUNK_TREE:
14704                                 chunk_root_bytenr = arg_strtou64(optarg);
14705                                 break;
14706                         case 'p':
14707                                 ctx.progress_enabled = true;
14708                                 break;
14709                         case '?':
14710                         case 'h':
14711                                 usage(cmd_check_usage);
14712                         case GETOPT_VAL_REPAIR:
14713                                 printf("enabling repair mode\n");
14714                                 repair = 1;
14715                                 ctree_flags |= OPEN_CTREE_WRITES;
14716                                 break;
14717                         case GETOPT_VAL_READONLY:
14718                                 readonly = 1;
14719                                 break;
14720                         case GETOPT_VAL_INIT_CSUM:
14721                                 printf("Creating a new CRC tree\n");
14722                                 init_csum_tree = 1;
14723                                 repair = 1;
14724                                 ctree_flags |= OPEN_CTREE_WRITES;
14725                                 break;
14726                         case GETOPT_VAL_INIT_EXTENT:
14727                                 init_extent_tree = 1;
14728                                 ctree_flags |= (OPEN_CTREE_WRITES |
14729                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14730                                 repair = 1;
14731                                 break;
14732                         case GETOPT_VAL_CHECK_CSUM:
14733                                 check_data_csum = 1;
14734                                 break;
14735                         case GETOPT_VAL_MODE:
14736                                 check_mode = parse_check_mode(optarg);
14737                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14738                                         error("unknown mode: %s", optarg);
14739                                         exit(1);
14740                                 }
14741                                 break;
14742                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14743                                 if (strcmp(optarg, "v1") == 0) {
14744                                         clear_space_cache = 1;
14745                                 } else if (strcmp(optarg, "v2") == 0) {
14746                                         clear_space_cache = 2;
14747                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14748                                 } else {
14749                                         error(
14750                 "invalid argument to --clear-space-cache, must be v1 or v2");
14751                                         exit(1);
14752                                 }
14753                                 ctree_flags |= OPEN_CTREE_WRITES;
14754                                 break;
14755                         case GETOPT_VAL_FORCE:
14756                                 force = 1;
14757                                 break;
14758                 }
14759         }
14760
14761         if (check_argc_exact(argc - optind, 1))
14762                 usage(cmd_check_usage);
14763
14764         if (ctx.progress_enabled) {
14765                 ctx.tp = TASK_NOTHING;
14766                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14767         }
14768
14769         /* This check is the only reason for --readonly to exist */
14770         if (readonly && repair) {
14771                 error("repair options are not compatible with --readonly");
14772                 exit(1);
14773         }
14774
14775         /*
14776          * experimental and dangerous
14777          */
14778         if (repair && check_mode == CHECK_MODE_LOWMEM)
14779                 warning("low-memory mode repair support is only partial");
14780
14781         radix_tree_init();
14782         cache_tree_init(&root_cache);
14783
14784         ret = check_mounted(argv[optind]);
14785         if (!force) {
14786                 if (ret < 0) {
14787                         error("could not check mount status: %s",
14788                                         strerror(-ret));
14789                         err |= !!ret;
14790                         goto err_out;
14791                 } else if (ret) {
14792                         error(
14793 "%s is currently mounted, use --force if you really intend to check the filesystem",
14794                                 argv[optind]);
14795                         ret = -EBUSY;
14796                         err |= !!ret;
14797                         goto err_out;
14798                 }
14799         } else {
14800                 if (repair) {
14801                         error("repair and --force is not yet supported");
14802                         ret = 1;
14803                         err |= !!ret;
14804                         goto err_out;
14805                 }
14806                 if (ret < 0) {
14807                         warning(
14808 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14809                                 argv[optind]);
14810                 } else if (ret) {
14811                         warning(
14812                         "filesystem mounted, continuing because of --force");
14813                 }
14814                 /* A block device is mounted in exclusive mode by kernel */
14815                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14816         }
14817
14818         /* only allow partial opening under repair mode */
14819         if (repair)
14820                 ctree_flags |= OPEN_CTREE_PARTIAL;
14821
14822         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14823                                   chunk_root_bytenr, ctree_flags);
14824         if (!info) {
14825                 error("cannot open file system");
14826                 ret = -EIO;
14827                 err |= !!ret;
14828                 goto err_out;
14829         }
14830
14831         global_info = info;
14832         root = info->fs_root;
14833         uuid_unparse(info->super_copy->fsid, uuidbuf);
14834
14835         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14836
14837         /*
14838          * Check the bare minimum before starting anything else that could rely
14839          * on it, namely the tree roots, any local consistency checks
14840          */
14841         if (!extent_buffer_uptodate(info->tree_root->node) ||
14842             !extent_buffer_uptodate(info->dev_root->node) ||
14843             !extent_buffer_uptodate(info->chunk_root->node)) {
14844                 error("critical roots corrupted, unable to check the filesystem");
14845                 err |= !!ret;
14846                 ret = -EIO;
14847                 goto close_out;
14848         }
14849
14850         if (clear_space_cache) {
14851                 ret = do_clear_free_space_cache(info, clear_space_cache);
14852                 err |= !!ret;
14853                 goto close_out;
14854         }
14855
14856         /*
14857          * repair mode will force us to commit transaction which
14858          * will make us fail to load log tree when mounting.
14859          */
14860         if (repair && btrfs_super_log_root(info->super_copy)) {
14861                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14862                 if (!ret) {
14863                         ret = 1;
14864                         err |= !!ret;
14865                         goto close_out;
14866                 }
14867                 ret = zero_log_tree(root);
14868                 err |= !!ret;
14869                 if (ret) {
14870                         error("failed to zero log tree: %d", ret);
14871                         goto close_out;
14872                 }
14873         }
14874
14875         if (qgroup_report) {
14876                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14877                        uuidbuf);
14878                 ret = qgroup_verify_all(info);
14879                 err |= !!ret;
14880                 if (ret == 0)
14881                         report_qgroups(1);
14882                 goto close_out;
14883         }
14884         if (subvolid) {
14885                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14886                        subvolid, argv[optind], uuidbuf);
14887                 ret = print_extent_state(info, subvolid);
14888                 err |= !!ret;
14889                 goto close_out;
14890         }
14891
14892         if (init_extent_tree || init_csum_tree) {
14893                 struct btrfs_trans_handle *trans;
14894
14895                 trans = btrfs_start_transaction(info->extent_root, 0);
14896                 if (IS_ERR(trans)) {
14897                         error("error starting transaction");
14898                         ret = PTR_ERR(trans);
14899                         err |= !!ret;
14900                         goto close_out;
14901                 }
14902
14903                 if (init_extent_tree) {
14904                         printf("Creating a new extent tree\n");
14905                         ret = reinit_extent_tree(trans, info);
14906                         err |= !!ret;
14907                         if (ret)
14908                                 goto close_out;
14909                 }
14910
14911                 if (init_csum_tree) {
14912                         printf("Reinitialize checksum tree\n");
14913                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14914                         if (ret) {
14915                                 error("checksum tree initialization failed: %d",
14916                                                 ret);
14917                                 ret = -EIO;
14918                                 err |= !!ret;
14919                                 goto close_out;
14920                         }
14921
14922                         ret = fill_csum_tree(trans, info->csum_root,
14923                                              init_extent_tree);
14924                         err |= !!ret;
14925                         if (ret) {
14926                                 error("checksum tree refilling failed: %d", ret);
14927                                 return -EIO;
14928                         }
14929                 }
14930                 /*
14931                  * Ok now we commit and run the normal fsck, which will add
14932                  * extent entries for all of the items it finds.
14933                  */
14934                 ret = btrfs_commit_transaction(trans, info->extent_root);
14935                 err |= !!ret;
14936                 if (ret)
14937                         goto close_out;
14938         }
14939         if (!extent_buffer_uptodate(info->extent_root->node)) {
14940                 error("critical: extent_root, unable to check the filesystem");
14941                 ret = -EIO;
14942                 err |= !!ret;
14943                 goto close_out;
14944         }
14945         if (!extent_buffer_uptodate(info->csum_root->node)) {
14946                 error("critical: csum_root, unable to check the filesystem");
14947                 ret = -EIO;
14948                 err |= !!ret;
14949                 goto close_out;
14950         }
14951
14952         if (!init_extent_tree) {
14953                 ret = repair_root_items(info);
14954                 if (ret < 0) {
14955                         err = !!ret;
14956                         error("failed to repair root items: %s", strerror(-ret));
14957                         goto close_out;
14958                 }
14959                 if (repair) {
14960                         fprintf(stderr, "Fixed %d roots.\n", ret);
14961                         ret = 0;
14962                 } else if (ret > 0) {
14963                         fprintf(stderr,
14964                                 "Found %d roots with an outdated root item.\n",
14965                                 ret);
14966                         fprintf(stderr,
14967         "Please run a filesystem check with the option --repair to fix them.\n");
14968                         ret = 1;
14969                         err |= ret;
14970                         goto close_out;
14971                 }
14972         }
14973
14974         ret = do_check_chunks_and_extents(info);
14975         err |= !!ret;
14976         if (ret)
14977                 error(
14978                 "errors found in extent allocation tree or chunk allocation");
14979
14980         /* Only re-check super size after we checked and repaired the fs */
14981         err |= !is_super_size_valid(info);
14982
14983         if (!ctx.progress_enabled) {
14984                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14985                         fprintf(stderr, "checking free space tree\n");
14986                 else
14987                         fprintf(stderr, "checking free space cache\n");
14988         }
14989         ret = check_space_cache(root);
14990         err |= !!ret;
14991         if (ret) {
14992                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14993                         error("errors found in free space tree");
14994                 else
14995                         error("errors found in free space cache");
14996                 goto out;
14997         }
14998
14999         /*
15000          * We used to have to have these hole extents in between our real
15001          * extents so if we don't have this flag set we need to make sure there
15002          * are no gaps in the file extents for inodes, otherwise we can just
15003          * ignore it when this happens.
15004          */
15005         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
15006         ret = do_check_fs_roots(info, &root_cache);
15007         err |= !!ret;
15008         if (ret) {
15009                 error("errors found in fs roots");
15010                 goto out;
15011         }
15012
15013         fprintf(stderr, "checking csums\n");
15014         ret = check_csums(root);
15015         err |= !!ret;
15016         if (ret) {
15017                 error("errors found in csum tree");
15018                 goto out;
15019         }
15020
15021         fprintf(stderr, "checking root refs\n");
15022         /* For low memory mode, check_fs_roots_v2 handles root refs */
15023         if (check_mode != CHECK_MODE_LOWMEM) {
15024                 ret = check_root_refs(root, &root_cache);
15025                 err |= !!ret;
15026                 if (ret) {
15027                         error("errors found in root refs");
15028                         goto out;
15029                 }
15030         }
15031
15032         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
15033                 struct extent_buffer *eb;
15034
15035                 eb = list_first_entry(&root->fs_info->recow_ebs,
15036                                       struct extent_buffer, recow);
15037                 list_del_init(&eb->recow);
15038                 ret = recow_extent_buffer(root, eb);
15039                 err |= !!ret;
15040                 if (ret) {
15041                         error("fails to fix transid errors");
15042                         break;
15043                 }
15044         }
15045
15046         while (!list_empty(&delete_items)) {
15047                 struct bad_item *bad;
15048
15049                 bad = list_first_entry(&delete_items, struct bad_item, list);
15050                 list_del_init(&bad->list);
15051                 if (repair) {
15052                         ret = delete_bad_item(root, bad);
15053                         err |= !!ret;
15054                 }
15055                 free(bad);
15056         }
15057
15058         if (info->quota_enabled) {
15059                 fprintf(stderr, "checking quota groups\n");
15060                 ret = qgroup_verify_all(info);
15061                 err |= !!ret;
15062                 if (ret) {
15063                         error("failed to check quota groups");
15064                         goto out;
15065                 }
15066                 report_qgroups(0);
15067                 ret = repair_qgroups(info, &qgroups_repaired);
15068                 err |= !!ret;
15069                 if (err) {
15070                         error("failed to repair quota groups");
15071                         goto out;
15072                 }
15073                 ret = 0;
15074         }
15075
15076         if (!list_empty(&root->fs_info->recow_ebs)) {
15077                 error("transid errors in file system");
15078                 ret = 1;
15079                 err |= !!ret;
15080         }
15081 out:
15082         printf("found %llu bytes used, ",
15083                (unsigned long long)bytes_used);
15084         if (err)
15085                 printf("error(s) found\n");
15086         else
15087                 printf("no error found\n");
15088         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
15089         printf("total tree bytes: %llu\n",
15090                (unsigned long long)total_btree_bytes);
15091         printf("total fs tree bytes: %llu\n",
15092                (unsigned long long)total_fs_tree_bytes);
15093         printf("total extent tree bytes: %llu\n",
15094                (unsigned long long)total_extent_tree_bytes);
15095         printf("btree space waste bytes: %llu\n",
15096                (unsigned long long)btree_space_waste);
15097         printf("file data blocks allocated: %llu\n referenced %llu\n",
15098                 (unsigned long long)data_bytes_allocated,
15099                 (unsigned long long)data_bytes_referenced);
15100
15101         free_qgroup_counts();
15102         free_root_recs_tree(&root_cache);
15103 close_out:
15104         close_ctree(root);
15105 err_out:
15106         if (ctx.progress_enabled)
15107                 task_deinit(ctx.info);
15108
15109         return err;
15110 }