btrfs-progs: convert: Fix a bug in rollback check which overwrite return value
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase that the progress indicator reports.
 *
 * print_status_check() uses the value as an index into its table of status
 * strings; TASK_NOTHING has no string there and makes it return early.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/* Context handed to print_status_check() as its opaque argument. */
struct task_ctx {
        /* NOTE(review): presumably set when progress output is requested */
        int progress_enabled;
        /* phase whose status string print_status_check() prints */
        enum task_position tp;

        /* handle passed to task_period_start() / task_period_wait() */
        struct task_info *info;
};
60
/*
 * File-scope state.  Everything here has internal linkage and starts out
 * zeroed, empty or NULL.
 */
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Check implementation to run, as selected by name through
 * parse_check_mode().
 *
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for a string it does
 * not recognise.  CHECK_MODE_DEFAULT is an alias of CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode currently in effect; starts as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common header shared by data_backref and tree_backref, both of which embed
 * it as their 'node' member.  is_data tells the two apart (see
 * compare_extent_backref()).
 */
struct extent_backref {
        /* linkage used by the rb-tree comparators in this file */
        struct rb_node node;
        /* 1: embedded in a data_backref, 0: embedded in a tree_backref */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Back reference of a data extent.  Embeds the common extent_backref header
 * as 'node'; to_data_backref() converts from the header to this structure.
 */
struct data_backref {
        struct extent_backref node;
        /* parent and root share storage; comparators read it via 'parent' */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* separate from the 1-bit node.found_ref flag */
        u32 found_ref;
};
115
/*
 * Error bits, one per condition named in the trailing comment of each line.
 * Bit 0 is not assigned in this group.
 */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
#define DIR_INDEX_MISSING       (1<<18) /* INODE_INDEX not found */
#define DIR_INDEX_MISMATCH      (1<<19) /* INODE_INDEX found but not match */
#define DIR_COUNT_AGAIN         (1<<20) /* DIR isize should be recalculated */
#define BG_ACCOUNTING_ERROR     (1<<21) /* Block group accounting error */
137
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
139 {
140         return container_of(back, struct data_backref, node);
141 }
142
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
144 {
145         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147         struct data_backref *back1 = to_data_backref(ext1);
148         struct data_backref *back2 = to_data_backref(ext2);
149
150         WARN_ON(!ext1->is_data);
151         WARN_ON(!ext2->is_data);
152
153         /* parent and root are a union, so this covers both */
154         if (back1->parent > back2->parent)
155                 return 1;
156         if (back1->parent < back2->parent)
157                 return -1;
158
159         /* This is a full backref and the parents match. */
160         if (back1->node.full_backref)
161                 return 0;
162
163         if (back1->owner > back2->owner)
164                 return 1;
165         if (back1->owner < back2->owner)
166                 return -1;
167
168         if (back1->offset > back2->offset)
169                 return 1;
170         if (back1->offset < back2->offset)
171                 return -1;
172
173         if (back1->found_ref && back2->found_ref) {
174                 if (back1->disk_bytenr > back2->disk_bytenr)
175                         return 1;
176                 if (back1->disk_bytenr < back2->disk_bytenr)
177                         return -1;
178
179                 if (back1->bytes > back2->bytes)
180                         return 1;
181                 if (back1->bytes < back2->bytes)
182                         return -1;
183         }
184
185         return 0;
186 }
187
/*
 * Much like data_backref, just removed the undetermined members
 * and change it to use list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
 *
 * print_orphan_data_extents() reports objectid as the inode number together
 * with offset, disk_bytenr and disk_len.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
202
/*
 * Back reference of a tree block.  Embeds the common extent_backref header
 * as 'node'; to_tree_backref() converts from the header to this structure.
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share storage; compare_tree_backref() reads 'parent' */
        union {
                u64 parent;
                u64 root;
        };
};
210
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
212 {
213         return container_of(back, struct tree_backref, node);
214 }
215
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
217 {
218         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220         struct tree_backref *back1 = to_tree_backref(ext1);
221         struct tree_backref *back2 = to_tree_backref(ext2);
222
223         WARN_ON(ext1->is_data);
224         WARN_ON(ext2->is_data);
225
226         /* parent and root are a union, so this covers both */
227         if (back1->parent > back2->parent)
228                 return 1;
229         if (back1->parent < back2->parent)
230                 return -1;
231
232         return 0;
233 }
234
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
239
240         if (ext1->is_data > ext2->is_data)
241                 return 1;
242
243         if (ext1->is_data < ext2->is_data)
244                 return -1;
245
246         if (ext1->full_backref > ext2->full_backref)
247                 return 1;
248         if (ext1->full_backref < ext2->full_backref)
249                 return -1;
250
251         if (ext1->is_data)
252                 return compare_data_backref(node1, node2);
253         else
254                 return compare_tree_backref(node1, node2);
255 }
256
/*
 * Explicit initialization for extent_record::flag_block_full_backref.
 * That member is a 2-bit field, so it can hold this value next to 0 and 1.
 */
enum { FLAG_UNSET = 2 };
259
/* Per-extent bookkeeping record. */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct rb_root backref_tree;
        /* linkage that to_extent_record() converts back from */
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can also hold FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
287
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
289 {
290         return container_of(entry, struct extent_record, list);
291 }
292
/*
 * One name referring to an inode; kept on inode_record::backrefs.
 *
 * The name is stored inline after the structure: clone_inode_rec() copies
 * sizeof(struct inode_backref) + namelen + 1 bytes per entry.
 */
struct inode_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_inode_ref:1;
        u8 filetype;
        u8 ref_type;
        /* NOTE(review): presumably a mask of REF_ERR_* bits -- confirm */
        int errors;
        u64 dir;
        u64 index;
        u16 namelen;
        /* trailing storage for the name, see above */
        char name[0];
};
306
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
308 {
309         return list_entry(entry, struct inode_backref, list);
310 }
311
/* List-linked record of values describing one root item. */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        struct btrfs_key drop_key;
};
321
/*
 * Error bits for a single reference.  print_ref_error() turns a mask of
 * these into human-readable text.
 */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
335
/*
 * One hole, covering [start, start + len), stored in an rb-tree that
 * compare_hole() keeps ordered.
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
341
/* Everything collected about one inode. */
struct inode_record {
        /* list of struct inode_backref */
        struct list_head backrefs;
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        /* mask of I_ERR_* bits, decoded by print_inode_error() */
        int errors;

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;
        u64 extent_end;
        /* rb-tree of struct file_extent_hole */
        struct rb_root holes;
        /* list of struct orphan_data_extent */
        struct list_head orphan_extents;

        /* clone_inode_rec() starts every copy at 1 */
        u32 refs;
};
369
/*
 * Error bits stored in inode_record::errors.  print_inode_error() prints
 * one text fragment per bit that is set.
 */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
385
/*
 * One reference to a root; kept on a list and converted back from its list
 * linkage by to_root_backref().  The name is stored inline after the
 * structure.
 */
struct root_backref {
        struct list_head list;
        unsigned int found_dir_item:1;
        unsigned int found_dir_index:1;
        unsigned int found_back_ref:1;
        unsigned int found_forward_ref:1;
        unsigned int reachable:1;
        /* NOTE(review): presumably a mask of REF_ERR_* bits -- confirm */
        int errors;
        u64 ref_root;
        u64 dir;
        u64 index;
        u16 namelen;
        /* trailing storage for the name */
        char name[0];
};
400
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
402 {
403         return list_entry(entry, struct root_backref, list);
404 }
405
/* Record for one root, carrying its list of backrefs. */
struct root_record {
        /* NOTE(review): presumably a list of struct root_backref -- confirm */
        struct list_head backrefs;
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
413
/* Pairs a cache_extent with an opaque data pointer. */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
418
/*
 * A cache_extent that carries its own root and inode cache trees, a
 * current inode record and a reference count.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
426
/* Start and size of one block. */
struct block_info {
        u64 start;
        u32 size;
};
431
/* State kept while walking a tree. */
struct walk_control {
        struct cache_tree shared;
        /* BTRFS_MAX_LEVEL slots; presumably indexed by tree level -- confirm */
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
438
/* List entry naming an item by its key and the id of the root holding it. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
444
/* List entry describing an extent by bytenr and length, plus counters. */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
452
/* Information gathered about one root, stored through its cache_extent. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
462
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a bit of its own, so a result tested with '&' can never
 * mistake one kind of error for another.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
479
480 static void *print_status_check(void *p)
481 {
482         struct task_ctx *priv = p;
483         const char work_indicator[] = { '.', 'o', 'O', 'o' };
484         uint32_t count = 0;
485         static char *task_position_string[] = {
486                 "checking extents",
487                 "checking free space cache",
488                 "checking fs roots",
489         };
490
491         task_period_start(priv->info, 1000 /* 1s */);
492
493         if (priv->tp == TASK_NOTHING)
494                 return NULL;
495
496         while (1) {
497                 printf("%s [%c]\r", task_position_string[priv->tp],
498                                 work_indicator[count % 4]);
499                 count++;
500                 fflush(stdout);
501                 task_period_wait(priv->info);
502         }
503         return NULL;
504 }
505
/*
 * Terminate the progress line: write a newline to stdout and flush it.
 * The argument is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
513
514 static enum btrfs_check_mode parse_check_mode(const char *str)
515 {
516         if (strcmp(str, "lowmem") == 0)
517                 return CHECK_MODE_LOWMEM;
518         if (strcmp(str, "orig") == 0)
519                 return CHECK_MODE_ORIGINAL;
520         if (strcmp(str, "original") == 0)
521                 return CHECK_MODE_ORIGINAL;
522
523         return CHECK_MODE_UNKNOWN;
524 }
525
526 /* Compatible function to allow reuse of old codes */
527 static u64 first_extent_gap(struct rb_root *holes)
528 {
529         struct file_extent_hole *hole;
530
531         if (RB_EMPTY_ROOT(holes))
532                 return (u64)-1;
533
534         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
535         return hole->start;
536 }
537
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
539 {
540         struct file_extent_hole *hole1;
541         struct file_extent_hole *hole2;
542
543         hole1 = rb_entry(node1, struct file_extent_hole, node);
544         hole2 = rb_entry(node2, struct file_extent_hole, node);
545
546         if (hole1->start > hole2->start)
547                 return -1;
548         if (hole1->start < hole2->start)
549                 return 1;
550         /* Now hole1->start == hole2->start */
551         if (hole1->len >= hole2->len)
552                 /*
553                  * Hole 1 will be merge center
554                  * Same hole will be merged later
555                  */
556                 return -1;
557         /* Hole 2 will be merge center */
558         return 1;
559 }
560
561 /*
562  * Add a hole to the record
563  *
564  * This will do hole merge for copy_file_extent_holes(),
565  * which will ensure there won't be continuous holes.
566  */
567 static int add_file_extent_hole(struct rb_root *holes,
568                                 u64 start, u64 len)
569 {
570         struct file_extent_hole *hole;
571         struct file_extent_hole *prev = NULL;
572         struct file_extent_hole *next = NULL;
573
574         hole = malloc(sizeof(*hole));
575         if (!hole)
576                 return -ENOMEM;
577         hole->start = start;
578         hole->len = len;
579         /* Since compare will not return 0, no -EEXIST will happen */
580         rb_insert(holes, &hole->node, compare_hole);
581
582         /* simple merge with previous hole */
583         if (rb_prev(&hole->node))
584                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
585                                 node);
586         if (prev && prev->start + prev->len >= hole->start) {
587                 hole->len = hole->start + hole->len - prev->start;
588                 hole->start = prev->start;
589                 rb_erase(&prev->node, holes);
590                 free(prev);
591                 prev = NULL;
592         }
593
594         /* iterate merge with next holes */
595         while (1) {
596                 if (!rb_next(&hole->node))
597                         break;
598                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
599                                         node);
600                 if (hole->start + hole->len >= next->start) {
601                         if (hole->start + hole->len <= next->start + next->len)
602                                 hole->len = next->start + next->len -
603                                             hole->start;
604                         rb_erase(&next->node, holes);
605                         free(next);
606                         next = NULL;
607                 } else
608                         break;
609         }
610         return 0;
611 }
612
613 static int compare_hole_range(struct rb_node *node, void *data)
614 {
615         struct file_extent_hole *hole;
616         u64 start;
617
618         hole = (struct file_extent_hole *)data;
619         start = hole->start;
620
621         hole = rb_entry(node, struct file_extent_hole, node);
622         if (start < hole->start)
623                 return -1;
624         if (start >= hole->start && start < hole->start + hole->len)
625                 return 0;
626         return 1;
627 }
628
629 /*
630  * Delete a hole in the record
631  *
632  * This will do the hole split and is much restrict than add.
633  */
634 static int del_file_extent_hole(struct rb_root *holes,
635                                 u64 start, u64 len)
636 {
637         struct file_extent_hole *hole;
638         struct file_extent_hole tmp;
639         u64 prev_start = 0;
640         u64 prev_len = 0;
641         u64 next_start = 0;
642         u64 next_len = 0;
643         struct rb_node *node;
644         int have_prev = 0;
645         int have_next = 0;
646         int ret = 0;
647
648         tmp.start = start;
649         tmp.len = len;
650         node = rb_search(holes, &tmp, compare_hole_range, NULL);
651         if (!node)
652                 return -EEXIST;
653         hole = rb_entry(node, struct file_extent_hole, node);
654         if (start + len > hole->start + hole->len)
655                 return -EEXIST;
656
657         /*
658          * Now there will be no overlap, delete the hole and re-add the
659          * split(s) if they exists.
660          */
661         if (start > hole->start) {
662                 prev_start = hole->start;
663                 prev_len = start - hole->start;
664                 have_prev = 1;
665         }
666         if (hole->start + hole->len > start + len) {
667                 next_start = start + len;
668                 next_len = hole->start + hole->len - start - len;
669                 have_next = 1;
670         }
671         rb_erase(node, holes);
672         free(hole);
673         if (have_prev) {
674                 ret = add_file_extent_hole(holes, prev_start, prev_len);
675                 if (ret < 0)
676                         return ret;
677         }
678         if (have_next) {
679                 ret = add_file_extent_hole(holes, next_start, next_len);
680                 if (ret < 0)
681                         return ret;
682         }
683         return 0;
684 }
685
686 static int copy_file_extent_holes(struct rb_root *dst,
687                                   struct rb_root *src)
688 {
689         struct file_extent_hole *hole;
690         struct rb_node *node;
691         int ret = 0;
692
693         node = rb_first(src);
694         while (node) {
695                 hole = rb_entry(node, struct file_extent_hole, node);
696                 ret = add_file_extent_hole(dst, hole->start, hole->len);
697                 if (ret)
698                         break;
699                 node = rb_next(node);
700         }
701         return ret;
702 }
703
704 static void free_file_extent_holes(struct rb_root *holes)
705 {
706         struct rb_node *node;
707         struct file_extent_hole *hole;
708
709         node = rb_first(holes);
710         while (node) {
711                 hole = rb_entry(node, struct file_extent_hole, node);
712                 rb_erase(node, holes);
713                 free(hole);
714                 node = rb_first(holes);
715         }
716 }
717
/* Forward declaration so the function can be used ahead of its definition. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
719
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721                                  struct btrfs_root *root)
722 {
723         if (root->last_trans != trans->transid) {
724                 root->track_dirty = 1;
725                 root->last_trans = trans->transid;
726                 root->commit_root = root->node;
727                 extent_buffer_get(root->node);
728         }
729 }
730
731 static u8 imode_to_type(u32 imode)
732 {
733 #define S_SHIFT 12
734         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
736                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
737                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
738                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
739                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
740                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
741                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
742         };
743
744         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
745 #undef S_SHIFT
746 }
747
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
749 {
750         struct device_record *rec1;
751         struct device_record *rec2;
752
753         rec1 = rb_entry(node1, struct device_record, node);
754         rec2 = rb_entry(node2, struct device_record, node);
755         if (rec1->devid > rec2->devid)
756                 return -1;
757         else if (rec1->devid < rec2->devid)
758                 return 1;
759         else
760                 return 0;
761 }
762
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
764 {
765         struct inode_record *rec;
766         struct inode_backref *backref;
767         struct inode_backref *orig;
768         struct inode_backref *tmp;
769         struct orphan_data_extent *src_orphan;
770         struct orphan_data_extent *dst_orphan;
771         struct rb_node *rb;
772         size_t size;
773         int ret;
774
775         rec = malloc(sizeof(*rec));
776         if (!rec)
777                 return ERR_PTR(-ENOMEM);
778         memcpy(rec, orig_rec, sizeof(*rec));
779         rec->refs = 1;
780         INIT_LIST_HEAD(&rec->backrefs);
781         INIT_LIST_HEAD(&rec->orphan_extents);
782         rec->holes = RB_ROOT;
783
784         list_for_each_entry(orig, &orig_rec->backrefs, list) {
785                 size = sizeof(*orig) + orig->namelen + 1;
786                 backref = malloc(size);
787                 if (!backref) {
788                         ret = -ENOMEM;
789                         goto cleanup;
790                 }
791                 memcpy(backref, orig, size);
792                 list_add_tail(&backref->list, &rec->backrefs);
793         }
794         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795                 dst_orphan = malloc(sizeof(*dst_orphan));
796                 if (!dst_orphan) {
797                         ret = -ENOMEM;
798                         goto cleanup;
799                 }
800                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
802         }
803         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
804         if (ret < 0)
805                 goto cleanup_rb;
806
807         return rec;
808
809 cleanup_rb:
810         rb = rb_first(&rec->holes);
811         while (rb) {
812                 struct file_extent_hole *hole;
813
814                 hole = rb_entry(rb, struct file_extent_hole, node);
815                 rb = rb_next(rb);
816                 free(hole);
817         }
818
819 cleanup:
820         if (!list_empty(&rec->backrefs))
821                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822                         list_del(&orig->list);
823                         free(orig);
824                 }
825
826         if (!list_empty(&rec->orphan_extents))
827                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828                         list_del(&orig->list);
829                         free(orig);
830                 }
831
832         free(rec);
833
834         return ERR_PTR(ret);
835 }
836
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
838                                       u64 objectid)
839 {
840         struct orphan_data_extent *orphan;
841
842         if (list_empty(orphan_extents))
843                 return;
844         printf("The following data extent is lost in tree %llu:\n",
845                objectid);
846         list_for_each_entry(orphan, orphan_extents, list) {
847                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
849                        orphan->disk_len);
850         }
851 }
852
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
854 {
855         u64 root_objectid = root->root_key.objectid;
856         int errors = rec->errors;
857
858         if (!errors)
859                 return;
860         /* reloc root errors, we print its corresponding fs root objectid*/
861         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862                 root_objectid = root->root_key.offset;
863                 fprintf(stderr, "reloc");
864         }
865         fprintf(stderr, "root %llu inode %llu errors %x",
866                 (unsigned long long) root_objectid,
867                 (unsigned long long) rec->ino, rec->errors);
868
869         if (errors & I_ERR_NO_INODE_ITEM)
870                 fprintf(stderr, ", no inode item");
871         if (errors & I_ERR_NO_ORPHAN_ITEM)
872                 fprintf(stderr, ", no orphan item");
873         if (errors & I_ERR_DUP_INODE_ITEM)
874                 fprintf(stderr, ", dup inode item");
875         if (errors & I_ERR_DUP_DIR_INDEX)
876                 fprintf(stderr, ", dup dir index");
877         if (errors & I_ERR_ODD_DIR_ITEM)
878                 fprintf(stderr, ", odd dir item");
879         if (errors & I_ERR_ODD_FILE_EXTENT)
880                 fprintf(stderr, ", odd file extent");
881         if (errors & I_ERR_BAD_FILE_EXTENT)
882                 fprintf(stderr, ", bad file extent");
883         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884                 fprintf(stderr, ", file extent overlap");
885         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886                 fprintf(stderr, ", file extent discount");
887         if (errors & I_ERR_DIR_ISIZE_WRONG)
888                 fprintf(stderr, ", dir isize wrong");
889         if (errors & I_ERR_FILE_NBYTES_WRONG)
890                 fprintf(stderr, ", nbytes wrong");
891         if (errors & I_ERR_ODD_CSUM_ITEM)
892                 fprintf(stderr, ", odd csum item");
893         if (errors & I_ERR_SOME_CSUM_MISSING)
894                 fprintf(stderr, ", some csum missing");
895         if (errors & I_ERR_LINK_COUNT_WRONG)
896                 fprintf(stderr, ", link count wrong");
897         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898                 fprintf(stderr, ", orphan file extent");
899         fprintf(stderr, "\n");
900         /* Print the orphan extents if needed */
901         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
903
904         /* Print the holes if needed */
905         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906                 struct file_extent_hole *hole;
907                 struct rb_node *node;
908                 int found = 0;
909
910                 node = rb_first(&rec->holes);
911                 fprintf(stderr, "Found file extent holes:\n");
912                 while (node) {
913                         found = 1;
914                         hole = rb_entry(node, struct file_extent_hole, node);
915                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
916                                 hole->start, hole->len);
917                         node = rb_next(node);
918                 }
919                 if (!found)
920                         fprintf(stderr, "\tstart: 0, len: %llu\n",
921                                 round_up(rec->isize,
922                                          root->fs_info->sectorsize));
923         }
924 }
925
926 static void print_ref_error(int errors)
927 {
928         if (errors & REF_ERR_NO_DIR_ITEM)
929                 fprintf(stderr, ", no dir item");
930         if (errors & REF_ERR_NO_DIR_INDEX)
931                 fprintf(stderr, ", no dir index");
932         if (errors & REF_ERR_NO_INODE_REF)
933                 fprintf(stderr, ", no inode ref");
934         if (errors & REF_ERR_DUP_DIR_ITEM)
935                 fprintf(stderr, ", dup dir item");
936         if (errors & REF_ERR_DUP_DIR_INDEX)
937                 fprintf(stderr, ", dup dir index");
938         if (errors & REF_ERR_DUP_INODE_REF)
939                 fprintf(stderr, ", dup inode ref");
940         if (errors & REF_ERR_INDEX_UNMATCH)
941                 fprintf(stderr, ", index mismatch");
942         if (errors & REF_ERR_FILETYPE_UNMATCH)
943                 fprintf(stderr, ", filetype mismatch");
944         if (errors & REF_ERR_NAME_TOO_LONG)
945                 fprintf(stderr, ", name too long");
946         if (errors & REF_ERR_NO_ROOT_REF)
947                 fprintf(stderr, ", no root ref");
948         if (errors & REF_ERR_NO_ROOT_BACKREF)
949                 fprintf(stderr, ", no root backref");
950         if (errors & REF_ERR_DUP_ROOT_REF)
951                 fprintf(stderr, ", dup root ref");
952         if (errors & REF_ERR_DUP_ROOT_BACKREF)
953                 fprintf(stderr, ", dup root backref");
954         fprintf(stderr, "\n");
955 }
956
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
958                                           u64 ino, int mod)
959 {
960         struct ptr_node *node;
961         struct cache_extent *cache;
962         struct inode_record *rec = NULL;
963         int ret;
964
965         cache = lookup_cache_extent(inode_cache, ino, 1);
966         if (cache) {
967                 node = container_of(cache, struct ptr_node, cache);
968                 rec = node->data;
969                 if (mod && rec->refs > 1) {
970                         node->data = clone_inode_rec(rec);
971                         if (IS_ERR(node->data))
972                                 return node->data;
973                         rec->refs--;
974                         rec = node->data;
975                 }
976         } else if (mod) {
977                 rec = calloc(1, sizeof(*rec));
978                 if (!rec)
979                         return ERR_PTR(-ENOMEM);
980                 rec->ino = ino;
981                 rec->extent_start = (u64)-1;
982                 rec->refs = 1;
983                 INIT_LIST_HEAD(&rec->backrefs);
984                 INIT_LIST_HEAD(&rec->orphan_extents);
985                 rec->holes = RB_ROOT;
986
987                 node = malloc(sizeof(*node));
988                 if (!node) {
989                         free(rec);
990                         return ERR_PTR(-ENOMEM);
991                 }
992                 node->cache.start = ino;
993                 node->cache.size = 1;
994                 node->data = rec;
995
996                 if (ino == BTRFS_FREE_INO_OBJECTID)
997                         rec->found_link = 1;
998
999                 ret = insert_cache_extent(inode_cache, &node->cache);
1000                 if (ret)
1001                         return ERR_PTR(-EEXIST);
1002         }
1003         return rec;
1004 }
1005
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1007 {
1008         struct orphan_data_extent *orphan;
1009
1010         while (!list_empty(orphan_extents)) {
1011                 orphan = list_entry(orphan_extents->next,
1012                                     struct orphan_data_extent, list);
1013                 list_del(&orphan->list);
1014                 free(orphan);
1015         }
1016 }
1017
1018 static void free_inode_rec(struct inode_record *rec)
1019 {
1020         struct inode_backref *backref;
1021
1022         if (--rec->refs > 0)
1023                 return;
1024
1025         while (!list_empty(&rec->backrefs)) {
1026                 backref = to_inode_backref(rec->backrefs.next);
1027                 list_del(&backref->list);
1028                 free(backref);
1029         }
1030         free_orphan_data_extents(&rec->orphan_extents);
1031         free_file_extent_holes(&rec->holes);
1032         free(rec);
1033 }
1034
1035 static int can_free_inode_rec(struct inode_record *rec)
1036 {
1037         if (!rec->errors && rec->checked && rec->found_inode_item &&
1038             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1039                 return 1;
1040         return 0;
1041 }
1042
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044                                  struct inode_record *rec)
1045 {
1046         struct cache_extent *cache;
1047         struct inode_backref *tmp, *backref;
1048         struct ptr_node *node;
1049         u8 filetype;
1050
1051         if (!rec->found_inode_item)
1052                 return;
1053
1054         filetype = imode_to_type(rec->imode);
1055         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056                 if (backref->found_dir_item && backref->found_dir_index) {
1057                         if (backref->filetype != filetype)
1058                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059                         if (!backref->errors && backref->found_inode_ref &&
1060                             rec->nlink == rec->found_link) {
1061                                 list_del(&backref->list);
1062                                 free(backref);
1063                         }
1064                 }
1065         }
1066
1067         if (!rec->checked || rec->merging)
1068                 return;
1069
1070         if (S_ISDIR(rec->imode)) {
1071                 if (rec->found_size != rec->isize)
1072                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073                 if (rec->found_file_extent)
1074                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076                 if (rec->found_dir_item)
1077                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1078                 if (rec->found_size != rec->nbytes)
1079                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1080                 if (rec->nlink > 0 && !no_holes &&
1081                     (rec->extent_end < rec->isize ||
1082                      first_extent_gap(&rec->holes) < rec->isize))
1083                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1084         }
1085
1086         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087                 if (rec->found_csum_item && rec->nodatasum)
1088                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089                 if (rec->some_csum_missing && !rec->nodatasum)
1090                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1091         }
1092
1093         BUG_ON(rec->refs != 1);
1094         if (can_free_inode_rec(rec)) {
1095                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096                 node = container_of(cache, struct ptr_node, cache);
1097                 BUG_ON(node->data != rec);
1098                 remove_cache_extent(inode_cache, &node->cache);
1099                 free(node);
1100                 free_inode_rec(rec);
1101         }
1102 }
1103
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1105 {
1106         struct btrfs_path path;
1107         struct btrfs_key key;
1108         int ret;
1109
1110         key.objectid = BTRFS_ORPHAN_OBJECTID;
1111         key.type = BTRFS_ORPHAN_ITEM_KEY;
1112         key.offset = ino;
1113
1114         btrfs_init_path(&path);
1115         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116         btrfs_release_path(&path);
1117         if (ret > 0)
1118                 ret = -ENOENT;
1119         return ret;
1120 }
1121
1122 static int process_inode_item(struct extent_buffer *eb,
1123                               int slot, struct btrfs_key *key,
1124                               struct shared_node *active_node)
1125 {
1126         struct inode_record *rec;
1127         struct btrfs_inode_item *item;
1128
1129         rec = active_node->current;
1130         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131         if (rec->found_inode_item) {
1132                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1133                 return 1;
1134         }
1135         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136         rec->nlink = btrfs_inode_nlink(eb, item);
1137         rec->isize = btrfs_inode_size(eb, item);
1138         rec->nbytes = btrfs_inode_nbytes(eb, item);
1139         rec->imode = btrfs_inode_mode(eb, item);
1140         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1141                 rec->nodatasum = 1;
1142         rec->found_inode_item = 1;
1143         if (rec->nlink == 0)
1144                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145         maybe_free_inode_rec(&active_node->inode_cache, rec);
1146         return 0;
1147 }
1148
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1150                                                 const char *name,
1151                                                 int namelen, u64 dir)
1152 {
1153         struct inode_backref *backref;
1154
1155         list_for_each_entry(backref, &rec->backrefs, list) {
1156                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1157                         break;
1158                 if (backref->dir != dir || backref->namelen != namelen)
1159                         continue;
1160                 if (memcmp(name, backref->name, namelen))
1161                         continue;
1162                 return backref;
1163         }
1164
1165         backref = malloc(sizeof(*backref) + namelen + 1);
1166         if (!backref)
1167                 return NULL;
1168         memset(backref, 0, sizeof(*backref));
1169         backref->dir = dir;
1170         backref->namelen = namelen;
1171         memcpy(backref->name, name, namelen);
1172         backref->name[namelen] = '\0';
1173         list_add_tail(&backref->list, &rec->backrefs);
1174         return backref;
1175 }
1176
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178                              u64 ino, u64 dir, u64 index,
1179                              const char *name, int namelen,
1180                              u8 filetype, u8 itemtype, int errors)
1181 {
1182         struct inode_record *rec;
1183         struct inode_backref *backref;
1184
1185         rec = get_inode_rec(inode_cache, ino, 1);
1186         BUG_ON(IS_ERR(rec));
1187         backref = get_inode_backref(rec, name, namelen, dir);
1188         BUG_ON(!backref);
1189         if (errors)
1190                 backref->errors |= errors;
1191         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192                 if (backref->found_dir_index)
1193                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194                 if (backref->found_inode_ref && backref->index != index)
1195                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1196                 if (backref->found_dir_item && backref->filetype != filetype)
1197                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1198
1199                 backref->index = index;
1200                 backref->filetype = filetype;
1201                 backref->found_dir_index = 1;
1202         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1203                 rec->found_link++;
1204                 if (backref->found_dir_item)
1205                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206                 if (backref->found_dir_index && backref->filetype != filetype)
1207                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1208
1209                 backref->filetype = filetype;
1210                 backref->found_dir_item = 1;
1211         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213                 if (backref->found_inode_ref)
1214                         backref->errors |= REF_ERR_DUP_INODE_REF;
1215                 if (backref->found_dir_index && backref->index != index)
1216                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1217                 else
1218                         backref->index = index;
1219
1220                 backref->ref_type = itemtype;
1221                 backref->found_inode_ref = 1;
1222         } else {
1223                 BUG_ON(1);
1224         }
1225
1226         maybe_free_inode_rec(inode_cache, rec);
1227         return 0;
1228 }
1229
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231                             struct cache_tree *dst_cache)
1232 {
1233         struct inode_backref *backref;
1234         u32 dir_count = 0;
1235         int ret = 0;
1236
1237         dst->merging = 1;
1238         list_for_each_entry(backref, &src->backrefs, list) {
1239                 if (backref->found_dir_index) {
1240                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1241                                         backref->index, backref->name,
1242                                         backref->namelen, backref->filetype,
1243                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1244                 }
1245                 if (backref->found_dir_item) {
1246                         dir_count++;
1247                         add_inode_backref(dst_cache, dst->ino,
1248                                         backref->dir, 0, backref->name,
1249                                         backref->namelen, backref->filetype,
1250                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1251                 }
1252                 if (backref->found_inode_ref) {
1253                         add_inode_backref(dst_cache, dst->ino,
1254                                         backref->dir, backref->index,
1255                                         backref->name, backref->namelen, 0,
1256                                         backref->ref_type, backref->errors);
1257                 }
1258         }
1259
1260         if (src->found_dir_item)
1261                 dst->found_dir_item = 1;
1262         if (src->found_file_extent)
1263                 dst->found_file_extent = 1;
1264         if (src->found_csum_item)
1265                 dst->found_csum_item = 1;
1266         if (src->some_csum_missing)
1267                 dst->some_csum_missing = 1;
1268         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1270                 if (ret < 0)
1271                         return ret;
1272         }
1273
1274         BUG_ON(src->found_link < dir_count);
1275         dst->found_link += src->found_link - dir_count;
1276         dst->found_size += src->found_size;
1277         if (src->extent_start != (u64)-1) {
1278                 if (dst->extent_start == (u64)-1) {
1279                         dst->extent_start = src->extent_start;
1280                         dst->extent_end = src->extent_end;
1281                 } else {
1282                         if (dst->extent_end > src->extent_start)
1283                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284                         else if (dst->extent_end < src->extent_start) {
1285                                 ret = add_file_extent_hole(&dst->holes,
1286                                         dst->extent_end,
1287                                         src->extent_start - dst->extent_end);
1288                         }
1289                         if (dst->extent_end < src->extent_end)
1290                                 dst->extent_end = src->extent_end;
1291                 }
1292         }
1293
1294         dst->errors |= src->errors;
1295         if (src->found_inode_item) {
1296                 if (!dst->found_inode_item) {
1297                         dst->nlink = src->nlink;
1298                         dst->isize = src->isize;
1299                         dst->nbytes = src->nbytes;
1300                         dst->imode = src->imode;
1301                         dst->nodatasum = src->nodatasum;
1302                         dst->found_inode_item = 1;
1303                 } else {
1304                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1305                 }
1306         }
1307         dst->merging = 0;
1308
1309         return 0;
1310 }
1311
1312 static int splice_shared_node(struct shared_node *src_node,
1313                               struct shared_node *dst_node)
1314 {
1315         struct cache_extent *cache;
1316         struct ptr_node *node, *ins;
1317         struct cache_tree *src, *dst;
1318         struct inode_record *rec, *conflict;
1319         u64 current_ino = 0;
1320         int splice = 0;
1321         int ret;
1322
1323         if (--src_node->refs == 0)
1324                 splice = 1;
1325         if (src_node->current)
1326                 current_ino = src_node->current->ino;
1327
1328         src = &src_node->root_cache;
1329         dst = &dst_node->root_cache;
1330 again:
1331         cache = search_cache_extent(src, 0);
1332         while (cache) {
1333                 node = container_of(cache, struct ptr_node, cache);
1334                 rec = node->data;
1335                 cache = next_cache_extent(cache);
1336
1337                 if (splice) {
1338                         remove_cache_extent(src, &node->cache);
1339                         ins = node;
1340                 } else {
1341                         ins = malloc(sizeof(*ins));
1342                         BUG_ON(!ins);
1343                         ins->cache.start = node->cache.start;
1344                         ins->cache.size = node->cache.size;
1345                         ins->data = rec;
1346                         rec->refs++;
1347                 }
1348                 ret = insert_cache_extent(dst, &ins->cache);
1349                 if (ret == -EEXIST) {
1350                         conflict = get_inode_rec(dst, rec->ino, 1);
1351                         BUG_ON(IS_ERR(conflict));
1352                         merge_inode_recs(rec, conflict, dst);
1353                         if (rec->checked) {
1354                                 conflict->checked = 1;
1355                                 if (dst_node->current == conflict)
1356                                         dst_node->current = NULL;
1357                         }
1358                         maybe_free_inode_rec(dst, conflict);
1359                         free_inode_rec(rec);
1360                         free(ins);
1361                 } else {
1362                         BUG_ON(ret);
1363                 }
1364         }
1365
1366         if (src == &src_node->root_cache) {
1367                 src = &src_node->inode_cache;
1368                 dst = &dst_node->inode_cache;
1369                 goto again;
1370         }
1371
1372         if (current_ino > 0 && (!dst_node->current ||
1373             current_ino > dst_node->current->ino)) {
1374                 if (dst_node->current) {
1375                         dst_node->current->checked = 1;
1376                         maybe_free_inode_rec(dst, dst_node->current);
1377                 }
1378                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379                 BUG_ON(IS_ERR(dst_node->current));
1380         }
1381         return 0;
1382 }
1383
1384 static void free_inode_ptr(struct cache_extent *cache)
1385 {
1386         struct ptr_node *node;
1387         struct inode_record *rec;
1388
1389         node = container_of(cache, struct ptr_node, cache);
1390         rec = node->data;
1391         free_inode_rec(rec);
1392         free(node);
1393 }
1394
/*
 * NOTE(review): presumably this expands to the free_inode_recs_tree() helper
 * called from enter_shared_node(), releasing each cache entry through
 * free_inode_ptr() - confirm against the macro definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1396
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1398                                             u64 bytenr)
1399 {
1400         struct cache_extent *cache;
1401         struct shared_node *node;
1402
1403         cache = lookup_cache_extent(shared, bytenr, 1);
1404         if (cache) {
1405                 node = container_of(cache, struct shared_node, cache);
1406                 return node;
1407         }
1408         return NULL;
1409 }
1410
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1412 {
1413         int ret;
1414         struct shared_node *node;
1415
1416         node = calloc(1, sizeof(*node));
1417         if (!node)
1418                 return -ENOMEM;
1419         node->cache.start = bytenr;
1420         node->cache.size = 1;
1421         cache_tree_init(&node->root_cache);
1422         cache_tree_init(&node->inode_cache);
1423         node->refs = refs;
1424
1425         ret = insert_cache_extent(shared, &node->cache);
1426
1427         return ret;
1428 }
1429
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431                              struct walk_control *wc, int level)
1432 {
1433         struct shared_node *node;
1434         struct shared_node *dest;
1435         int ret;
1436
1437         if (level == wc->active_node)
1438                 return 0;
1439
1440         BUG_ON(wc->active_node <= level);
1441         node = find_shared_node(&wc->shared, bytenr);
1442         if (!node) {
1443                 ret = add_shared_node(&wc->shared, bytenr, refs);
1444                 BUG_ON(ret);
1445                 node = find_shared_node(&wc->shared, bytenr);
1446                 wc->nodes[level] = node;
1447                 wc->active_node = level;
1448                 return 0;
1449         }
1450
1451         if (wc->root_level == wc->active_node &&
1452             btrfs_root_refs(&root->root_item) == 0) {
1453                 if (--node->refs == 0) {
1454                         free_inode_recs_tree(&node->root_cache);
1455                         free_inode_recs_tree(&node->inode_cache);
1456                         remove_cache_extent(&wc->shared, &node->cache);
1457                         free(node);
1458                 }
1459                 return 1;
1460         }
1461
1462         dest = wc->nodes[wc->active_node];
1463         splice_shared_node(node, dest);
1464         if (node->refs == 0) {
1465                 remove_cache_extent(&wc->shared, &node->cache);
1466                 free(node);
1467         }
1468         return 1;
1469 }
1470
1471 static int leave_shared_node(struct btrfs_root *root,
1472                              struct walk_control *wc, int level)
1473 {
1474         struct shared_node *node;
1475         struct shared_node *dest;
1476         int i;
1477
1478         if (level == wc->root_level)
1479                 return 0;
1480
1481         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1482                 if (wc->nodes[i])
1483                         break;
1484         }
1485         BUG_ON(i >= BTRFS_MAX_LEVEL);
1486
1487         node = wc->nodes[wc->active_node];
1488         wc->nodes[wc->active_node] = NULL;
1489         wc->active_node = i;
1490
1491         dest = wc->nodes[wc->active_node];
1492         if (wc->active_node < wc->root_level ||
1493             btrfs_root_refs(&root->root_item) > 0) {
1494                 BUG_ON(node->refs <= 1);
1495                 splice_shared_node(node, dest);
1496         } else {
1497                 BUG_ON(node->refs < 2);
1498                 node->refs--;
1499         }
1500         return 0;
1501 }
1502
1503 /*
1504  * Returns:
1505  * < 0 - on error
1506  * 1   - if the root with id child_root_id is a child of root parent_root_id
1507  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1508  *       has other root(s) as parent(s)
1509  * 2   - if the root child_root_id doesn't have any parent roots
1510  */
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1512                          u64 child_root_id)
1513 {
1514         struct btrfs_path path;
1515         struct btrfs_key key;
1516         struct extent_buffer *leaf;
1517         int has_parent = 0;
1518         int ret;
1519
1520         btrfs_init_path(&path);
1521
1522         key.objectid = parent_root_id;
1523         key.type = BTRFS_ROOT_REF_KEY;
1524         key.offset = child_root_id;
1525         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1526                                 0, 0);
1527         if (ret < 0)
1528                 return ret;
1529         btrfs_release_path(&path);
1530         if (!ret)
1531                 return 1;
1532
1533         key.objectid = child_root_id;
1534         key.type = BTRFS_ROOT_BACKREF_KEY;
1535         key.offset = 0;
1536         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1537                                 0, 0);
1538         if (ret < 0)
1539                 goto out;
1540
1541         while (1) {
1542                 leaf = path.nodes[0];
1543                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1545                         if (ret)
1546                                 break;
1547                         leaf = path.nodes[0];
1548                 }
1549
1550                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1551                 if (key.objectid != child_root_id ||
1552                     key.type != BTRFS_ROOT_BACKREF_KEY)
1553                         break;
1554
1555                 has_parent = 1;
1556
1557                 if (key.offset == parent_root_id) {
1558                         btrfs_release_path(&path);
1559                         return 1;
1560                 }
1561
1562                 path.slots[0]++;
1563         }
1564 out:
1565         btrfs_release_path(&path);
1566         if (ret < 0)
1567                 return ret;
1568         return has_parent ? 0 : 2;
1569 }
1570
1571 static int process_dir_item(struct extent_buffer *eb,
1572                             int slot, struct btrfs_key *key,
1573                             struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u32 data_len;
1580         int error;
1581         int nritems = 0;
1582         u8 filetype;
1583         struct btrfs_dir_item *di;
1584         struct inode_record *rec;
1585         struct cache_tree *root_cache;
1586         struct cache_tree *inode_cache;
1587         struct btrfs_key location;
1588         char namebuf[BTRFS_NAME_LEN];
1589
1590         root_cache = &active_node->root_cache;
1591         inode_cache = &active_node->inode_cache;
1592         rec = active_node->current;
1593         rec->found_dir_item = 1;
1594
1595         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596         total = btrfs_item_size_nr(eb, slot);
1597         while (cur < total) {
1598                 nritems++;
1599                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600                 name_len = btrfs_dir_name_len(eb, di);
1601                 data_len = btrfs_dir_data_len(eb, di);
1602                 filetype = btrfs_dir_type(eb, di);
1603
1604                 rec->found_size += name_len;
1605                 if (cur + sizeof(*di) + name_len > total ||
1606                     name_len > BTRFS_NAME_LEN) {
1607                         error = REF_ERR_NAME_TOO_LONG;
1608
1609                         if (cur + sizeof(*di) > total)
1610                                 break;
1611                         len = min_t(u32, total - cur - sizeof(*di),
1612                                     BTRFS_NAME_LEN);
1613                 } else {
1614                         len = name_len;
1615                         error = 0;
1616                 }
1617
1618                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1619
1620                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621                     key->offset != btrfs_name_hash(namebuf, len)) {
1622                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1623                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624                         key->objectid, key->offset, namebuf, len, filetype,
1625                         key->offset, btrfs_name_hash(namebuf, len));
1626                 }
1627
1628                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629                         add_inode_backref(inode_cache, location.objectid,
1630                                           key->objectid, key->offset, namebuf,
1631                                           len, filetype, key->type, error);
1632                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633                         add_inode_backref(root_cache, location.objectid,
1634                                           key->objectid, key->offset,
1635                                           namebuf, len, filetype,
1636                                           key->type, error);
1637                 } else {
1638                         fprintf(stderr, "invalid location in dir item %u\n",
1639                                 location.type);
1640                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1641                                           key->objectid, key->offset, namebuf,
1642                                           len, filetype, key->type, error);
1643                 }
1644
1645                 len = sizeof(*di) + name_len + data_len;
1646                 di = (struct btrfs_dir_item *)((char *)di + len);
1647                 cur += len;
1648         }
1649         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1650                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1651
1652         return 0;
1653 }
1654
1655 static int process_inode_ref(struct extent_buffer *eb,
1656                              int slot, struct btrfs_key *key,
1657                              struct shared_node *active_node)
1658 {
1659         u32 total;
1660         u32 cur = 0;
1661         u32 len;
1662         u32 name_len;
1663         u64 index;
1664         int error;
1665         struct cache_tree *inode_cache;
1666         struct btrfs_inode_ref *ref;
1667         char namebuf[BTRFS_NAME_LEN];
1668
1669         inode_cache = &active_node->inode_cache;
1670
1671         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1672         total = btrfs_item_size_nr(eb, slot);
1673         while (cur < total) {
1674                 name_len = btrfs_inode_ref_name_len(eb, ref);
1675                 index = btrfs_inode_ref_index(eb, ref);
1676
1677                 /* inode_ref + namelen should not cross item boundary */
1678                 if (cur + sizeof(*ref) + name_len > total ||
1679                     name_len > BTRFS_NAME_LEN) {
1680                         if (total < cur + sizeof(*ref))
1681                                 break;
1682
1683                         /* Still try to read out the remaining part */
1684                         len = min_t(u32, total - cur - sizeof(*ref),
1685                                     BTRFS_NAME_LEN);
1686                         error = REF_ERR_NAME_TOO_LONG;
1687                 } else {
1688                         len = name_len;
1689                         error = 0;
1690                 }
1691
1692                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1693                 add_inode_backref(inode_cache, key->objectid, key->offset,
1694                                   index, namebuf, len, 0, key->type, error);
1695
1696                 len = sizeof(*ref) + name_len;
1697                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1698                 cur += len;
1699         }
1700         return 0;
1701 }
1702
1703 static int process_inode_extref(struct extent_buffer *eb,
1704                                 int slot, struct btrfs_key *key,
1705                                 struct shared_node *active_node)
1706 {
1707         u32 total;
1708         u32 cur = 0;
1709         u32 len;
1710         u32 name_len;
1711         u64 index;
1712         u64 parent;
1713         int error;
1714         struct cache_tree *inode_cache;
1715         struct btrfs_inode_extref *extref;
1716         char namebuf[BTRFS_NAME_LEN];
1717
1718         inode_cache = &active_node->inode_cache;
1719
1720         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1721         total = btrfs_item_size_nr(eb, slot);
1722         while (cur < total) {
1723                 name_len = btrfs_inode_extref_name_len(eb, extref);
1724                 index = btrfs_inode_extref_index(eb, extref);
1725                 parent = btrfs_inode_extref_parent(eb, extref);
1726                 if (name_len <= BTRFS_NAME_LEN) {
1727                         len = name_len;
1728                         error = 0;
1729                 } else {
1730                         len = BTRFS_NAME_LEN;
1731                         error = REF_ERR_NAME_TOO_LONG;
1732                 }
1733                 read_extent_buffer(eb, namebuf,
1734                                    (unsigned long)(extref + 1), len);
1735                 add_inode_backref(inode_cache, key->objectid, parent,
1736                                   index, namebuf, len, 0, key->type, error);
1737
1738                 len = sizeof(*extref) + name_len;
1739                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1740                 cur += len;
1741         }
1742         return 0;
1743
1744 }
1745
1746 static int count_csum_range(struct btrfs_root *root, u64 start,
1747                             u64 len, u64 *found)
1748 {
1749         struct btrfs_key key;
1750         struct btrfs_path path;
1751         struct extent_buffer *leaf;
1752         int ret;
1753         size_t size;
1754         *found = 0;
1755         u64 csum_end;
1756         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1757
1758         btrfs_init_path(&path);
1759
1760         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1761         key.offset = start;
1762         key.type = BTRFS_EXTENT_CSUM_KEY;
1763
1764         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1765                                 &key, &path, 0, 0);
1766         if (ret < 0)
1767                 goto out;
1768         if (ret > 0 && path.slots[0] > 0) {
1769                 leaf = path.nodes[0];
1770                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1771                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1772                     key.type == BTRFS_EXTENT_CSUM_KEY)
1773                         path.slots[0]--;
1774         }
1775
1776         while (len > 0) {
1777                 leaf = path.nodes[0];
1778                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1779                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1780                         if (ret > 0)
1781                                 break;
1782                         else if (ret < 0)
1783                                 goto out;
1784                         leaf = path.nodes[0];
1785                 }
1786
1787                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1788                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1789                     key.type != BTRFS_EXTENT_CSUM_KEY)
1790                         break;
1791
1792                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1793                 if (key.offset >= start + len)
1794                         break;
1795
1796                 if (key.offset > start)
1797                         start = key.offset;
1798
1799                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1800                 csum_end = key.offset + (size / csum_size) *
1801                            root->fs_info->sectorsize;
1802                 if (csum_end > start) {
1803                         size = min(csum_end - start, len);
1804                         len -= size;
1805                         start += size;
1806                         *found += size;
1807                 }
1808
1809                 path.slots[0]++;
1810         }
1811 out:
1812         btrfs_release_path(&path);
1813         if (ret < 0)
1814                 return ret;
1815         return 0;
1816 }
1817
1818 static int process_file_extent(struct btrfs_root *root,
1819                                 struct extent_buffer *eb,
1820                                 int slot, struct btrfs_key *key,
1821                                 struct shared_node *active_node)
1822 {
1823         struct inode_record *rec;
1824         struct btrfs_file_extent_item *fi;
1825         u64 num_bytes = 0;
1826         u64 disk_bytenr = 0;
1827         u64 extent_offset = 0;
1828         u64 mask = root->fs_info->sectorsize - 1;
1829         int extent_type;
1830         int ret;
1831
1832         rec = active_node->current;
1833         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1834         rec->found_file_extent = 1;
1835
1836         if (rec->extent_start == (u64)-1) {
1837                 rec->extent_start = key->offset;
1838                 rec->extent_end = key->offset;
1839         }
1840
1841         if (rec->extent_end > key->offset)
1842                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1843         else if (rec->extent_end < key->offset) {
1844                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1845                                            key->offset - rec->extent_end);
1846                 if (ret < 0)
1847                         return ret;
1848         }
1849
1850         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1851         extent_type = btrfs_file_extent_type(eb, fi);
1852
1853         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1854                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1855                 if (num_bytes == 0)
1856                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1857                 rec->found_size += num_bytes;
1858                 num_bytes = (num_bytes + mask) & ~mask;
1859         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1860                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1861                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1862                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1863                 extent_offset = btrfs_file_extent_offset(eb, fi);
1864                 if (num_bytes == 0 || (num_bytes & mask))
1865                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1866                 if (num_bytes + extent_offset >
1867                     btrfs_file_extent_ram_bytes(eb, fi))
1868                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1869                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1870                     (btrfs_file_extent_compression(eb, fi) ||
1871                      btrfs_file_extent_encryption(eb, fi) ||
1872                      btrfs_file_extent_other_encoding(eb, fi)))
1873                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1874                 if (disk_bytenr > 0)
1875                         rec->found_size += num_bytes;
1876         } else {
1877                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1878         }
1879         rec->extent_end = key->offset + num_bytes;
1880
1881         /*
1882          * The data reloc tree will copy full extents into its inode and then
1883          * copy the corresponding csums.  Because the extent it copied could be
1884          * a preallocated extent that hasn't been written to yet there may be no
1885          * csums to copy, ergo we won't have csums for our file extent.  This is
1886          * ok so just don't bother checking csums if the inode belongs to the
1887          * data reloc tree.
1888          */
1889         if (disk_bytenr > 0 &&
1890             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1891                 u64 found;
1892                 if (btrfs_file_extent_compression(eb, fi))
1893                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1894                 else
1895                         disk_bytenr += extent_offset;
1896
1897                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1898                 if (ret < 0)
1899                         return ret;
1900                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1901                         if (found > 0)
1902                                 rec->found_csum_item = 1;
1903                         if (found < num_bytes)
1904                                 rec->some_csum_missing = 1;
1905                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1906                         if (found > 0)
1907                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1908                 }
1909         }
1910         return 0;
1911 }
1912
1913 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1914                             struct walk_control *wc)
1915 {
1916         struct btrfs_key key;
1917         u32 nritems;
1918         int i;
1919         int ret = 0;
1920         struct cache_tree *inode_cache;
1921         struct shared_node *active_node;
1922
1923         if (wc->root_level == wc->active_node &&
1924             btrfs_root_refs(&root->root_item) == 0)
1925                 return 0;
1926
1927         active_node = wc->nodes[wc->active_node];
1928         inode_cache = &active_node->inode_cache;
1929         nritems = btrfs_header_nritems(eb);
1930         for (i = 0; i < nritems; i++) {
1931                 btrfs_item_key_to_cpu(eb, &key, i);
1932
1933                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1934                         continue;
1935                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1936                         continue;
1937
1938                 if (active_node->current == NULL ||
1939                     active_node->current->ino < key.objectid) {
1940                         if (active_node->current) {
1941                                 active_node->current->checked = 1;
1942                                 maybe_free_inode_rec(inode_cache,
1943                                                      active_node->current);
1944                         }
1945                         active_node->current = get_inode_rec(inode_cache,
1946                                                              key.objectid, 1);
1947                         BUG_ON(IS_ERR(active_node->current));
1948                 }
1949                 switch (key.type) {
1950                 case BTRFS_DIR_ITEM_KEY:
1951                 case BTRFS_DIR_INDEX_KEY:
1952                         ret = process_dir_item(eb, i, &key, active_node);
1953                         break;
1954                 case BTRFS_INODE_REF_KEY:
1955                         ret = process_inode_ref(eb, i, &key, active_node);
1956                         break;
1957                 case BTRFS_INODE_EXTREF_KEY:
1958                         ret = process_inode_extref(eb, i, &key, active_node);
1959                         break;
1960                 case BTRFS_INODE_ITEM_KEY:
1961                         ret = process_inode_item(eb, i, &key, active_node);
1962                         break;
1963                 case BTRFS_EXTENT_DATA_KEY:
1964                         ret = process_file_extent(root, eb, i, &key,
1965                                                   active_node);
1966                         break;
1967                 default:
1968                         break;
1969                 };
1970         }
1971         return ret;
1972 }
1973
1974 struct node_refs {
1975         u64 bytenr[BTRFS_MAX_LEVEL];
1976         u64 refs[BTRFS_MAX_LEVEL];
1977         int need_check[BTRFS_MAX_LEVEL];
1978         /* field for checking all trees */
1979         int checked[BTRFS_MAX_LEVEL];
1980         /* the corresponding extent should be marked as full backref or not */
1981         int full_backref[BTRFS_MAX_LEVEL];
1982 };
1983
1984 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1985                              struct extent_buffer *eb, struct node_refs *nrefs,
1986                              u64 level, int check_all);
1987 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1988                             unsigned int ext_ref);
1989
1990 /*
1991  * Returns >0  Found error, not fatal, should continue
1992  * Returns <0  Fatal error, must exit the whole check
1993  * Returns 0   No errors found
1994  */
1995 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1996                                struct node_refs *nrefs, int *level, int ext_ref)
1997 {
1998         struct extent_buffer *cur = path->nodes[0];
1999         struct btrfs_key key;
2000         u64 cur_bytenr;
2001         u32 nritems;
2002         u64 first_ino = 0;
2003         int root_level = btrfs_header_level(root->node);
2004         int i;
2005         int ret = 0; /* Final return value */
2006         int err = 0; /* Positive error bitmap */
2007
2008         cur_bytenr = cur->start;
2009
2010         /* skip to first inode item or the first inode number change */
2011         nritems = btrfs_header_nritems(cur);
2012         for (i = 0; i < nritems; i++) {
2013                 btrfs_item_key_to_cpu(cur, &key, i);
2014                 if (i == 0)
2015                         first_ino = key.objectid;
2016                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2017                     (first_ino && first_ino != key.objectid))
2018                         break;
2019         }
2020         if (i == nritems) {
2021                 path->slots[0] = nritems;
2022                 return 0;
2023         }
2024         path->slots[0] = i;
2025
2026 again:
2027         err |= check_inode_item(root, path, ext_ref);
2028
2029         /* modify cur since check_inode_item may change path */
2030         cur = path->nodes[0];
2031
2032         if (err & LAST_ITEM)
2033                 goto out;
2034
2035         /* still have inode items in thie leaf */
2036         if (cur->start == cur_bytenr)
2037                 goto again;
2038
2039         /*
2040          * we have switched to another leaf, above nodes may
2041          * have changed, here walk down the path, if a node
2042          * or leaf is shared, check whether we can skip this
2043          * node or leaf.
2044          */
2045         for (i = root_level; i >= 0; i--) {
2046                 if (path->nodes[i]->start == nrefs->bytenr[i])
2047                         continue;
2048
2049                 ret = update_nodes_refs(root, path->nodes[i]->start,
2050                                 path->nodes[i], nrefs, i, 0);
2051                 if (ret)
2052                         goto out;
2053
2054                 if (!nrefs->need_check[i]) {
2055                         *level += 1;
2056                         break;
2057                 }
2058         }
2059
2060         for (i = 0; i < *level; i++) {
2061                 free_extent_buffer(path->nodes[i]);
2062                 path->nodes[i] = NULL;
2063         }
2064 out:
2065         err &= ~LAST_ITEM;
2066         if (err && !ret)
2067                 ret = err;
2068         return ret;
2069 }
2070
2071 static void reada_walk_down(struct btrfs_root *root,
2072                             struct extent_buffer *node, int slot)
2073 {
2074         struct btrfs_fs_info *fs_info = root->fs_info;
2075         u64 bytenr;
2076         u64 ptr_gen;
2077         u32 nritems;
2078         int i;
2079         int level;
2080
2081         level = btrfs_header_level(node);
2082         if (level != 1)
2083                 return;
2084
2085         nritems = btrfs_header_nritems(node);
2086         for (i = slot; i < nritems; i++) {
2087                 bytenr = btrfs_node_blockptr(node, i);
2088                 ptr_gen = btrfs_node_ptr_generation(node, i);
2089                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2090         }
2091 }
2092
2093 /*
2094  * Check the child node/leaf by the following condition:
2095  * 1. the first item key of the node/leaf should be the same with the one
2096  *    in parent.
2097  * 2. block in parent node should match the child node/leaf.
2098  * 3. generation of parent node and child's header should be consistent.
2099  *
2100  * Or the child node/leaf pointed by the key in parent is not valid.
2101  *
2102  * We hope to check leaf owner too, but since subvol may share leaves,
2103  * which makes leaf owner check not so strong, key check should be
2104  * sufficient enough for that case.
2105  */
2106 static int check_child_node(struct extent_buffer *parent, int slot,
2107                             struct extent_buffer *child)
2108 {
2109         struct btrfs_key parent_key;
2110         struct btrfs_key child_key;
2111         int ret = 0;
2112
2113         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2114         if (btrfs_header_level(child) == 0)
2115                 btrfs_item_key_to_cpu(child, &child_key, 0);
2116         else
2117                 btrfs_node_key_to_cpu(child, &child_key, 0);
2118
2119         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2120                 ret = -EINVAL;
2121                 fprintf(stderr,
2122                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2123                         parent_key.objectid, parent_key.type, parent_key.offset,
2124                         child_key.objectid, child_key.type, child_key.offset);
2125         }
2126         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2127                 ret = -EINVAL;
2128                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2129                         btrfs_node_blockptr(parent, slot),
2130                         btrfs_header_bytenr(child));
2131         }
2132         if (btrfs_node_ptr_generation(parent, slot) !=
2133             btrfs_header_generation(child)) {
2134                 ret = -EINVAL;
2135                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2136                         btrfs_header_generation(child),
2137                         btrfs_node_ptr_generation(parent, slot));
2138         }
2139         return ret;
2140 }
2141
2142 /*
2143  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2144  * in every fs or file tree check. Here we find its all root ids, and only check
2145  * it in the fs or file tree which has the smallest root id.
2146  */
2147 static int need_check(struct btrfs_root *root, struct ulist *roots)
2148 {
2149         struct rb_node *node;
2150         struct ulist_node *u;
2151
2152         /*
2153          * @roots can be empty if it belongs to tree reloc tree
2154          * In that case, we should always check the leaf, as we can't use
2155          * the tree owner to ensure some other root will check it.
2156          */
2157         if (roots->nnodes == 1 || roots->nnodes == 0)
2158                 return 1;
2159
2160         node = rb_first(&roots->root);
2161         u = rb_entry(node, struct ulist_node, rb_node);
2162         /*
2163          * current root id is not smallest, we skip it and let it be checked
2164          * in the fs or file tree who hash the smallest root id.
2165          */
2166         if (root->objectid != u->val)
2167                 return 0;
2168
2169         return 1;
2170 }
2171
2172 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2173                                u64 *flags_ret)
2174 {
2175         struct btrfs_root *extent_root = root->fs_info->extent_root;
2176         struct btrfs_root_item *ri = &root->root_item;
2177         struct btrfs_extent_inline_ref *iref;
2178         struct btrfs_extent_item *ei;
2179         struct btrfs_key key;
2180         struct btrfs_path *path = NULL;
2181         unsigned long ptr;
2182         unsigned long end;
2183         u64 flags;
2184         u64 owner = 0;
2185         u64 offset;
2186         int slot;
2187         int type;
2188         int ret = 0;
2189
2190         /*
2191          * Except file/reloc tree, we can not have FULL BACKREF MODE
2192          */
2193         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
2194                 goto normal;
2195
2196         /* root node */
2197         if (eb->start == btrfs_root_bytenr(ri))
2198                 goto normal;
2199
2200         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2201                 goto full_backref;
2202
2203         owner = btrfs_header_owner(eb);
2204         if (owner == root->objectid)
2205                 goto normal;
2206
2207         path = btrfs_alloc_path();
2208         if (!path)
2209                 return -ENOMEM;
2210
2211         key.objectid = btrfs_header_bytenr(eb);
2212         key.type = (u8)-1;
2213         key.offset = (u64)-1;
2214
2215         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2216         if (ret <= 0) {
2217                 ret = -EIO;
2218                 goto out;
2219         }
2220
2221         if (ret > 0) {
2222                 ret = btrfs_previous_extent_item(extent_root, path,
2223                                                  key.objectid);
2224                 if (ret)
2225                         goto full_backref;
2226
2227         }
2228         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2229
2230         eb = path->nodes[0];
2231         slot = path->slots[0];
2232         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2233
2234         flags = btrfs_extent_flags(eb, ei);
2235         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2236                 goto full_backref;
2237
2238         ptr = (unsigned long)(ei + 1);
2239         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
2240
2241         if (key.type == BTRFS_EXTENT_ITEM_KEY)
2242                 ptr += sizeof(struct btrfs_tree_block_info);
2243
2244 next:
2245         /* Reached extent item ends normally */
2246         if (ptr == end)
2247                 goto full_backref;
2248
2249         /* Beyond extent item end, wrong item size */
2250         if (ptr > end) {
2251                 error("extent item at bytenr %llu slot %d has wrong size",
2252                         eb->start, slot);
2253                 goto full_backref;
2254         }
2255
2256         iref = (struct btrfs_extent_inline_ref *)ptr;
2257         offset = btrfs_extent_inline_ref_offset(eb, iref);
2258         type = btrfs_extent_inline_ref_type(eb, iref);
2259
2260         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2261                 goto normal;
2262         ptr += btrfs_extent_inline_ref_size(type);
2263         goto next;
2264
2265 normal:
2266         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2267         goto out;
2268
2269 full_backref:
2270         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2271 out:
2272         btrfs_free_path(path);
2273         return ret;
2274 }
2275
2276 /*
2277  * for a tree node or leaf, we record its reference count, so later if we still
2278  * process this node or leaf, don't need to compute its reference count again.
2279  *
2280  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
2281  */
2282 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2283                              struct extent_buffer *eb, struct node_refs *nrefs,
2284                              u64 level, int check_all)
2285 {
2286         struct ulist *roots;
2287         u64 refs = 0;
2288         u64 flags = 0;
2289         int root_level = btrfs_header_level(root->node);
2290         int check;
2291         int ret;
2292
2293         if (nrefs->bytenr[level] == bytenr)
2294                 return 0;
2295
2296         if (bytenr != (u64)-1) {
2297                 /* the return value of this function seems a mistake */
2298                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2299                                        level, 1, &refs, &flags);
2300                 /* temporary fix */
2301                 if (ret < 0 && !check_all)
2302                         return ret;
2303
2304                 nrefs->bytenr[level] = bytenr;
2305                 nrefs->refs[level] = refs;
2306                 nrefs->full_backref[level] = 0;
2307                 nrefs->checked[level] = 0;
2308
2309                 if (refs > 1) {
2310                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2311                                                    0, &roots);
2312                         if (ret)
2313                                 return -EIO;
2314
2315                         check = need_check(root, roots);
2316                         ulist_free(roots);
2317                         nrefs->need_check[level] = check;
2318                 } else {
2319                         if (!check_all) {
2320                                 nrefs->need_check[level] = 1;
2321                         } else {
2322                                 if (level == root_level) {
2323                                         nrefs->need_check[level] = 1;
2324                                 } else {
2325                                         /*
2326                                          * The node refs may have not been
2327                                          * updated if upper needs checking (the
2328                                          * lowest root_objectid) the node can
2329                                          * be checked.
2330                                          */
2331                                         nrefs->need_check[level] =
2332                                                 nrefs->need_check[level + 1];
2333                                 }
2334                         }
2335                 }
2336         }
2337
2338         if (check_all && eb) {
2339                 calc_extent_flag_v2(root, eb, &flags);
2340                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2341                         nrefs->full_backref[level] = 1;
2342         }
2343
2344         return 0;
2345 }
2346
2347 /*
2348  * @level           if @level == -1 means extent data item
2349  *                  else normal treeblocl.
2350  */
2351 static int should_check_extent_strictly(struct btrfs_root *root,
2352                                         struct node_refs *nrefs, int level)
2353 {
2354         int root_level = btrfs_header_level(root->node);
2355
2356         if (level > root_level || level < -1)
2357                 return 1;
2358         if (level == root_level)
2359                 return 1;
2360         /*
2361          * if the upper node is marked full backref, it should contain shared
2362          * backref of the parent (except owner == root->objectid).
2363          */
2364         while (++level <= root_level)
2365                 if (nrefs->refs[level] > 1)
2366                         return 0;
2367
2368         return 1;
2369 }
2370
2371 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2372                           struct walk_control *wc, int *level,
2373                           struct node_refs *nrefs)
2374 {
2375         enum btrfs_tree_block_status status;
2376         u64 bytenr;
2377         u64 ptr_gen;
2378         struct btrfs_fs_info *fs_info = root->fs_info;
2379         struct extent_buffer *next;
2380         struct extent_buffer *cur;
2381         int ret, err = 0;
2382         u64 refs;
2383
2384         WARN_ON(*level < 0);
2385         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2386
2387         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2388                 refs = nrefs->refs[*level];
2389                 ret = 0;
2390         } else {
2391                 ret = btrfs_lookup_extent_info(NULL, root,
2392                                        path->nodes[*level]->start,
2393                                        *level, 1, &refs, NULL);
2394                 if (ret < 0) {
2395                         err = ret;
2396                         goto out;
2397                 }
2398                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2399                 nrefs->refs[*level] = refs;
2400         }
2401
2402         if (refs > 1) {
2403                 ret = enter_shared_node(root, path->nodes[*level]->start,
2404                                         refs, wc, *level);
2405                 if (ret > 0) {
2406                         err = ret;
2407                         goto out;
2408                 }
2409         }
2410
2411         while (*level >= 0) {
2412                 WARN_ON(*level < 0);
2413                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2414                 cur = path->nodes[*level];
2415
2416                 if (btrfs_header_level(cur) != *level)
2417                         WARN_ON(1);
2418
2419                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2420                         break;
2421                 if (*level == 0) {
2422                         ret = process_one_leaf(root, cur, wc);
2423                         if (ret < 0)
2424                                 err = ret;
2425                         break;
2426                 }
2427                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2428                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2429
2430                 if (bytenr == nrefs->bytenr[*level - 1]) {
2431                         refs = nrefs->refs[*level - 1];
2432                 } else {
2433                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2434                                         *level - 1, 1, &refs, NULL);
2435                         if (ret < 0) {
2436                                 refs = 0;
2437                         } else {
2438                                 nrefs->bytenr[*level - 1] = bytenr;
2439                                 nrefs->refs[*level - 1] = refs;
2440                         }
2441                 }
2442
2443                 if (refs > 1) {
2444                         ret = enter_shared_node(root, bytenr, refs,
2445                                                 wc, *level - 1);
2446                         if (ret > 0) {
2447                                 path->slots[*level]++;
2448                                 continue;
2449                         }
2450                 }
2451
2452                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2453                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2454                         free_extent_buffer(next);
2455                         reada_walk_down(root, cur, path->slots[*level]);
2456                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2457                         if (!extent_buffer_uptodate(next)) {
2458                                 struct btrfs_key node_key;
2459
2460                                 btrfs_node_key_to_cpu(path->nodes[*level],
2461                                                       &node_key,
2462                                                       path->slots[*level]);
2463                                 btrfs_add_corrupt_extent_record(root->fs_info,
2464                                                 &node_key,
2465                                                 path->nodes[*level]->start,
2466                                                 root->fs_info->nodesize,
2467                                                 *level);
2468                                 err = -EIO;
2469                                 goto out;
2470                         }
2471                 }
2472
2473                 ret = check_child_node(cur, path->slots[*level], next);
2474                 if (ret) {
2475                         free_extent_buffer(next);
2476                         err = ret;
2477                         goto out;
2478                 }
2479
2480                 if (btrfs_is_leaf(next))
2481                         status = btrfs_check_leaf(root, NULL, next);
2482                 else
2483                         status = btrfs_check_node(root, NULL, next);
2484                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2485                         free_extent_buffer(next);
2486                         err = -EIO;
2487                         goto out;
2488                 }
2489
2490                 *level = *level - 1;
2491                 free_extent_buffer(path->nodes[*level]);
2492                 path->nodes[*level] = next;
2493                 path->slots[*level] = 0;
2494         }
2495 out:
2496         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2497         return err;
2498 }
2499
2500 static int fs_root_objectid(u64 objectid);
2501
2502 /*
2503  * Update global fs information.
2504  */
2505 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2506                          int level)
2507 {
2508         u32 free_nrs;
2509         struct extent_buffer *eb = path->nodes[level];
2510
2511         total_btree_bytes += eb->len;
2512         if (fs_root_objectid(root->objectid))
2513                 total_fs_tree_bytes += eb->len;
2514         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2515                 total_extent_tree_bytes += eb->len;
2516
2517         if (level == 0) {
2518                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2519         } else {
2520                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2521                             btrfs_header_nritems(eb));
2522                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2523         }
2524 }
2525
2526 /*
2527  * This function only handles BACKREF_MISSING,
2528  * If corresponding extent item exists, increase the ref, else insert an extent
2529  * item and backref.
2530  *
2531  * Returns error bits after repair.
2532  */
2533 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2534                                  struct btrfs_root *root,
2535                                  struct extent_buffer *node,
2536                                  struct node_refs *nrefs, int level, int err)
2537 {
2538         struct btrfs_fs_info *fs_info = root->fs_info;
2539         struct btrfs_root *extent_root = fs_info->extent_root;
2540         struct btrfs_path path;
2541         struct btrfs_extent_item *ei;
2542         struct btrfs_tree_block_info *bi;
2543         struct btrfs_key key;
2544         struct extent_buffer *eb;
2545         u32 size = sizeof(*ei);
2546         u32 node_size = root->fs_info->nodesize;
2547         int insert_extent = 0;
2548         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2549         int root_level = btrfs_header_level(root->node);
2550         int generation;
2551         int ret;
2552         u64 owner;
2553         u64 bytenr;
2554         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2555         u64 parent = 0;
2556
2557         if ((err & BACKREF_MISSING) == 0)
2558                 return err;
2559
2560         WARN_ON(level > BTRFS_MAX_LEVEL);
2561         WARN_ON(level < 0);
2562
2563         btrfs_init_path(&path);
2564         bytenr = btrfs_header_bytenr(node);
2565         owner = btrfs_header_owner(node);
2566         generation = btrfs_header_generation(node);
2567
2568         key.objectid = bytenr;
2569         key.type = (u8)-1;
2570         key.offset = (u64)-1;
2571
2572         /* Search for the extent item */
2573         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2574         if (ret <= 0) {
2575                 ret = -EIO;
2576                 goto out;
2577         }
2578
2579         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2580         if (ret)
2581                 insert_extent = 1;
2582
2583         /* calculate if the extent item flag is full backref or not */
2584         if (nrefs->full_backref[level] != 0)
2585                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2586
2587         /* insert an extent item */
2588         if (insert_extent) {
2589                 struct btrfs_disk_key copy_key;
2590
2591                 generation = btrfs_header_generation(node);
2592
2593                 if (level < root_level && nrefs->full_backref[level + 1] &&
2594                     owner != root->objectid) {
2595                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2596                 }
2597
2598                 key.objectid = bytenr;
2599                 if (!skinny_metadata) {
2600                         key.type = BTRFS_EXTENT_ITEM_KEY;
2601                         key.offset = node_size;
2602                         size += sizeof(*bi);
2603                 } else {
2604                         key.type = BTRFS_METADATA_ITEM_KEY;
2605                         key.offset = level;
2606                 }
2607
2608                 btrfs_release_path(&path);
2609                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2610                                               size);
2611                 if (ret)
2612                         goto out;
2613
2614                 eb = path.nodes[0];
2615                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2616
2617                 btrfs_set_extent_refs(eb, ei, 0);
2618                 btrfs_set_extent_generation(eb, ei, generation);
2619                 btrfs_set_extent_flags(eb, ei, flags);
2620
2621                 if (!skinny_metadata) {
2622                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2623                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2624                                              sizeof(*bi));
2625                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2626                         btrfs_set_disk_key_type(&copy_key, 0);
2627                         btrfs_set_disk_key_offset(&copy_key, 0);
2628
2629                         btrfs_set_tree_block_level(eb, bi, level);
2630                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2631                 }
2632                 btrfs_mark_buffer_dirty(eb);
2633                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2634                 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
2635                                          1, 0);
2636
2637                 nrefs->refs[level] = 0;
2638                 nrefs->full_backref[level] =
2639                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2640                 btrfs_release_path(&path);
2641         }
2642
2643         if (level < root_level && nrefs->full_backref[level + 1] &&
2644             owner != root->objectid)
2645                 parent = nrefs->bytenr[level + 1];
2646
2647         /* increase the ref */
2648         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2649                         parent, root->objectid, level, 0);
2650
2651         nrefs->refs[level]++;
2652 out:
2653         btrfs_release_path(&path);
2654         if (ret) {
2655                 error(
2656         "failed to repair tree block ref start %llu root %llu due to %s",
2657                       bytenr, root->objectid, strerror(-ret));
2658         } else {
2659                 printf("Added one tree block ref start %llu %s %llu\n",
2660                        bytenr, parent ? "parent" : "root",
2661                        parent ? parent : root->objectid);
2662                 err &= ~BACKREF_MISSING;
2663         }
2664
2665         return err;
2666 }
2667
2668 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2669                             unsigned int ext_ref);
2670 static int check_tree_block_ref(struct btrfs_root *root,
2671                                 struct extent_buffer *eb, u64 bytenr,
2672                                 int level, u64 owner, struct node_refs *nrefs);
2673 static int check_leaf_items(struct btrfs_trans_handle *trans,
2674                             struct btrfs_root *root, struct btrfs_path *path,
2675                             struct node_refs *nrefs, int account_bytes);
2676
2677 /*
2678  * @trans      just for lowmem repair mode
2679  * @check all  if not 0 then check all tree block backrefs and items
2680  *             0 then just check relationship of items in fs tree(s)
2681  *
2682  * Returns >0  Found error, should continue
2683  * Returns <0  Fatal error, must exit the whole check
2684  * Returns 0   No errors found
2685  */
2686 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2687                              struct btrfs_root *root, struct btrfs_path *path,
2688                              int *level, struct node_refs *nrefs, int ext_ref,
2689                              int check_all)
2690
2691 {
2692         enum btrfs_tree_block_status status;
2693         u64 bytenr;
2694         u64 ptr_gen;
2695         struct btrfs_fs_info *fs_info = root->fs_info;
2696         struct extent_buffer *next;
2697         struct extent_buffer *cur;
2698         int ret;
2699         int err = 0;
2700         int check;
2701         int account_file_data = 0;
2702
2703         WARN_ON(*level < 0);
2704         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2705
2706         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2707                                 path->nodes[*level], nrefs, *level, check_all);
2708         if (ret < 0)
2709                 return ret;
2710
2711         while (*level >= 0) {
2712                 WARN_ON(*level < 0);
2713                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2714                 cur = path->nodes[*level];
2715                 bytenr = btrfs_header_bytenr(cur);
2716                 check = nrefs->need_check[*level];
2717
2718                 if (btrfs_header_level(cur) != *level)
2719                         WARN_ON(1);
2720                /*
2721                 * Update bytes accounting and check tree block ref
2722                 * NOTE: Doing accounting and check before checking nritems
2723                 * is necessary because of empty node/leaf.
2724                 */
2725                 if ((check_all && !nrefs->checked[*level]) ||
2726                     (!check_all && nrefs->need_check[*level])) {
2727                         ret = check_tree_block_ref(root, cur,
2728                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2729                            btrfs_header_owner(cur), nrefs);
2730
2731                         if (repair && ret)
2732                                 ret = repair_tree_block_ref(trans, root,
2733                                     path->nodes[*level], nrefs, *level, ret);
2734                         err |= ret;
2735
2736                         if (check_all && nrefs->need_check[*level] &&
2737                                 nrefs->refs[*level]) {
2738                                 account_bytes(root, path, *level);
2739                                 account_file_data = 1;
2740                         }
2741                         nrefs->checked[*level] = 1;
2742                 }
2743
2744                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2745                         break;
2746
2747                 /* Don't forgot to check leaf/node validation */
2748                 if (*level == 0) {
2749                         /* skip duplicate check */
2750                         if (check || !check_all) {
2751                                 ret = btrfs_check_leaf(root, NULL, cur);
2752                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2753                                         err |= -EIO;
2754                                         break;
2755                                 }
2756                         }
2757
2758                         ret = 0;
2759                         if (!check_all)
2760                                 ret = process_one_leaf_v2(root, path, nrefs,
2761                                                           level, ext_ref);
2762                         else
2763                                 ret = check_leaf_items(trans, root, path,
2764                                                nrefs, account_file_data);
2765                         err |= ret;
2766                         break;
2767                 } else {
2768                         if (check || !check_all) {
2769                                 ret = btrfs_check_node(root, NULL, cur);
2770                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2771                                         err |= -EIO;
2772                                         break;
2773                                 }
2774                         }
2775                 }
2776
2777                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2778                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2779
2780                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2781                                         check_all);
2782                 if (ret < 0)
2783                         break;
2784                 /*
2785                  * check all trees in check_chunks_and_extent_v2
2786                  * check shared node once in check_fs_roots
2787                  */
2788                 if (!check_all && !nrefs->need_check[*level - 1]) {
2789                         path->slots[*level]++;
2790                         continue;
2791                 }
2792
2793                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2794                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2795                         free_extent_buffer(next);
2796                         reada_walk_down(root, cur, path->slots[*level]);
2797                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2798                         if (!extent_buffer_uptodate(next)) {
2799                                 struct btrfs_key node_key;
2800
2801                                 btrfs_node_key_to_cpu(path->nodes[*level],
2802                                                       &node_key,
2803                                                       path->slots[*level]);
2804                                 btrfs_add_corrupt_extent_record(fs_info,
2805                                         &node_key, path->nodes[*level]->start,
2806                                         fs_info->nodesize, *level);
2807                                 err |= -EIO;
2808                                 break;
2809                         }
2810                 }
2811
2812                 ret = check_child_node(cur, path->slots[*level], next);
2813                 err |= ret;
2814                 if (ret < 0) 
2815                         break;
2816
2817                 if (btrfs_is_leaf(next))
2818                         status = btrfs_check_leaf(root, NULL, next);
2819                 else
2820                         status = btrfs_check_node(root, NULL, next);
2821                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2822                         free_extent_buffer(next);
2823                         err |= -EIO;
2824                         break;
2825                 }
2826
2827                 *level = *level - 1;
2828                 free_extent_buffer(path->nodes[*level]);
2829                 path->nodes[*level] = next;
2830                 path->slots[*level] = 0;
2831                 account_file_data = 0;
2832
2833                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2834         }
2835         return err;
2836 }
2837
2838 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2839                         struct walk_control *wc, int *level)
2840 {
2841         int i;
2842         struct extent_buffer *leaf;
2843
2844         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2845                 leaf = path->nodes[i];
2846                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2847                         path->slots[i]++;
2848                         *level = i;
2849                         return 0;
2850                 } else {
2851                         free_extent_buffer(path->nodes[*level]);
2852                         path->nodes[*level] = NULL;
2853                         BUG_ON(*level > wc->active_node);
2854                         if (*level == wc->active_node)
2855                                 leave_shared_node(root, wc, *level);
2856                         *level = i + 1;
2857                 }
2858         }
2859         return 1;
2860 }
2861
2862 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2863                            int *level)
2864 {
2865         int i;
2866         struct extent_buffer *leaf;
2867
2868         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2869                 leaf = path->nodes[i];
2870                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2871                         path->slots[i]++;
2872                         *level = i;
2873                         return 0;
2874                 } else {
2875                         free_extent_buffer(path->nodes[*level]);
2876                         path->nodes[*level] = NULL;
2877                         *level = i + 1;
2878                 }
2879         }
2880         return 1;
2881 }
2882
2883 static int check_root_dir(struct inode_record *rec)
2884 {
2885         struct inode_backref *backref;
2886         int ret = -1;
2887
2888         if (!rec->found_inode_item || rec->errors)
2889                 goto out;
2890         if (rec->nlink != 1 || rec->found_link != 0)
2891                 goto out;
2892         if (list_empty(&rec->backrefs))
2893                 goto out;
2894         backref = to_inode_backref(rec->backrefs.next);
2895         if (!backref->found_inode_ref)
2896                 goto out;
2897         if (backref->index != 0 || backref->namelen != 2 ||
2898             memcmp(backref->name, "..", 2))
2899                 goto out;
2900         if (backref->found_dir_index || backref->found_dir_item)
2901                 goto out;
2902         ret = 0;
2903 out:
2904         return ret;
2905 }
2906
2907 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2908                               struct btrfs_root *root, struct btrfs_path *path,
2909                               struct inode_record *rec)
2910 {
2911         struct btrfs_inode_item *ei;
2912         struct btrfs_key key;
2913         int ret;
2914
2915         key.objectid = rec->ino;
2916         key.type = BTRFS_INODE_ITEM_KEY;
2917         key.offset = (u64)-1;
2918
2919         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2920         if (ret < 0)
2921                 goto out;
2922         if (ret) {
2923                 if (!path->slots[0]) {
2924                         ret = -ENOENT;
2925                         goto out;
2926                 }
2927                 path->slots[0]--;
2928                 ret = 0;
2929         }
2930         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2931         if (key.objectid != rec->ino) {
2932                 ret = -ENOENT;
2933                 goto out;
2934         }
2935
2936         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2937                             struct btrfs_inode_item);
2938         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2939         btrfs_mark_buffer_dirty(path->nodes[0]);
2940         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2941         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2942                root->root_key.objectid);
2943 out:
2944         btrfs_release_path(path);
2945         return ret;
2946 }
2947
2948 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2949                                     struct btrfs_root *root,
2950                                     struct btrfs_path *path,
2951                                     struct inode_record *rec)
2952 {
2953         int ret;
2954
2955         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2956         btrfs_release_path(path);
2957         if (!ret)
2958                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2959         return ret;
2960 }
2961
2962 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2963                                struct btrfs_root *root,
2964                                struct btrfs_path *path,
2965                                struct inode_record *rec)
2966 {
2967         struct btrfs_inode_item *ei;
2968         struct btrfs_key key;
2969         int ret = 0;
2970
2971         key.objectid = rec->ino;
2972         key.type = BTRFS_INODE_ITEM_KEY;
2973         key.offset = 0;
2974
2975         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2976         if (ret) {
2977                 if (ret > 0)
2978                         ret = -ENOENT;
2979                 goto out;
2980         }
2981
2982         /* Since ret == 0, no need to check anything */
2983         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2984                             struct btrfs_inode_item);
2985         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2986         btrfs_mark_buffer_dirty(path->nodes[0]);
2987         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2988         printf("reset nbytes for ino %llu root %llu\n",
2989                rec->ino, root->root_key.objectid);
2990 out:
2991         btrfs_release_path(path);
2992         return ret;
2993 }
2994
2995 static int add_missing_dir_index(struct btrfs_root *root,
2996                                  struct cache_tree *inode_cache,
2997                                  struct inode_record *rec,
2998                                  struct inode_backref *backref)
2999 {
3000         struct btrfs_path path;
3001         struct btrfs_trans_handle *trans;
3002         struct btrfs_dir_item *dir_item;
3003         struct extent_buffer *leaf;
3004         struct btrfs_key key;
3005         struct btrfs_disk_key disk_key;
3006         struct inode_record *dir_rec;
3007         unsigned long name_ptr;
3008         u32 data_size = sizeof(*dir_item) + backref->namelen;
3009         int ret;
3010
3011         trans = btrfs_start_transaction(root, 1);
3012         if (IS_ERR(trans))
3013                 return PTR_ERR(trans);
3014
3015         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3016                 (unsigned long long)rec->ino);
3017
3018         btrfs_init_path(&path);
3019         key.objectid = backref->dir;
3020         key.type = BTRFS_DIR_INDEX_KEY;
3021         key.offset = backref->index;
3022         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3023         BUG_ON(ret);
3024
3025         leaf = path.nodes[0];
3026         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
3027
3028         disk_key.objectid = cpu_to_le64(rec->ino);
3029         disk_key.type = BTRFS_INODE_ITEM_KEY;
3030         disk_key.offset = 0;
3031
3032         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3033         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3034         btrfs_set_dir_data_len(leaf, dir_item, 0);
3035         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
3036         name_ptr = (unsigned long)(dir_item + 1);
3037         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3038         btrfs_mark_buffer_dirty(leaf);
3039         btrfs_release_path(&path);
3040         btrfs_commit_transaction(trans, root);
3041
3042         backref->found_dir_index = 1;
3043         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3044         BUG_ON(IS_ERR(dir_rec));
3045         if (!dir_rec)
3046                 return 0;
3047         dir_rec->found_size += backref->namelen;
3048         if (dir_rec->found_size == dir_rec->isize &&
3049             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3050                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3051         if (dir_rec->found_size != dir_rec->isize)
3052                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
3053
3054         return 0;
3055 }
3056
3057 static int delete_dir_index(struct btrfs_root *root,
3058                             struct inode_backref *backref)
3059 {
3060         struct btrfs_trans_handle *trans;
3061         struct btrfs_dir_item *di;
3062         struct btrfs_path path;
3063         int ret = 0;
3064
3065         trans = btrfs_start_transaction(root, 1);
3066         if (IS_ERR(trans))
3067                 return PTR_ERR(trans);
3068
3069         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3070                 (unsigned long long)backref->dir,
3071                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3072                 (unsigned long long)root->objectid);
3073
3074         btrfs_init_path(&path);
3075         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3076                                     backref->name, backref->namelen,
3077                                     backref->index, -1);
3078         if (IS_ERR(di)) {
3079                 ret = PTR_ERR(di);
3080                 btrfs_release_path(&path);
3081                 btrfs_commit_transaction(trans, root);
3082                 if (ret == -ENOENT)
3083                         return 0;
3084                 return ret;
3085         }
3086
3087         if (!di)
3088                 ret = btrfs_del_item(trans, root, &path);
3089         else
3090                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3091         BUG_ON(ret);
3092         btrfs_release_path(&path);
3093         btrfs_commit_transaction(trans, root);
3094         return ret;
3095 }
3096
3097 static int __create_inode_item(struct btrfs_trans_handle *trans,
3098                                struct btrfs_root *root, u64 ino, u64 size,
3099                                u64 nbytes, u64 nlink, u32 mode)
3100 {
3101         struct btrfs_inode_item ii;
3102         time_t now = time(NULL);
3103         int ret;
3104
3105         btrfs_set_stack_inode_size(&ii, size);
3106         btrfs_set_stack_inode_nbytes(&ii, nbytes);
3107         btrfs_set_stack_inode_nlink(&ii, nlink);
3108         btrfs_set_stack_inode_mode(&ii, mode);
3109         btrfs_set_stack_inode_generation(&ii, trans->transid);
3110         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3111         btrfs_set_stack_timespec_sec(&ii.ctime, now);
3112         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3113         btrfs_set_stack_timespec_sec(&ii.mtime, now);
3114         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3115         btrfs_set_stack_timespec_sec(&ii.otime, 0);
3116         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3117
3118         ret = btrfs_insert_inode(trans, root, ino, &ii);
3119         ASSERT(!ret);
3120
3121         warning("root %llu inode %llu recreating inode item, this may "
3122                 "be incomplete, please check permissions and content after "
3123                 "the fsck completes.\n", (unsigned long long)root->objectid,
3124                 (unsigned long long)ino);
3125
3126         return 0;
3127 }
3128
3129 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3130                                     struct btrfs_root *root, u64 ino,
3131                                     u8 filetype)
3132 {
3133         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3134
3135         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
3136 }
3137
3138 static int create_inode_item(struct btrfs_root *root,
3139                              struct inode_record *rec, int root_dir)
3140 {
3141         struct btrfs_trans_handle *trans;
3142         u64 nlink = 0;
3143         u32 mode = 0;
3144         u64 size = 0;
3145         int ret;
3146
3147         trans = btrfs_start_transaction(root, 1);
3148         if (IS_ERR(trans)) {
3149                 ret = PTR_ERR(trans);
3150                 return ret;
3151         }
3152
3153         nlink = root_dir ? 1 : rec->found_link;
3154         if (rec->found_dir_item) {
3155                 if (rec->found_file_extent)
3156                         fprintf(stderr, "root %llu inode %llu has both a dir "
3157                                 "item and extents, unsure if it is a dir or a "
3158                                 "regular file so setting it as a directory\n",
3159                                 (unsigned long long)root->objectid,
3160                                 (unsigned long long)rec->ino);
3161                 mode = S_IFDIR | 0755;
3162                 size = rec->found_size;
3163         } else if (!rec->found_dir_item) {
3164                 size = rec->extent_end;
3165                 mode =  S_IFREG | 0755;
3166         }
3167
3168         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3169                                   nlink, mode);
3170         btrfs_commit_transaction(trans, root);
3171         return 0;
3172 }
3173
3174 static int repair_inode_backrefs(struct btrfs_root *root,
3175                                  struct inode_record *rec,
3176                                  struct cache_tree *inode_cache,
3177                                  int delete)
3178 {
3179         struct inode_backref *tmp, *backref;
3180         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3181         int ret = 0;
3182         int repaired = 0;
3183
3184         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3185                 if (!delete && rec->ino == root_dirid) {
3186                         if (!rec->found_inode_item) {
3187                                 ret = create_inode_item(root, rec, 1);
3188                                 if (ret)
3189                                         break;
3190                                 repaired++;
3191                         }
3192                 }
3193
3194                 /* Index 0 for root dir's are special, don't mess with it */
3195                 if (rec->ino == root_dirid && backref->index == 0)
3196                         continue;
3197
3198                 if (delete &&
3199                     ((backref->found_dir_index && !backref->found_inode_ref) ||
3200                      (backref->found_dir_index && backref->found_inode_ref &&
3201                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3202                         ret = delete_dir_index(root, backref);
3203                         if (ret)
3204                                 break;
3205                         repaired++;
3206                         list_del(&backref->list);
3207                         free(backref);
3208                         continue;
3209                 }
3210
3211                 if (!delete && !backref->found_dir_index &&
3212                     backref->found_dir_item && backref->found_inode_ref) {
3213                         ret = add_missing_dir_index(root, inode_cache, rec,
3214                                                     backref);
3215                         if (ret)
3216                                 break;
3217                         repaired++;
3218                         if (backref->found_dir_item &&
3219                             backref->found_dir_index) {
3220                                 if (!backref->errors &&
3221                                     backref->found_inode_ref) {
3222                                         list_del(&backref->list);
3223                                         free(backref);
3224                                         continue;
3225                                 }
3226                         }
3227                 }
3228
3229                 if (!delete && (!backref->found_dir_index &&
3230                                 !backref->found_dir_item &&
3231                                 backref->found_inode_ref)) {
3232                         struct btrfs_trans_handle *trans;
3233                         struct btrfs_key location;
3234
3235                         ret = check_dir_conflict(root, backref->name,
3236                                                  backref->namelen,
3237                                                  backref->dir,
3238                                                  backref->index);
3239                         if (ret) {
3240                                 /*
3241                                  * let nlink fixing routine to handle it,
3242                                  * which can do it better.
3243                                  */
3244                                 ret = 0;
3245                                 break;
3246                         }
3247                         location.objectid = rec->ino;
3248                         location.type = BTRFS_INODE_ITEM_KEY;
3249                         location.offset = 0;
3250
3251                         trans = btrfs_start_transaction(root, 1);
3252                         if (IS_ERR(trans)) {
3253                                 ret = PTR_ERR(trans);
3254                                 break;
3255                         }
3256                         fprintf(stderr, "adding missing dir index/item pair "
3257                                 "for inode %llu\n",
3258                                 (unsigned long long)rec->ino);
3259                         ret = btrfs_insert_dir_item(trans, root, backref->name,
3260                                                     backref->namelen,
3261                                                     backref->dir, &location,
3262                                                     imode_to_type(rec->imode),
3263                                                     backref->index);
3264                         BUG_ON(ret);
3265                         btrfs_commit_transaction(trans, root);
3266                         repaired++;
3267                 }
3268
3269                 if (!delete && (backref->found_inode_ref &&
3270                                 backref->found_dir_index &&
3271                                 backref->found_dir_item &&
3272                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3273                                 !rec->found_inode_item)) {
3274                         ret = create_inode_item(root, rec, 0);
3275                         if (ret)
3276                                 break;
3277                         repaired++;
3278                 }
3279
3280         }
3281         return ret ? ret : repaired;
3282 }
3283
3284 /*
3285  * To determine the file type for nlink/inode_item repair
3286  *
3287  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3288  * Return -ENOENT if file type is not found.
3289  */
3290 static int find_file_type(struct inode_record *rec, u8 *type)
3291 {
3292         struct inode_backref *backref;
3293
3294         /* For inode item recovered case */
3295         if (rec->found_inode_item) {
3296                 *type = imode_to_type(rec->imode);
3297                 return 0;
3298         }
3299
3300         list_for_each_entry(backref, &rec->backrefs, list) {
3301                 if (backref->found_dir_index || backref->found_dir_item) {
3302                         *type = backref->filetype;
3303                         return 0;
3304                 }
3305         }
3306         return -ENOENT;
3307 }
3308
3309 /*
3310  * To determine the file name for nlink repair
3311  *
3312  * Return 0 if file name is found, set name and namelen.
3313  * Return -ENOENT if file name is not found.
3314  */
3315 static int find_file_name(struct inode_record *rec,
3316                           char *name, int *namelen)
3317 {
3318         struct inode_backref *backref;
3319
3320         list_for_each_entry(backref, &rec->backrefs, list) {
3321                 if (backref->found_dir_index || backref->found_dir_item ||
3322                     backref->found_inode_ref) {
3323                         memcpy(name, backref->name, backref->namelen);
3324                         *namelen = backref->namelen;
3325                         return 0;
3326                 }
3327         }
3328         return -ENOENT;
3329 }
3330
3331 /* Reset the nlink of the inode to the correct one */
3332 static int reset_nlink(struct btrfs_trans_handle *trans,
3333                        struct btrfs_root *root,
3334                        struct btrfs_path *path,
3335                        struct inode_record *rec)
3336 {
3337         struct inode_backref *backref;
3338         struct inode_backref *tmp;
3339         struct btrfs_key key;
3340         struct btrfs_inode_item *inode_item;
3341         int ret = 0;
3342
3343         /* We don't believe this either, reset it and iterate backref */
3344         rec->found_link = 0;
3345
3346         /* Remove all backref including the valid ones */
3347         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3348                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3349                                    backref->index, backref->name,
3350                                    backref->namelen, 0);
3351                 if (ret < 0)
3352                         goto out;
3353
3354                 /* remove invalid backref, so it won't be added back */
3355                 if (!(backref->found_dir_index &&
3356                       backref->found_dir_item &&
3357                       backref->found_inode_ref)) {
3358                         list_del(&backref->list);
3359                         free(backref);
3360                 } else {
3361                         rec->found_link++;
3362                 }
3363         }
3364
3365         /* Set nlink to 0 */
3366         key.objectid = rec->ino;
3367         key.type = BTRFS_INODE_ITEM_KEY;
3368         key.offset = 0;
3369         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3370         if (ret < 0)
3371                 goto out;
3372         if (ret > 0) {
3373                 ret = -ENOENT;
3374                 goto out;
3375         }
3376         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3377                                     struct btrfs_inode_item);
3378         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3379         btrfs_mark_buffer_dirty(path->nodes[0]);
3380         btrfs_release_path(path);
3381
3382         /*
3383          * Add back valid inode_ref/dir_item/dir_index,
3384          * add_link() will handle the nlink inc, so new nlink must be correct
3385          */
3386         list_for_each_entry(backref, &rec->backrefs, list) {
3387                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3388                                      backref->name, backref->namelen,
3389                                      backref->filetype, &backref->index, 1, 0);
3390                 if (ret < 0)
3391                         goto out;
3392         }
3393 out:
3394         btrfs_release_path(path);
3395         return ret;
3396 }
3397
3398 static int get_highest_inode(struct btrfs_trans_handle *trans,
3399                                 struct btrfs_root *root,
3400                                 struct btrfs_path *path,
3401                                 u64 *highest_ino)
3402 {
3403         struct btrfs_key key, found_key;
3404         int ret;
3405
3406         btrfs_init_path(path);
3407         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3408         key.offset = -1;
3409         key.type = BTRFS_INODE_ITEM_KEY;
3410         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3411         if (ret == 1) {
3412                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3413                                 path->slots[0] - 1);
3414                 *highest_ino = found_key.objectid;
3415                 ret = 0;
3416         }
3417         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3418                 ret = -EOVERFLOW;
3419         btrfs_release_path(path);
3420         return ret;
3421 }
3422
3423 /*
3424  * Link inode to dir 'lost+found'. Increase @ref_count.
3425  *
3426  * Returns 0 means success.
3427  * Returns <0 means failure.
3428  */
3429 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3430                                    struct btrfs_root *root,
3431                                    struct btrfs_path *path,
3432                                    u64 ino, char *namebuf, u32 name_len,
3433                                    u8 filetype, u64 *ref_count)
3434 {
3435         char *dir_name = "lost+found";
3436         u64 lost_found_ino;
3437         int ret;
3438         u32 mode = 0700;
3439
3440         btrfs_release_path(path);
3441         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3442         if (ret < 0)
3443                 goto out;
3444         lost_found_ino++;
3445
3446         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3447                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3448                           mode);
3449         if (ret < 0) {
3450                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3451                 goto out;
3452         }
3453         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3454                              namebuf, name_len, filetype, NULL, 1, 0);
3455         /*
3456          * Add ".INO" suffix several times to handle case where
3457          * "FILENAME.INO" is already taken by another file.
3458          */
3459         while (ret == -EEXIST) {
3460                 /*
3461                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3462                  */
3463                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3464                         ret = -EFBIG;
3465                         goto out;
3466                 }
3467                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3468                          ".%llu", ino);
3469                 name_len += count_digits(ino) + 1;
3470                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3471                                      name_len, filetype, NULL, 1, 0);
3472         }
3473         if (ret < 0) {
3474                 error("failed to link the inode %llu to %s dir: %s",
3475                       ino, dir_name, strerror(-ret));
3476                 goto out;
3477         }
3478
3479         ++*ref_count;
3480         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3481                name_len, namebuf, dir_name);
3482 out:
3483         btrfs_release_path(path);
3484         if (ret)
3485                 error("failed to move file '%.*s' to '%s' dir", name_len,
3486                                 namebuf, dir_name);
3487         return ret;
3488 }
3489
3490 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3491                                struct btrfs_root *root,
3492                                struct btrfs_path *path,
3493                                struct inode_record *rec)
3494 {
3495         char namebuf[BTRFS_NAME_LEN] = {0};
3496         u8 type = 0;
3497         int namelen = 0;
3498         int name_recovered = 0;
3499         int type_recovered = 0;
3500         int ret = 0;
3501
3502         /*
3503          * Get file name and type first before these invalid inode ref
3504          * are deleted by remove_all_invalid_backref()
3505          */
3506         name_recovered = !find_file_name(rec, namebuf, &namelen);
3507         type_recovered = !find_file_type(rec, &type);
3508
3509         if (!name_recovered) {
3510                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3511                        rec->ino, rec->ino);
3512                 namelen = count_digits(rec->ino);
3513                 sprintf(namebuf, "%llu", rec->ino);
3514                 name_recovered = 1;
3515         }
3516         if (!type_recovered) {
3517                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3518                        rec->ino);
3519                 type = BTRFS_FT_REG_FILE;
3520                 type_recovered = 1;
3521         }
3522
3523         ret = reset_nlink(trans, root, path, rec);
3524         if (ret < 0) {
3525                 fprintf(stderr,
3526                         "Failed to reset nlink for inode %llu: %s\n",
3527                         rec->ino, strerror(-ret));
3528                 goto out;
3529         }
3530
3531         if (rec->found_link == 0) {
3532                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3533                                               namebuf, namelen, type,
3534                                               (u64 *)&rec->found_link);
3535                 if (ret)
3536                         goto out;
3537         }
3538         printf("Fixed the nlink of inode %llu\n", rec->ino);
3539 out:
3540         /*
3541          * Clear the flag anyway, or we will loop forever for the same inode
3542          * as it will not be removed from the bad inode list and the dead loop
3543          * happens.
3544          */
3545         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3546         btrfs_release_path(path);
3547         return ret;
3548 }
3549
3550 /*
3551  * Check if there is any normal(reg or prealloc) file extent for given
3552  * ino.
3553  * This is used to determine the file type when neither its dir_index/item or
3554  * inode_item exists.
3555  *
3556  * This will *NOT* report error, if any error happens, just consider it does
3557  * not have any normal file extent.
3558  */
3559 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3560 {
3561         struct btrfs_path path;
3562         struct btrfs_key key;
3563         struct btrfs_key found_key;
3564         struct btrfs_file_extent_item *fi;
3565         u8 type;
3566         int ret = 0;
3567
3568         btrfs_init_path(&path);
3569         key.objectid = ino;
3570         key.type = BTRFS_EXTENT_DATA_KEY;
3571         key.offset = 0;
3572
3573         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3574         if (ret < 0) {
3575                 ret = 0;
3576                 goto out;
3577         }
3578         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3579                 ret = btrfs_next_leaf(root, &path);
3580                 if (ret) {
3581                         ret = 0;
3582                         goto out;
3583                 }
3584         }
3585         while (1) {
3586                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3587                                       path.slots[0]);
3588                 if (found_key.objectid != ino ||
3589                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3590                         break;
3591                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3592                                     struct btrfs_file_extent_item);
3593                 type = btrfs_file_extent_type(path.nodes[0], fi);
3594                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3595                         ret = 1;
3596                         goto out;
3597                 }
3598         }
3599 out:
3600         btrfs_release_path(&path);
3601         return ret;
3602 }
3603
3604 static u32 btrfs_type_to_imode(u8 type)
3605 {
3606         static u32 imode_by_btrfs_type[] = {
3607                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3608                 [BTRFS_FT_DIR]          = S_IFDIR,
3609                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3610                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3611                 [BTRFS_FT_FIFO]         = S_IFIFO,
3612                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3613                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3614         };
3615
3616         return imode_by_btrfs_type[(type)];
3617 }
3618
3619 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3620                                 struct btrfs_root *root,
3621                                 struct btrfs_path *path,
3622                                 struct inode_record *rec)
3623 {
3624         u8 filetype;
3625         u32 mode = 0700;
3626         int type_recovered = 0;
3627         int ret = 0;
3628
3629         printf("Trying to rebuild inode:%llu\n", rec->ino);
3630
3631         type_recovered = !find_file_type(rec, &filetype);
3632
3633         /*
3634          * Try to determine inode type if type not found.
3635          *
3636          * For found regular file extent, it must be FILE.
3637          * For found dir_item/index, it must be DIR.
3638          *
3639          * For undetermined one, use FILE as fallback.
3640          *
3641          * TODO:
3642          * 1. If found backref(inode_index/item is already handled) to it,
3643          *    it must be DIR.
3644          *    Need new inode-inode ref structure to allow search for that.
3645          */
3646         if (!type_recovered) {
3647                 if (rec->found_file_extent &&
3648                     find_normal_file_extent(root, rec->ino)) {
3649                         type_recovered = 1;
3650                         filetype = BTRFS_FT_REG_FILE;
3651                 } else if (rec->found_dir_item) {
3652                         type_recovered = 1;
3653                         filetype = BTRFS_FT_DIR;
3654                 } else if (!list_empty(&rec->orphan_extents)) {
3655                         type_recovered = 1;
3656                         filetype = BTRFS_FT_REG_FILE;
3657                 } else{
3658                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3659                                rec->ino);
3660                         type_recovered = 1;
3661                         filetype = BTRFS_FT_REG_FILE;
3662                 }
3663         }
3664
3665         ret = btrfs_new_inode(trans, root, rec->ino,
3666                               mode | btrfs_type_to_imode(filetype));
3667         if (ret < 0)
3668                 goto out;
3669
3670         /*
3671          * Here inode rebuild is done, we only rebuild the inode item,
3672          * don't repair the nlink(like move to lost+found).
3673          * That is the job of nlink repair.
3674          *
3675          * We just fill the record and return
3676          */
3677         rec->found_dir_item = 1;
3678         rec->imode = mode | btrfs_type_to_imode(filetype);
3679         rec->nlink = 0;
3680         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3681         /* Ensure the inode_nlinks repair function will be called */
3682         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3683 out:
3684         return ret;
3685 }
3686
3687 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3688                                       struct btrfs_root *root,
3689                                       struct btrfs_path *path,
3690                                       struct inode_record *rec)
3691 {
3692         struct orphan_data_extent *orphan;
3693         struct orphan_data_extent *tmp;
3694         int ret = 0;
3695
3696         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3697                 /*
3698                  * Check for conflicting file extents
3699                  *
3700                  * Here we don't know whether the extents is compressed or not,
3701                  * so we can only assume it not compressed nor data offset,
3702                  * and use its disk_len as extent length.
3703                  */
3704                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3705                                        orphan->offset, orphan->disk_len, 0);
3706                 btrfs_release_path(path);
3707                 if (ret < 0)
3708                         goto out;
3709                 if (!ret) {
3710                         fprintf(stderr,
3711                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3712                                 orphan->disk_bytenr, orphan->disk_len);
3713                         ret = btrfs_free_extent(trans,
3714                                         root->fs_info->extent_root,
3715                                         orphan->disk_bytenr, orphan->disk_len,
3716                                         0, root->objectid, orphan->objectid,
3717                                         orphan->offset);
3718                         if (ret < 0)
3719                                 goto out;
3720                 }
3721                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3722                                 orphan->offset, orphan->disk_bytenr,
3723                                 orphan->disk_len, orphan->disk_len);
3724                 if (ret < 0)
3725                         goto out;
3726
3727                 /* Update file size info */
3728                 rec->found_size += orphan->disk_len;
3729                 if (rec->found_size == rec->nbytes)
3730                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3731
3732                 /* Update the file extent hole info too */
3733                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3734                                            orphan->disk_len);
3735                 if (ret < 0)
3736                         goto out;
3737                 if (RB_EMPTY_ROOT(&rec->holes))
3738                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3739
3740                 list_del(&orphan->list);
3741                 free(orphan);
3742         }
3743         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3744 out:
3745         return ret;
3746 }
3747
3748 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3749                                         struct btrfs_root *root,
3750                                         struct btrfs_path *path,
3751                                         struct inode_record *rec)
3752 {
3753         struct rb_node *node;
3754         struct file_extent_hole *hole;
3755         int found = 0;
3756         int ret = 0;
3757
3758         node = rb_first(&rec->holes);
3759
3760         while (node) {
3761                 found = 1;
3762                 hole = rb_entry(node, struct file_extent_hole, node);
3763                 ret = btrfs_punch_hole(trans, root, rec->ino,
3764                                        hole->start, hole->len);
3765                 if (ret < 0)
3766                         goto out;
3767                 ret = del_file_extent_hole(&rec->holes, hole->start,
3768                                            hole->len);
3769                 if (ret < 0)
3770                         goto out;
3771                 if (RB_EMPTY_ROOT(&rec->holes))
3772                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3773                 node = rb_first(&rec->holes);
3774         }
3775         /* special case for a file losing all its file extent */
3776         if (!found) {
3777                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3778                                        round_up(rec->isize,
3779                                                 root->fs_info->sectorsize));
3780                 if (ret < 0)
3781                         goto out;
3782         }
3783         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3784                rec->ino, root->objectid);
3785 out:
3786         return ret;
3787 }
3788
3789 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3790 {
3791         struct btrfs_trans_handle *trans;
3792         struct btrfs_path path;
3793         int ret = 0;
3794
3795         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3796                              I_ERR_NO_ORPHAN_ITEM |
3797                              I_ERR_LINK_COUNT_WRONG |
3798                              I_ERR_NO_INODE_ITEM |
3799                              I_ERR_FILE_EXTENT_ORPHAN |
3800                              I_ERR_FILE_EXTENT_DISCOUNT|
3801                              I_ERR_FILE_NBYTES_WRONG)))
3802                 return rec->errors;
3803
3804         /*
3805          * For nlink repair, it may create a dir and add link, so
3806          * 2 for parent(256)'s dir_index and dir_item
3807          * 2 for lost+found dir's inode_item and inode_ref
3808          * 1 for the new inode_ref of the file
3809          * 2 for lost+found dir's dir_index and dir_item for the file
3810          */
3811         trans = btrfs_start_transaction(root, 7);
3812         if (IS_ERR(trans))
3813                 return PTR_ERR(trans);
3814
3815         btrfs_init_path(&path);
3816         if (rec->errors & I_ERR_NO_INODE_ITEM)
3817                 ret = repair_inode_no_item(trans, root, &path, rec);
3818         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3819                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3820         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3821                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3822         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3823                 ret = repair_inode_isize(trans, root, &path, rec);
3824         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3825                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3826         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3827                 ret = repair_inode_nlinks(trans, root, &path, rec);
3828         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3829                 ret = repair_inode_nbytes(trans, root, &path, rec);
3830         btrfs_commit_transaction(trans, root);
3831         btrfs_release_path(&path);
3832         return ret;
3833 }
3834
3835 static int check_inode_recs(struct btrfs_root *root,
3836                             struct cache_tree *inode_cache)
3837 {
3838         struct cache_extent *cache;
3839         struct ptr_node *node;
3840         struct inode_record *rec;
3841         struct inode_backref *backref;
3842         int stage = 0;
3843         int ret = 0;
3844         int err = 0;
3845         u64 error = 0;
3846         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3847
3848         if (btrfs_root_refs(&root->root_item) == 0) {
3849                 if (!cache_tree_empty(inode_cache))
3850                         fprintf(stderr, "warning line %d\n", __LINE__);
3851                 return 0;
3852         }
3853
3854         /*
3855          * We need to repair backrefs first because we could change some of the
3856          * errors in the inode recs.
3857          *
3858          * We also need to go through and delete invalid backrefs first and then
3859          * add the correct ones second.  We do this because we may get EEXIST
3860          * when adding back the correct index because we hadn't yet deleted the
3861          * invalid index.
3862          *
3863          * For example, if we were missing a dir index then the directories
3864          * isize would be wrong, so if we fixed the isize to what we thought it
3865          * would be and then fixed the backref we'd still have a invalid fs, so
3866          * we need to add back the dir index and then check to see if the isize
3867          * is still wrong.
3868          */
3869         while (stage < 3) {
3870                 stage++;
3871                 if (stage == 3 && !err)
3872                         break;
3873
3874                 cache = search_cache_extent(inode_cache, 0);
3875                 while (repair && cache) {
3876                         node = container_of(cache, struct ptr_node, cache);
3877                         rec = node->data;
3878                         cache = next_cache_extent(cache);
3879
3880                         /* Need to free everything up and rescan */
3881                         if (stage == 3) {
3882                                 remove_cache_extent(inode_cache, &node->cache);
3883                                 free(node);
3884                                 free_inode_rec(rec);
3885                                 continue;
3886                         }
3887
3888                         if (list_empty(&rec->backrefs))
3889                                 continue;
3890
3891                         ret = repair_inode_backrefs(root, rec, inode_cache,
3892                                                     stage == 1);
3893                         if (ret < 0) {
3894                                 err = ret;
3895                                 stage = 2;
3896                                 break;
3897                         } if (ret > 0) {
3898                                 err = -EAGAIN;
3899                         }
3900                 }
3901         }
3902         if (err)
3903                 return err;
3904
3905         rec = get_inode_rec(inode_cache, root_dirid, 0);
3906         BUG_ON(IS_ERR(rec));
3907         if (rec) {
3908                 ret = check_root_dir(rec);
3909                 if (ret) {
3910                         fprintf(stderr, "root %llu root dir %llu error\n",
3911                                 (unsigned long long)root->root_key.objectid,
3912                                 (unsigned long long)root_dirid);
3913                         print_inode_error(root, rec);
3914                         error++;
3915                 }
3916         } else {
3917                 if (repair) {
3918                         struct btrfs_trans_handle *trans;
3919
3920                         trans = btrfs_start_transaction(root, 1);
3921                         if (IS_ERR(trans)) {
3922                                 err = PTR_ERR(trans);
3923                                 return err;
3924                         }
3925
3926                         fprintf(stderr,
3927                                 "root %llu missing its root dir, recreating\n",
3928                                 (unsigned long long)root->objectid);
3929
3930                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3931                         BUG_ON(ret);
3932
3933                         btrfs_commit_transaction(trans, root);
3934                         return -EAGAIN;
3935                 }
3936
3937                 fprintf(stderr, "root %llu root dir %llu not found\n",
3938                         (unsigned long long)root->root_key.objectid,
3939                         (unsigned long long)root_dirid);
3940         }
3941
3942         while (1) {
3943                 cache = search_cache_extent(inode_cache, 0);
3944                 if (!cache)
3945                         break;
3946                 node = container_of(cache, struct ptr_node, cache);
3947                 rec = node->data;
3948                 remove_cache_extent(inode_cache, &node->cache);
3949                 free(node);
3950                 if (rec->ino == root_dirid ||
3951                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3952                         free_inode_rec(rec);
3953                         continue;
3954                 }
3955
3956                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3957                         ret = check_orphan_item(root, rec->ino);
3958                         if (ret == 0)
3959                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3960                         if (can_free_inode_rec(rec)) {
3961                                 free_inode_rec(rec);
3962                                 continue;
3963                         }
3964                 }
3965
3966                 if (!rec->found_inode_item)
3967                         rec->errors |= I_ERR_NO_INODE_ITEM;
3968                 if (rec->found_link != rec->nlink)
3969                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3970                 if (repair) {
3971                         ret = try_repair_inode(root, rec);
3972                         if (ret == 0 && can_free_inode_rec(rec)) {
3973                                 free_inode_rec(rec);
3974                                 continue;
3975                         }
3976                         ret = 0;
3977                 }
3978
3979                 if (!(repair && ret == 0))
3980                         error++;
3981                 print_inode_error(root, rec);
3982                 list_for_each_entry(backref, &rec->backrefs, list) {
3983                         if (!backref->found_dir_item)
3984                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3985                         if (!backref->found_dir_index)
3986                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3987                         if (!backref->found_inode_ref)
3988                                 backref->errors |= REF_ERR_NO_INODE_REF;
3989                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3990                                 " namelen %u name %s filetype %d errors %x",
3991                                 (unsigned long long)backref->dir,
3992                                 (unsigned long long)backref->index,
3993                                 backref->namelen, backref->name,
3994                                 backref->filetype, backref->errors);
3995                         print_ref_error(backref->errors);
3996                 }
3997                 free_inode_rec(rec);
3998         }
3999         return (error > 0) ? -1 : 0;
4000 }
4001
4002 static struct root_record *get_root_rec(struct cache_tree *root_cache,
4003                                         u64 objectid)
4004 {
4005         struct cache_extent *cache;
4006         struct root_record *rec = NULL;
4007         int ret;
4008
4009         cache = lookup_cache_extent(root_cache, objectid, 1);
4010         if (cache) {
4011                 rec = container_of(cache, struct root_record, cache);
4012         } else {
4013                 rec = calloc(1, sizeof(*rec));
4014                 if (!rec)
4015                         return ERR_PTR(-ENOMEM);
4016                 rec->objectid = objectid;
4017                 INIT_LIST_HEAD(&rec->backrefs);
4018                 rec->cache.start = objectid;
4019                 rec->cache.size = 1;
4020
4021                 ret = insert_cache_extent(root_cache, &rec->cache);
4022                 if (ret)
4023                         return ERR_PTR(-EEXIST);
4024         }
4025         return rec;
4026 }
4027
4028 static struct root_backref *get_root_backref(struct root_record *rec,
4029                                              u64 ref_root, u64 dir, u64 index,
4030                                              const char *name, int namelen)
4031 {
4032         struct root_backref *backref;
4033
4034         list_for_each_entry(backref, &rec->backrefs, list) {
4035                 if (backref->ref_root != ref_root || backref->dir != dir ||
4036                     backref->namelen != namelen)
4037                         continue;
4038                 if (memcmp(name, backref->name, namelen))
4039                         continue;
4040                 return backref;
4041         }
4042
4043         backref = calloc(1, sizeof(*backref) + namelen + 1);
4044         if (!backref)
4045                 return NULL;
4046         backref->ref_root = ref_root;
4047         backref->dir = dir;
4048         backref->index = index;
4049         backref->namelen = namelen;
4050         memcpy(backref->name, name, namelen);
4051         backref->name[namelen] = '\0';
4052         list_add_tail(&backref->list, &rec->backrefs);
4053         return backref;
4054 }
4055
4056 static void free_root_record(struct cache_extent *cache)
4057 {
4058         struct root_record *rec;
4059         struct root_backref *backref;
4060
4061         rec = container_of(cache, struct root_record, cache);
4062         while (!list_empty(&rec->backrefs)) {
4063                 backref = to_root_backref(rec->backrefs.next);
4064                 list_del(&backref->list);
4065                 free(backref);
4066         }
4067
4068         free(rec);
4069 }
4070
4071 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
4072
4073 static int add_root_backref(struct cache_tree *root_cache,
4074                             u64 root_id, u64 ref_root, u64 dir, u64 index,
4075                             const char *name, int namelen,
4076                             int item_type, int errors)
4077 {
4078         struct root_record *rec;
4079         struct root_backref *backref;
4080
4081         rec = get_root_rec(root_cache, root_id);
4082         BUG_ON(IS_ERR(rec));
4083         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4084         BUG_ON(!backref);
4085
4086         backref->errors |= errors;
4087
4088         if (item_type != BTRFS_DIR_ITEM_KEY) {
4089                 if (backref->found_dir_index || backref->found_back_ref ||
4090                     backref->found_forward_ref) {
4091                         if (backref->index != index)
4092                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
4093                 } else {
4094                         backref->index = index;
4095                 }
4096         }
4097
4098         if (item_type == BTRFS_DIR_ITEM_KEY) {
4099                 if (backref->found_forward_ref)
4100                         rec->found_ref++;
4101                 backref->found_dir_item = 1;
4102         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4103                 backref->found_dir_index = 1;
4104         } else if (item_type == BTRFS_ROOT_REF_KEY) {
4105                 if (backref->found_forward_ref)
4106                         backref->errors |= REF_ERR_DUP_ROOT_REF;
4107                 else if (backref->found_dir_item)
4108                         rec->found_ref++;
4109                 backref->found_forward_ref = 1;
4110         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4111                 if (backref->found_back_ref)
4112                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4113                 backref->found_back_ref = 1;
4114         } else {
4115                 BUG_ON(1);
4116         }
4117
4118         if (backref->found_forward_ref && backref->found_dir_item)
4119                 backref->reachable = 1;
4120         return 0;
4121 }
4122
4123 static int merge_root_recs(struct btrfs_root *root,
4124                            struct cache_tree *src_cache,
4125                            struct cache_tree *dst_cache)
4126 {
4127         struct cache_extent *cache;
4128         struct ptr_node *node;
4129         struct inode_record *rec;
4130         struct inode_backref *backref;
4131         int ret = 0;
4132
4133         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4134                 free_inode_recs_tree(src_cache);
4135                 return 0;
4136         }
4137
4138         while (1) {
4139                 cache = search_cache_extent(src_cache, 0);
4140                 if (!cache)
4141                         break;
4142                 node = container_of(cache, struct ptr_node, cache);
4143                 rec = node->data;
4144                 remove_cache_extent(src_cache, &node->cache);
4145                 free(node);
4146
4147                 ret = is_child_root(root, root->objectid, rec->ino);
4148                 if (ret < 0)
4149                         break;
4150                 else if (ret == 0)
4151                         goto skip;
4152
4153                 list_for_each_entry(backref, &rec->backrefs, list) {
4154                         BUG_ON(backref->found_inode_ref);
4155                         if (backref->found_dir_item)
4156                                 add_root_backref(dst_cache, rec->ino,
4157                                         root->root_key.objectid, backref->dir,
4158                                         backref->index, backref->name,
4159                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
4160                                         backref->errors);
4161                         if (backref->found_dir_index)
4162                                 add_root_backref(dst_cache, rec->ino,
4163                                         root->root_key.objectid, backref->dir,
4164                                         backref->index, backref->name,
4165                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
4166                                         backref->errors);
4167                 }
4168 skip:
4169                 free_inode_rec(rec);
4170         }
4171         if (ret < 0)
4172                 return ret;
4173         return 0;
4174 }
4175
4176 static int check_root_refs(struct btrfs_root *root,
4177                            struct cache_tree *root_cache)
4178 {
4179         struct root_record *rec;
4180         struct root_record *ref_root;
4181         struct root_backref *backref;
4182         struct cache_extent *cache;
4183         int loop = 1;
4184         int ret;
4185         int error;
4186         int errors = 0;
4187
4188         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4189         BUG_ON(IS_ERR(rec));
4190         rec->found_ref = 1;
4191
4192         /* fixme: this can not detect circular references */
4193         while (loop) {
4194                 loop = 0;
4195                 cache = search_cache_extent(root_cache, 0);
4196                 while (1) {
4197                         if (!cache)
4198                                 break;
4199                         rec = container_of(cache, struct root_record, cache);
4200                         cache = next_cache_extent(cache);
4201
4202                         if (rec->found_ref == 0)
4203                                 continue;
4204
4205                         list_for_each_entry(backref, &rec->backrefs, list) {
4206                                 if (!backref->reachable)
4207                                         continue;
4208
4209                                 ref_root = get_root_rec(root_cache,
4210                                                         backref->ref_root);
4211                                 BUG_ON(IS_ERR(ref_root));
4212                                 if (ref_root->found_ref > 0)
4213                                         continue;
4214
4215                                 backref->reachable = 0;
4216                                 rec->found_ref--;
4217                                 if (rec->found_ref == 0)
4218                                         loop = 1;
4219                         }
4220                 }
4221         }
4222
4223         cache = search_cache_extent(root_cache, 0);
4224         while (1) {
4225                 if (!cache)
4226                         break;
4227                 rec = container_of(cache, struct root_record, cache);
4228                 cache = next_cache_extent(cache);
4229
4230                 if (rec->found_ref == 0 &&
4231                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4232                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4233                         ret = check_orphan_item(root->fs_info->tree_root,
4234                                                 rec->objectid);
4235                         if (ret == 0)
4236                                 continue;
4237
4238                         /*
4239                          * If we don't have a root item then we likely just have
4240                          * a dir item in a snapshot for this root but no actual
4241                          * ref key or anything so it's meaningless.
4242                          */
4243                         if (!rec->found_root_item)
4244                                 continue;
4245                         errors++;
4246                         fprintf(stderr, "fs tree %llu not referenced\n",
4247                                 (unsigned long long)rec->objectid);
4248                 }
4249
4250                 error = 0;
4251                 if (rec->found_ref > 0 && !rec->found_root_item)
4252                         error = 1;
4253                 list_for_each_entry(backref, &rec->backrefs, list) {
4254                         if (!backref->found_dir_item)
4255                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
4256                         if (!backref->found_dir_index)
4257                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
4258                         if (!backref->found_back_ref)
4259                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4260                         if (!backref->found_forward_ref)
4261                                 backref->errors |= REF_ERR_NO_ROOT_REF;
4262                         if (backref->reachable && backref->errors)
4263                                 error = 1;
4264                 }
4265                 if (!error)
4266                         continue;
4267
4268                 errors++;
4269                 fprintf(stderr, "fs tree %llu refs %u %s\n",
4270                         (unsigned long long)rec->objectid, rec->found_ref,
4271                          rec->found_root_item ? "" : "not found");
4272
4273                 list_for_each_entry(backref, &rec->backrefs, list) {
4274                         if (!backref->reachable)
4275                                 continue;
4276                         if (!backref->errors && rec->found_root_item)
4277                                 continue;
4278                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4279                                 " index %llu namelen %u name %s errors %x\n",
4280                                 (unsigned long long)backref->ref_root,
4281                                 (unsigned long long)backref->dir,
4282                                 (unsigned long long)backref->index,
4283                                 backref->namelen, backref->name,
4284                                 backref->errors);
4285                         print_ref_error(backref->errors);
4286                 }
4287         }
4288         return errors > 0 ? 1 : 0;
4289 }
4290
4291 static int process_root_ref(struct extent_buffer *eb, int slot,
4292                             struct btrfs_key *key,
4293                             struct cache_tree *root_cache)
4294 {
4295         u64 dirid;
4296         u64 index;
4297         u32 len;
4298         u32 name_len;
4299         struct btrfs_root_ref *ref;
4300         char namebuf[BTRFS_NAME_LEN];
4301         int error;
4302
4303         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4304
4305         dirid = btrfs_root_ref_dirid(eb, ref);
4306         index = btrfs_root_ref_sequence(eb, ref);
4307         name_len = btrfs_root_ref_name_len(eb, ref);
4308
4309         if (name_len <= BTRFS_NAME_LEN) {
4310                 len = name_len;
4311                 error = 0;
4312         } else {
4313                 len = BTRFS_NAME_LEN;
4314                 error = REF_ERR_NAME_TOO_LONG;
4315         }
4316         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4317
4318         if (key->type == BTRFS_ROOT_REF_KEY) {
4319                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4320                                  index, namebuf, len, key->type, error);
4321         } else {
4322                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4323                                  index, namebuf, len, key->type, error);
4324         }
4325         return 0;
4326 }
4327
4328 static void free_corrupt_block(struct cache_extent *cache)
4329 {
4330         struct btrfs_corrupt_block *corrupt;
4331
4332         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4333         free(corrupt);
4334 }
4335
4336 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4337
4338 /*
4339  * Repair the btree of the given root.
4340  *
4341  * The fix is to remove the node key in corrupt_blocks cache_tree.
4342  * and rebalance the tree.
4343  * After the fix, the btree should be writeable.
4344  */
4345 static int repair_btree(struct btrfs_root *root,
4346                         struct cache_tree *corrupt_blocks)
4347 {
4348         struct btrfs_trans_handle *trans;
4349         struct btrfs_path path;
4350         struct btrfs_corrupt_block *corrupt;
4351         struct cache_extent *cache;
4352         struct btrfs_key key;
4353         u64 offset;
4354         int level;
4355         int ret = 0;
4356
4357         if (cache_tree_empty(corrupt_blocks))
4358                 return 0;
4359
4360         trans = btrfs_start_transaction(root, 1);
4361         if (IS_ERR(trans)) {
4362                 ret = PTR_ERR(trans);
4363                 fprintf(stderr, "Error starting transaction: %s\n",
4364                         strerror(-ret));
4365                 return ret;
4366         }
4367         btrfs_init_path(&path);
4368         cache = first_cache_extent(corrupt_blocks);
4369         while (cache) {
4370                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4371                                        cache);
4372                 level = corrupt->level;
4373                 path.lowest_level = level;
4374                 key.objectid = corrupt->key.objectid;
4375                 key.type = corrupt->key.type;
4376                 key.offset = corrupt->key.offset;
4377
4378                 /*
4379                  * Here we don't want to do any tree balance, since it may
4380                  * cause a balance with corrupted brother leaf/node,
4381                  * so ins_len set to 0 here.
4382                  * Balance will be done after all corrupt node/leaf is deleted.
4383                  */
4384                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4385                 if (ret < 0)
4386                         goto out;
4387                 offset = btrfs_node_blockptr(path.nodes[level],
4388                                              path.slots[level]);
4389
4390                 /* Remove the ptr */
4391                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4392                 if (ret < 0)
4393                         goto out;
4394                 /*
4395                  * Remove the corresponding extent
4396                  * return value is not concerned.
4397                  */
4398                 btrfs_release_path(&path);
4399                 ret = btrfs_free_extent(trans, root, offset,
4400                                 root->fs_info->nodesize, 0,
4401                                 root->root_key.objectid, level - 1, 0);
4402                 cache = next_cache_extent(cache);
4403         }
4404
4405         /* Balance the btree using btrfs_search_slot() */
4406         cache = first_cache_extent(corrupt_blocks);
4407         while (cache) {
4408                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4409                                        cache);
4410                 memcpy(&key, &corrupt->key, sizeof(key));
4411                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4412                 if (ret < 0)
4413                         goto out;
4414                 /* return will always >0 since it won't find the item */
4415                 ret = 0;
4416                 btrfs_release_path(&path);
4417                 cache = next_cache_extent(cache);
4418         }
4419 out:
4420         btrfs_commit_transaction(trans, root);
4421         btrfs_release_path(&path);
4422         return ret;
4423 }
4424
4425 static int check_fs_root(struct btrfs_root *root,
4426                          struct cache_tree *root_cache,
4427                          struct walk_control *wc)
4428 {
4429         int ret = 0;
4430         int err = 0;
4431         int wret;
4432         int level;
4433         struct btrfs_path path;
4434         struct shared_node root_node;
4435         struct root_record *rec;
4436         struct btrfs_root_item *root_item = &root->root_item;
4437         struct cache_tree corrupt_blocks;
4438         struct orphan_data_extent *orphan;
4439         struct orphan_data_extent *tmp;
4440         enum btrfs_tree_block_status status;
4441         struct node_refs nrefs;
4442
4443         /*
4444          * Reuse the corrupt_block cache tree to record corrupted tree block
4445          *
4446          * Unlike the usage in extent tree check, here we do it in a per
4447          * fs/subvol tree base.
4448          */
4449         cache_tree_init(&corrupt_blocks);
4450         root->fs_info->corrupt_blocks = &corrupt_blocks;
4451
4452         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4453                 rec = get_root_rec(root_cache, root->root_key.objectid);
4454                 BUG_ON(IS_ERR(rec));
4455                 if (btrfs_root_refs(root_item) > 0)
4456                         rec->found_root_item = 1;
4457         }
4458
4459         btrfs_init_path(&path);
4460         memset(&root_node, 0, sizeof(root_node));
4461         cache_tree_init(&root_node.root_cache);
4462         cache_tree_init(&root_node.inode_cache);
4463         memset(&nrefs, 0, sizeof(nrefs));
4464
4465         /* Move the orphan extent record to corresponding inode_record */
4466         list_for_each_entry_safe(orphan, tmp,
4467                                  &root->orphan_data_extents, list) {
4468                 struct inode_record *inode;
4469
4470                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4471                                       1);
4472                 BUG_ON(IS_ERR(inode));
4473                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4474                 list_move(&orphan->list, &inode->orphan_extents);
4475         }
4476
4477         level = btrfs_header_level(root->node);
4478         memset(wc->nodes, 0, sizeof(wc->nodes));
4479         wc->nodes[level] = &root_node;
4480         wc->active_node = level;
4481         wc->root_level = level;
4482
4483         /* We may not have checked the root block, lets do that now */
4484         if (btrfs_is_leaf(root->node))
4485                 status = btrfs_check_leaf(root, NULL, root->node);
4486         else
4487                 status = btrfs_check_node(root, NULL, root->node);
4488         if (status != BTRFS_TREE_BLOCK_CLEAN)
4489                 return -EIO;
4490
4491         if (btrfs_root_refs(root_item) > 0 ||
4492             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4493                 path.nodes[level] = root->node;
4494                 extent_buffer_get(root->node);
4495                 path.slots[level] = 0;
4496         } else {
4497                 struct btrfs_key key;
4498                 struct btrfs_disk_key found_key;
4499
4500                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4501                 level = root_item->drop_level;
4502                 path.lowest_level = level;
4503                 if (level > btrfs_header_level(root->node) ||
4504                     level >= BTRFS_MAX_LEVEL) {
4505                         error("ignoring invalid drop level: %u", level);
4506                         goto skip_walking;
4507                 }
4508                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4509                 if (wret < 0)
4510                         goto skip_walking;
4511                 btrfs_node_key(path.nodes[level], &found_key,
4512                                 path.slots[level]);
4513                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4514                                         sizeof(found_key)));
4515         }
4516
4517         while (1) {
4518                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4519                 if (wret < 0)
4520                         ret = wret;
4521                 if (wret != 0)
4522                         break;
4523
4524                 wret = walk_up_tree(root, &path, wc, &level);
4525                 if (wret < 0)
4526                         ret = wret;
4527                 if (wret != 0)
4528                         break;
4529         }
4530 skip_walking:
4531         btrfs_release_path(&path);
4532
4533         if (!cache_tree_empty(&corrupt_blocks)) {
4534                 struct cache_extent *cache;
4535                 struct btrfs_corrupt_block *corrupt;
4536
4537                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4538                        root->root_key.objectid);
4539                 cache = first_cache_extent(&corrupt_blocks);
4540                 while (cache) {
4541                         corrupt = container_of(cache,
4542                                                struct btrfs_corrupt_block,
4543                                                cache);
4544                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4545                                cache->start, corrupt->level,
4546                                corrupt->key.objectid, corrupt->key.type,
4547                                corrupt->key.offset);
4548                         cache = next_cache_extent(cache);
4549                 }
4550                 if (repair) {
4551                         printf("Try to repair the btree for root %llu\n",
4552                                root->root_key.objectid);
4553                         ret = repair_btree(root, &corrupt_blocks);
4554                         if (ret < 0)
4555                                 fprintf(stderr, "Failed to repair btree: %s\n",
4556                                         strerror(-ret));
4557                         if (!ret)
4558                                 printf("Btree for root %llu is fixed\n",
4559                                        root->root_key.objectid);
4560                 }
4561         }
4562
4563         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4564         if (err < 0)
4565                 ret = err;
4566
4567         if (root_node.current) {
4568                 root_node.current->checked = 1;
4569                 maybe_free_inode_rec(&root_node.inode_cache,
4570                                 root_node.current);
4571         }
4572
4573         err = check_inode_recs(root, &root_node.inode_cache);
4574         if (!ret)
4575                 ret = err;
4576
4577         free_corrupt_blocks_tree(&corrupt_blocks);
4578         root->fs_info->corrupt_blocks = NULL;
4579         free_orphan_data_extents(&root->orphan_data_extents);
4580         return ret;
4581 }
4582
4583 static int fs_root_objectid(u64 objectid)
4584 {
4585         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4586             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4587                 return 1;
4588         return is_fstree(objectid);
4589 }
4590
4591 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4592                           struct cache_tree *root_cache)
4593 {
4594         struct btrfs_path path;
4595         struct btrfs_key key;
4596         struct walk_control wc;
4597         struct extent_buffer *leaf, *tree_node;
4598         struct btrfs_root *tmp_root;
4599         struct btrfs_root *tree_root = fs_info->tree_root;
4600         int ret;
4601         int err = 0;
4602
4603         if (ctx.progress_enabled) {
4604                 ctx.tp = TASK_FS_ROOTS;
4605                 task_start(ctx.info);
4606         }
4607
4608         /*
4609          * Just in case we made any changes to the extent tree that weren't
4610          * reflected into the free space cache yet.
4611          */
4612         if (repair)
4613                 reset_cached_block_groups(fs_info);
4614         memset(&wc, 0, sizeof(wc));
4615         cache_tree_init(&wc.shared);
4616         btrfs_init_path(&path);
4617
4618 again:
4619         key.offset = 0;
4620         key.objectid = 0;
4621         key.type = BTRFS_ROOT_ITEM_KEY;
4622         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4623         if (ret < 0) {
4624                 err = 1;
4625                 goto out;
4626         }
4627         tree_node = tree_root->node;
4628         while (1) {
4629                 if (tree_node != tree_root->node) {
4630                         free_root_recs_tree(root_cache);
4631                         btrfs_release_path(&path);
4632                         goto again;
4633                 }
4634                 leaf = path.nodes[0];
4635                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4636                         ret = btrfs_next_leaf(tree_root, &path);
4637                         if (ret) {
4638                                 if (ret < 0)
4639                                         err = 1;
4640                                 break;
4641                         }
4642                         leaf = path.nodes[0];
4643                 }
4644                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4645                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4646                     fs_root_objectid(key.objectid)) {
4647                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4648                                 tmp_root = btrfs_read_fs_root_no_cache(
4649                                                 fs_info, &key);
4650                         } else {
4651                                 key.offset = (u64)-1;
4652                                 tmp_root = btrfs_read_fs_root(
4653                                                 fs_info, &key);
4654                         }
4655                         if (IS_ERR(tmp_root)) {
4656                                 err = 1;
4657                                 goto next;
4658                         }
4659                         ret = check_fs_root(tmp_root, root_cache, &wc);
4660                         if (ret == -EAGAIN) {
4661                                 free_root_recs_tree(root_cache);
4662                                 btrfs_release_path(&path);
4663                                 goto again;
4664                         }
4665                         if (ret)
4666                                 err = 1;
4667                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4668                                 btrfs_free_fs_root(tmp_root);
4669                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4670                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4671                         process_root_ref(leaf, path.slots[0], &key,
4672                                          root_cache);
4673                 }
4674 next:
4675                 path.slots[0]++;
4676         }
4677 out:
4678         btrfs_release_path(&path);
4679         if (err)
4680                 free_extent_cache_tree(&wc.shared);
4681         if (!cache_tree_empty(&wc.shared))
4682                 fprintf(stderr, "warning line %d\n", __LINE__);
4683
4684         task_stop(ctx.info);
4685
4686         return err;
4687 }
4688
4689 /*
4690  * Find the @index according by @ino and name.
4691  * Notice:time efficiency is O(N)
4692  *
4693  * @root:       the root of the fs/file tree
4694  * @index_ret:  the index as return value
4695  * @namebuf:    the name to match
4696  * @name_len:   the length of name to match
4697  * @file_type:  the file_type of INODE_ITEM to match
4698  *
4699  * Returns 0 if found and *@index_ret will be modified with right value
4700  * Returns< 0 not found and *@index_ret will be (u64)-1
4701  */
4702 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4703                           u64 *index_ret, char *namebuf, u32 name_len,
4704                           u8 file_type)
4705 {
4706         struct btrfs_path path;
4707         struct extent_buffer *node;
4708         struct btrfs_dir_item *di;
4709         struct btrfs_key key;
4710         struct btrfs_key location;
4711         char name[BTRFS_NAME_LEN] = {0};
4712
4713         u32 total;
4714         u32 cur = 0;
4715         u32 len;
4716         u32 data_len;
4717         u8 filetype;
4718         int slot;
4719         int ret;
4720
4721         ASSERT(index_ret);
4722
4723         /* search from the last index */
4724         key.objectid = dirid;
4725         key.offset = (u64)-1;
4726         key.type = BTRFS_DIR_INDEX_KEY;
4727
4728         btrfs_init_path(&path);
4729         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4730         if (ret < 0)
4731                 return ret;
4732
4733 loop:
4734         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4735         if (ret) {
4736                 ret = -ENOENT;
4737                 *index_ret = (64)-1;
4738                 goto out;
4739         }
4740         /* Check whether inode_id/filetype/name match */
4741         node = path.nodes[0];
4742         slot = path.slots[0];
4743         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4744         total = btrfs_item_size_nr(node, slot);
4745         while (cur < total) {
4746                 ret = -ENOENT;
4747                 len = btrfs_dir_name_len(node, di);
4748                 data_len = btrfs_dir_data_len(node, di);
4749
4750                 btrfs_dir_item_key_to_cpu(node, di, &location);
4751                 if (location.objectid != location_id ||
4752                     location.type != BTRFS_INODE_ITEM_KEY ||
4753                     location.offset != 0)
4754                         goto next;
4755
4756                 filetype = btrfs_dir_type(node, di);
4757                 if (file_type != filetype)
4758                         goto next;
4759
4760                 if (len > BTRFS_NAME_LEN)
4761                         len = BTRFS_NAME_LEN;
4762
4763                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4764                 if (len != name_len || strncmp(namebuf, name, len))
4765                         goto next;
4766
4767                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4768                 *index_ret = key.offset;
4769                 ret = 0;
4770                 goto out;
4771 next:
4772                 len += sizeof(*di) + data_len;
4773                 di = (struct btrfs_dir_item *)((char *)di + len);
4774                 cur += len;
4775         }
4776         goto loop;
4777
4778 out:
4779         btrfs_release_path(&path);
4780         return ret;
4781 }
4782
4783 /*
4784  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4785  * INODE_REF/INODE_EXTREF match.
4786  *
4787  * @root:       the root of the fs/file tree
4788  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4789  *              value while find index
4790  * @location_key: location key of the struct btrfs_dir_item to match
4791  * @name:       the name to match
4792  * @namelen:    the length of name
4793  * @file_type:  the type of file to math
4794  *
4795  * Return 0 if no error occurred.
4796  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4797  * DIR_ITEM/DIR_INDEX
4798  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4799  * and DIR_ITEM/DIR_INDEX mismatch
4800  */
4801 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4802                          struct btrfs_key *location_key, char *name,
4803                          u32 namelen, u8 file_type)
4804 {
4805         struct btrfs_path path;
4806         struct extent_buffer *node;
4807         struct btrfs_dir_item *di;
4808         struct btrfs_key location;
4809         char namebuf[BTRFS_NAME_LEN] = {0};
4810         u32 total;
4811         u32 cur = 0;
4812         u32 len;
4813         u32 data_len;
4814         u8 filetype;
4815         int slot;
4816         int ret;
4817
4818         /* get the index by traversing all index */
4819         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4820                 ret = find_dir_index(root, key->objectid,
4821                                      location_key->objectid, &key->offset,
4822                                      name, namelen, file_type);
4823                 if (ret)
4824                         ret = DIR_INDEX_MISSING;
4825                 return ret;
4826         }
4827
4828         btrfs_init_path(&path);
4829         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4830         if (ret) {
4831                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4832                         DIR_INDEX_MISSING;
4833                 goto out;
4834         }
4835
4836         /* Check whether inode_id/filetype/name match */
4837         node = path.nodes[0];
4838         slot = path.slots[0];
4839         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4840         total = btrfs_item_size_nr(node, slot);
4841         while (cur < total) {
4842                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4843                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4844
4845                 len = btrfs_dir_name_len(node, di);
4846                 data_len = btrfs_dir_data_len(node, di);
4847
4848                 btrfs_dir_item_key_to_cpu(node, di, &location);
4849                 if (location.objectid != location_key->objectid ||
4850                     location.type != location_key->type ||
4851                     location.offset != location_key->offset)
4852                         goto next;
4853
4854                 filetype = btrfs_dir_type(node, di);
4855                 if (file_type != filetype)
4856                         goto next;
4857
4858                 if (len > BTRFS_NAME_LEN) {
4859                         len = BTRFS_NAME_LEN;
4860                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4861                         root->objectid,
4862                         key->type == BTRFS_DIR_ITEM_KEY ?
4863                         "DIR_ITEM" : "DIR_INDEX",
4864                         key->objectid, key->offset, len);
4865                 }
4866                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4867                                    len);
4868                 if (len != namelen || strncmp(namebuf, name, len))
4869                         goto next;
4870
4871                 ret = 0;
4872                 goto out;
4873 next:
4874                 len += sizeof(*di) + data_len;
4875                 di = (struct btrfs_dir_item *)((char *)di + len);
4876                 cur += len;
4877         }
4878
4879 out:
4880         btrfs_release_path(&path);
4881         return ret;
4882 }
4883
4884 /*
4885  * Prints inode ref error message
4886  */
4887 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4888                                 u64 index, const char *namebuf, int name_len,
4889                                 u8 filetype, int err)
4890 {
4891         if (!err)
4892                 return;
4893
4894         /* root dir error */
4895         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4896                 error(
4897         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4898                       root->objectid, key->objectid, key->offset, namebuf);
4899                 return;
4900         }
4901
4902         /* normal error */
4903         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4904                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4905                       root->objectid, key->offset,
4906                       btrfs_name_hash(namebuf, name_len),
4907                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4908                       namebuf, filetype);
4909         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4910                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4911                       root->objectid, key->offset, index,
4912                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4913                       namebuf, filetype);
4914 }
4915
4916 /*
4917  * Insert the missing inode item.
4918  *
4919  * Returns 0 means success.
4920  * Returns <0 means error.
4921  */
4922 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4923                                      u8 filetype)
4924 {
4925         struct btrfs_key key;
4926         struct btrfs_trans_handle *trans;
4927         struct btrfs_path path;
4928         int ret;
4929
4930         key.objectid = ino;
4931         key.type = BTRFS_INODE_ITEM_KEY;
4932         key.offset = 0;
4933
4934         btrfs_init_path(&path);
4935         trans = btrfs_start_transaction(root, 1);
4936         if (IS_ERR(trans)) {
4937                 ret = -EIO;
4938                 goto out;
4939         }
4940
4941         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4942         if (ret < 0 || !ret)
4943                 goto fail;
4944
4945         /* insert inode item */
4946         create_inode_item_lowmem(trans, root, ino, filetype);
4947         ret = 0;
4948 fail:
4949         btrfs_commit_transaction(trans, root);
4950 out:
4951         if (ret)
4952                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4953                       root->objectid, ino);
4954         btrfs_release_path(&path);
4955         return ret;
4956 }
4957
4958 /*
4959  * The ternary means dir item, dir index and relative inode ref.
4960  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4961  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4962  * strategy:
4963  * If two of three is missing or mismatched, delete the existing one.
4964  * If one of three is missing or mismatched, add the missing one.
4965  *
4966  * returns 0 means success.
4967  * returns not 0 means on error;
4968  */
4969 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4970                           u64 index, char *name, int name_len, u8 filetype,
4971                           int err)
4972 {
4973         struct btrfs_trans_handle *trans;
4974         int stage = 0;
4975         int ret = 0;
4976
4977         /*
4978          * stage shall be one of following valild values:
4979          *      0: Fine, nothing to do.
4980          *      1: One of three is wrong, so add missing one.
4981          *      2: Two of three is wrong, so delete existed one.
4982          */
4983         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4984                 stage++;
4985         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4986                 stage++;
4987         if (err & (INODE_REF_MISSING))
4988                 stage++;
4989
4990         /* stage must be smllarer than 3 */
4991         ASSERT(stage < 3);
4992
4993         trans = btrfs_start_transaction(root, 1);
4994         if (stage == 2) {
4995                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4996                                    name_len, 0);
4997                 goto out;
4998         }
4999         if (stage == 1) {
5000                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
5001                                filetype, &index, 1, 1);
5002                 goto out;
5003         }
5004 out:
5005         btrfs_commit_transaction(trans, root);
5006
5007         if (ret)
5008                 error("fail to repair inode %llu name %s filetype %u",
5009                       ino, name, filetype);
5010         else
5011                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5012                        stage == 2 ? "Delete" : "Add",
5013                        ino, name, filetype);
5014
5015         return ret;
5016 }
5017
5018 /*
5019  * Traverse the given INODE_REF and call find_dir_item() to find related
5020  * DIR_ITEM/DIR_INDEX.
5021  *
5022  * @root:       the root of the fs/file tree
5023  * @ref_key:    the key of the INODE_REF
5024  * @path        the path provides node and slot
5025  * @refs:       the count of INODE_REF
5026  * @mode:       the st_mode of INODE_ITEM
5027  * @name_ret:   returns with the first ref's name
5028  * @name_len_ret:    len of the name_ret
5029  *
5030  * Return 0 if no error occurred.
5031  */
5032 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5033                            struct btrfs_path *path, char *name_ret,
5034                            u32 *namelen_ret, u64 *refs_ret, int mode)
5035 {
5036         struct btrfs_key key;
5037         struct btrfs_key location;
5038         struct btrfs_inode_ref *ref;
5039         struct extent_buffer *node;
5040         char namebuf[BTRFS_NAME_LEN] = {0};
5041         u32 total;
5042         u32 cur = 0;
5043         u32 len;
5044         u32 name_len;
5045         u64 index;
5046         int ret;
5047         int err = 0;
5048         int tmp_err;
5049         int slot;
5050         int need_research = 0;
5051         u64 refs;
5052
5053 begin:
5054         err = 0;
5055         cur = 0;
5056         refs = *refs_ret;
5057
5058         /* since after repair, path and the dir item may be changed */
5059         if (need_research) {
5060                 need_research = 0;
5061                 btrfs_release_path(path);
5062                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5063                 /* the item was deleted, let path point to the last checked item */
5064                 if (ret > 0) {
5065                         if (path->slots[0] == 0)
5066                                 btrfs_prev_leaf(root, path);
5067                         else
5068                                 path->slots[0]--;
5069                 }
5070                 if (ret)
5071                         goto out;
5072         }
5073
5074         location.objectid = ref_key->objectid;
5075         location.type = BTRFS_INODE_ITEM_KEY;
5076         location.offset = 0;
5077         node = path->nodes[0];
5078         slot = path->slots[0];
5079
5080         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5081         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5082         total = btrfs_item_size_nr(node, slot);
5083
5084 next:
5085         /* Update inode ref count */
5086         refs++;
5087         tmp_err = 0;
5088         index = btrfs_inode_ref_index(node, ref);
5089         name_len = btrfs_inode_ref_name_len(node, ref);
5090
5091         if (name_len <= BTRFS_NAME_LEN) {
5092                 len = name_len;
5093         } else {
5094                 len = BTRFS_NAME_LEN;
5095                 warning("root %llu INODE_REF[%llu %llu] name too long",
5096                         root->objectid, ref_key->objectid, ref_key->offset);
5097         }
5098
5099         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5100
5101         /* copy the first name found to name_ret */
5102         if (refs == 1 && name_ret) {
5103                 memcpy(name_ret, namebuf, len);
5104                 *namelen_ret = len;
5105         }
5106
5107         /* Check root dir ref */
5108         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5109                 if (index != 0 || len != strlen("..") ||
5110                     strncmp("..", namebuf, len) ||
5111                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5112                         /* set err bits then repair will delete the ref */
5113                         err |= DIR_INDEX_MISSING;
5114                         err |= DIR_ITEM_MISSING;
5115                 }
5116                 goto end;
5117         }
5118
5119         /* Find related DIR_INDEX */
5120         key.objectid = ref_key->offset;
5121         key.type = BTRFS_DIR_INDEX_KEY;
5122         key.offset = index;
5123         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5124                             imode_to_type(mode));
5125
5126         /* Find related dir_item */
5127         key.objectid = ref_key->offset;
5128         key.type = BTRFS_DIR_ITEM_KEY;
5129         key.offset = btrfs_name_hash(namebuf, len);
5130         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5131                             imode_to_type(mode));
5132 end:
5133         if (tmp_err && repair) {
5134                 ret = repair_ternary_lowmem(root, ref_key->offset,
5135                                             ref_key->objectid, index, namebuf,
5136                                             name_len, imode_to_type(mode),
5137                                             tmp_err);
5138                 if (!ret) {
5139                         need_research = 1;
5140                         goto begin;
5141                 }
5142         }
5143         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5144                             imode_to_type(mode), tmp_err);
5145         err |= tmp_err;
5146         len = sizeof(*ref) + name_len;
5147         ref = (struct btrfs_inode_ref *)((char *)ref + len);
5148         cur += len;
5149         if (cur < total)
5150                 goto next;
5151
5152 out:
5153         *refs_ret = refs;
5154         return err;
5155 }
5156
5157 /*
5158  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5159  * DIR_ITEM/DIR_INDEX.
5160  *
5161  * @root:       the root of the fs/file tree
5162  * @ref_key:    the key of the INODE_EXTREF
5163  * @refs:       the count of INODE_EXTREF
5164  * @mode:       the st_mode of INODE_ITEM
5165  *
5166  * Return 0 if no error occurred.
5167  */
5168 static int check_inode_extref(struct btrfs_root *root,
5169                               struct btrfs_key *ref_key,
5170                               struct extent_buffer *node, int slot, u64 *refs,
5171                               int mode)
5172 {
5173         struct btrfs_key key;
5174         struct btrfs_key location;
5175         struct btrfs_inode_extref *extref;
5176         char namebuf[BTRFS_NAME_LEN] = {0};
5177         u32 total;
5178         u32 cur = 0;
5179         u32 len;
5180         u32 name_len;
5181         u64 index;
5182         u64 parent;
5183         int ret;
5184         int err = 0;
5185
5186         location.objectid = ref_key->objectid;
5187         location.type = BTRFS_INODE_ITEM_KEY;
5188         location.offset = 0;
5189
5190         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5191         total = btrfs_item_size_nr(node, slot);
5192
5193 next:
5194         /* update inode ref count */
5195         (*refs)++;
5196         name_len = btrfs_inode_extref_name_len(node, extref);
5197         index = btrfs_inode_extref_index(node, extref);
5198         parent = btrfs_inode_extref_parent(node, extref);
5199         if (name_len <= BTRFS_NAME_LEN) {
5200                 len = name_len;
5201         } else {
5202                 len = BTRFS_NAME_LEN;
5203                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5204                         root->objectid, ref_key->objectid, ref_key->offset);
5205         }
5206         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5207
5208         /* Check root dir ref name */
5209         if (index == 0 && strncmp(namebuf, "..", name_len)) {
5210                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5211                       root->objectid, ref_key->objectid, ref_key->offset,
5212                       namebuf);
5213                 err |= ROOT_DIR_ERROR;
5214         }
5215
5216         /* find related dir_index */
5217         key.objectid = parent;
5218         key.type = BTRFS_DIR_INDEX_KEY;
5219         key.offset = index;
5220         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5221         err |= ret;
5222
5223         /* find related dir_item */
5224         key.objectid = parent;
5225         key.type = BTRFS_DIR_ITEM_KEY;
5226         key.offset = btrfs_name_hash(namebuf, len);
5227         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5228         err |= ret;
5229
5230         len = sizeof(*extref) + name_len;
5231         extref = (struct btrfs_inode_extref *)((char *)extref + len);
5232         cur += len;
5233
5234         if (cur < total)
5235                 goto next;
5236
5237         return err;
5238 }
5239
5240 /*
5241  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5242  * DIR_ITEM/DIR_INDEX match.
5243  * Return with @index_ret.
5244  *
5245  * @root:       the root of the fs/file tree
5246  * @key:        the key of the INODE_REF/INODE_EXTREF
5247  * @name:       the name in the INODE_REF/INODE_EXTREF
5248  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
5249  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
5250  *              value (64)-1 means do not check index
5251  * @ext_ref:    the EXTENDED_IREF feature
5252  *
5253  * Return 0 if no error occurred.
5254  * Return >0 for error bitmap
5255  */
5256 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5257                           char *name, int namelen, u64 *index_ret,
5258                           unsigned int ext_ref)
5259 {
5260         struct btrfs_path path;
5261         struct btrfs_inode_ref *ref;
5262         struct btrfs_inode_extref *extref;
5263         struct extent_buffer *node;
5264         char ref_namebuf[BTRFS_NAME_LEN] = {0};
5265         u32 total;
5266         u32 cur = 0;
5267         u32 len;
5268         u32 ref_namelen;
5269         u64 ref_index;
5270         u64 parent;
5271         u64 dir_id;
5272         int slot;
5273         int ret;
5274
5275         ASSERT(index_ret);
5276
5277         btrfs_init_path(&path);
5278         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5279         if (ret) {
5280                 ret = INODE_REF_MISSING;
5281                 goto extref;
5282         }
5283
5284         node = path.nodes[0];
5285         slot = path.slots[0];
5286
5287         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5288         total = btrfs_item_size_nr(node, slot);
5289
5290         /* Iterate all entry of INODE_REF */
5291         while (cur < total) {
5292                 ret = INODE_REF_MISSING;
5293
5294                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5295                 ref_index = btrfs_inode_ref_index(node, ref);
5296                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5297                         goto next_ref;
5298
5299                 if (cur + sizeof(*ref) + ref_namelen > total ||
5300                     ref_namelen > BTRFS_NAME_LEN) {
5301                         warning("root %llu INODE %s[%llu %llu] name too long",
5302                                 root->objectid,
5303                                 key->type == BTRFS_INODE_REF_KEY ?
5304                                         "REF" : "EXTREF",
5305                                 key->objectid, key->offset);
5306
5307                         if (cur + sizeof(*ref) > total)
5308                                 break;
5309                         len = min_t(u32, total - cur - sizeof(*ref),
5310                                     BTRFS_NAME_LEN);
5311                 } else {
5312                         len = ref_namelen;
5313                 }
5314
5315                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5316                                    len);
5317
5318                 if (len != namelen || strncmp(ref_namebuf, name, len))
5319                         goto next_ref;
5320
5321                 *index_ret = ref_index;
5322                 ret = 0;
5323                 goto out;
5324 next_ref:
5325                 len = sizeof(*ref) + ref_namelen;
5326                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5327                 cur += len;
5328         }
5329
5330 extref:
5331         /* Skip if not support EXTENDED_IREF feature */
5332         if (!ext_ref)
5333                 goto out;
5334
5335         btrfs_release_path(&path);
5336         btrfs_init_path(&path);
5337
5338         dir_id = key->offset;
5339         key->type = BTRFS_INODE_EXTREF_KEY;
5340         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5341
5342         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5343         if (ret) {
5344                 ret = INODE_REF_MISSING;
5345                 goto out;
5346         }
5347
5348         node = path.nodes[0];
5349         slot = path.slots[0];
5350
5351         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5352         cur = 0;
5353         total = btrfs_item_size_nr(node, slot);
5354
5355         /* Iterate all entry of INODE_EXTREF */
5356         while (cur < total) {
5357                 ret = INODE_REF_MISSING;
5358
5359                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5360                 ref_index = btrfs_inode_extref_index(node, extref);
5361                 parent = btrfs_inode_extref_parent(node, extref);
5362                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5363                         goto next_extref;
5364
5365                 if (parent != dir_id)
5366                         goto next_extref;
5367
5368                 if (ref_namelen <= BTRFS_NAME_LEN) {
5369                         len = ref_namelen;
5370                 } else {
5371                         len = BTRFS_NAME_LEN;
5372                         warning("root %llu INODE %s[%llu %llu] name too long",
5373                                 root->objectid,
5374                                 key->type == BTRFS_INODE_REF_KEY ?
5375                                         "REF" : "EXTREF",
5376                                 key->objectid, key->offset);
5377                 }
5378                 read_extent_buffer(node, ref_namebuf,
5379                                    (unsigned long)(extref + 1), len);
5380
5381                 if (len != namelen || strncmp(ref_namebuf, name, len))
5382                         goto next_extref;
5383
5384                 *index_ret = ref_index;
5385                 ret = 0;
5386                 goto out;
5387
5388 next_extref:
5389                 len = sizeof(*extref) + ref_namelen;
5390                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5391                 cur += len;
5392
5393         }
5394 out:
5395         btrfs_release_path(&path);
5396         return ret;
5397 }
5398
5399 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5400                                u64 ino, u64 index, const char *namebuf,
5401                                int name_len, u8 filetype, int err)
5402 {
5403         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5404                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5405                       root->objectid, key->objectid, key->offset, namebuf,
5406                       filetype,
5407                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5408         }
5409
5410         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5411                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5412                       root->objectid, key->objectid, index, namebuf, filetype,
5413                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5414         }
5415
5416         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5417                 error(
5418                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5419                       root->objectid, ino, index, namebuf, filetype,
5420                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5421         }
5422
5423         if (err & INODE_REF_MISSING)
5424                 error(
5425                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5426                       root->objectid, ino, key->objectid, namebuf, filetype);
5427
5428 }
5429
5430 /*
5431  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5432  *
5433  * Returns error after repair
5434  */
5435 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5436                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5437                            int err)
5438 {
5439         int ret;
5440
5441         if (err & INODE_ITEM_MISSING) {
5442                 ret = repair_inode_item_missing(root, ino, filetype);
5443                 if (!ret)
5444                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5445         }
5446
5447         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5448                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5449                                             name_len, filetype, err);
5450                 if (!ret) {
5451                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5452                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5453                         err &= ~(INODE_REF_MISSING);
5454                 }
5455         }
5456         return err;
5457 }
5458
5459 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5460                 u64 *size_ret)
5461 {
5462         struct btrfs_key key;
5463         struct btrfs_path path;
5464         u32 len;
5465         struct btrfs_dir_item *di;
5466         int ret;
5467         int cur = 0;
5468         int total = 0;
5469
5470         ASSERT(size_ret);
5471         *size_ret = 0;
5472
5473         key.objectid = ino;
5474         key.type = type;
5475         key.offset = (u64)-1;
5476
5477         btrfs_init_path(&path);
5478         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5479         if (ret < 0) {
5480                 ret = -EIO;
5481                 goto out;
5482         }
5483         /* if found, go to spacial case */
5484         if (ret == 0)
5485                 goto special_case;
5486
5487 loop:
5488         ret = btrfs_previous_item(root, &path, ino, type);
5489
5490         if (ret) {
5491                 ret = 0;
5492                 goto out;
5493         }
5494
5495 special_case:
5496         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5497         cur = 0;
5498         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5499
5500         while (cur < total) {
5501                 len = btrfs_dir_name_len(path.nodes[0], di);
5502                 if (len > BTRFS_NAME_LEN)
5503                         len = BTRFS_NAME_LEN;
5504                 *size_ret += len;
5505
5506                 len += btrfs_dir_data_len(path.nodes[0], di);
5507                 len += sizeof(*di);
5508                 di = (struct btrfs_dir_item *)((char *)di + len);
5509                 cur += len;
5510         }
5511         goto loop;
5512
5513 out:
5514         btrfs_release_path(&path);
5515         return ret;
5516 }
5517
5518 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5519 {
5520         u64 item_size;
5521         u64 index_size;
5522         int ret;
5523
5524         ASSERT(size);
5525         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5526         if (ret)
5527                 goto out;
5528
5529         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5530         if (ret)
5531                 goto out;
5532
5533         *size = item_size + index_size;
5534
5535 out:
5536         if (ret)
5537                 error("failed to count root %llu INODE[%llu] root size",
5538                       root->objectid, ino);
5539         return ret;
5540 }
5541
5542 /*
5543  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5544  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5545  *
5546  * @root:       the root of the fs/file tree
5547  * @key:        the key of the INODE_REF/INODE_EXTREF
5548  * @path:       the path
5549  * @size:       the st_size of the INODE_ITEM
5550  * @ext_ref:    the EXTENDED_IREF feature
5551  *
5552  * Return 0 if no error occurred.
5553  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5554  */
5555 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5556                           struct btrfs_path *path, u64 *size,
5557                           unsigned int ext_ref)
5558 {
5559         struct btrfs_dir_item *di;
5560         struct btrfs_inode_item *ii;
5561         struct btrfs_key key;
5562         struct btrfs_key location;
5563         struct extent_buffer *node;
5564         int slot;
5565         char namebuf[BTRFS_NAME_LEN] = {0};
5566         u32 total;
5567         u32 cur = 0;
5568         u32 len;
5569         u32 name_len;
5570         u32 data_len;
5571         u8 filetype;
5572         u32 mode = 0;
5573         u64 index;
5574         int ret;
5575         int err;
5576         int tmp_err;
5577         int need_research = 0;
5578
5579         /*
5580          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5581          * ignore index check.
5582          */
5583         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5584                 index = di_key->offset;
5585         else
5586                 index = (u64)-1;
5587 begin:
5588         err = 0;
5589         cur = 0;
5590
5591         /* since after repair, path and the dir item may be changed */
5592         if (need_research) {
5593                 need_research = 0;
5594                 err |= DIR_COUNT_AGAIN;
5595                 btrfs_release_path(path);
5596                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5597                 /* the item was deleted, let path point the last checked item */
5598                 if (ret > 0) {
5599                         if (path->slots[0] == 0)
5600                                 btrfs_prev_leaf(root, path);
5601                         else
5602                                 path->slots[0]--;
5603                 }
5604                 if (ret)
5605                         goto out;
5606         }
5607
5608         node = path->nodes[0];
5609         slot = path->slots[0];
5610
5611         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5612         total = btrfs_item_size_nr(node, slot);
5613         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5614
5615         while (cur < total) {
5616                 data_len = btrfs_dir_data_len(node, di);
5617                 tmp_err = 0;
5618                 if (data_len)
5619                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5620                               root->objectid,
5621               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5622                               di_key->objectid, di_key->offset, data_len);
5623
5624                 name_len = btrfs_dir_name_len(node, di);
5625                 if (name_len <= BTRFS_NAME_LEN) {
5626                         len = name_len;
5627                 } else {
5628                         len = BTRFS_NAME_LEN;
5629                         warning("root %llu %s[%llu %llu] name too long",
5630                                 root->objectid,
5631                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5632                                 di_key->objectid, di_key->offset);
5633                 }
5634                 (*size) += name_len;
5635                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5636                                    len);
5637                 filetype = btrfs_dir_type(node, di);
5638
5639                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5640                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5641                         err |= -EIO;
5642                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5643                         root->objectid, di_key->objectid, di_key->offset,
5644                         namebuf, len, filetype, di_key->offset,
5645                         btrfs_name_hash(namebuf, len));
5646                 }
5647
5648                 btrfs_dir_item_key_to_cpu(node, di, &location);
5649                 /* Ignore related ROOT_ITEM check */
5650                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5651                         goto next;
5652
5653                 btrfs_release_path(path);
5654                 /* Check relative INODE_ITEM(existence/filetype) */
5655                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5656                 if (ret) {
5657                         tmp_err |= INODE_ITEM_MISSING;
5658                         goto next;
5659                 }
5660
5661                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5662                                     struct btrfs_inode_item);
5663                 mode = btrfs_inode_mode(path->nodes[0], ii);
5664                 if (imode_to_type(mode) != filetype) {
5665                         tmp_err |= INODE_ITEM_MISMATCH;
5666                         goto next;
5667                 }
5668
5669                 /* Check relative INODE_REF/INODE_EXTREF */
5670                 key.objectid = location.objectid;
5671                 key.type = BTRFS_INODE_REF_KEY;
5672                 key.offset = di_key->objectid;
5673                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5674                                           &index, ext_ref);
5675
5676                 /* check relative INDEX/ITEM */
5677                 key.objectid = di_key->objectid;
5678                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5679                         key.type = BTRFS_DIR_INDEX_KEY;
5680                         key.offset = index;
5681                 } else {
5682                         key.type = BTRFS_DIR_ITEM_KEY;
5683                         key.offset = btrfs_name_hash(namebuf, name_len);
5684                 }
5685
5686                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5687                                          name_len, filetype);
5688                 /* find_dir_item may find index */
5689                 if (key.type == BTRFS_DIR_INDEX_KEY)
5690                         index = key.offset;
5691 next:
5692
5693                 if (tmp_err && repair) {
5694                         ret = repair_dir_item(root, di_key->objectid,
5695                                               location.objectid, index,
5696                                               imode_to_type(mode), namebuf,
5697                                               name_len, tmp_err);
5698                         if (ret != tmp_err) {
5699                                 need_research = 1;
5700                                 goto begin;
5701                         }
5702                 }
5703                 btrfs_release_path(path);
5704                 print_dir_item_err(root, di_key, location.objectid, index,
5705                                    namebuf, name_len, filetype, tmp_err);
5706                 err |= tmp_err;
5707                 len = sizeof(*di) + name_len + data_len;
5708                 di = (struct btrfs_dir_item *)((char *)di + len);
5709                 cur += len;
5710
5711                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5712                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5713                               root->objectid, di_key->objectid,
5714                               di_key->offset);
5715                         break;
5716                 }
5717         }
5718 out:
5719         /* research path */
5720         btrfs_release_path(path);
5721         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5722         if (ret)
5723                 err |= ret > 0 ? -ENOENT : ret;
5724         return err;
5725 }
5726
5727 /*
5728  * Wrapper function of btrfs_punch_hole.
5729  *
5730  * Returns 0 means success.
5731  * Returns not 0 means error.
5732  */
5733 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5734                              u64 len)
5735 {
5736         struct btrfs_trans_handle *trans;
5737         int ret = 0;
5738
5739         trans = btrfs_start_transaction(root, 1);
5740         if (IS_ERR(trans))
5741                 return PTR_ERR(trans);
5742
5743         ret = btrfs_punch_hole(trans, root, ino, start, len);
5744         if (ret)
5745                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5746                       start, len, ino);
5747         else
5748                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5749                        ino);
5750
5751         btrfs_commit_transaction(trans, root);
5752         return ret;
5753 }
5754
5755 /*
5756  * Check file extent datasum/hole, update the size of the file extents,
5757  * check and update the last offset of the file extent.
5758  *
5759  * @root:       the root of fs/file tree.
5760  * @fkey:       the key of the file extent.
5761  * @nodatasum:  INODE_NODATASUM feature.
5762  * @size:       the sum of all EXTENT_DATA items size for this inode.
5763  * @end:        the offset of the last extent.
5764  *
5765  * Return 0 if no error occurred.
5766  */
5767 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5768                              struct extent_buffer *node, int slot,
5769                              unsigned int nodatasum, u64 *size, u64 *end)
5770 {
5771         struct btrfs_file_extent_item *fi;
5772         u64 disk_bytenr;
5773         u64 disk_num_bytes;
5774         u64 extent_num_bytes;
5775         u64 extent_offset;
5776         u64 csum_found;         /* In byte size, sectorsize aligned */
5777         u64 search_start;       /* Logical range start we search for csum */
5778         u64 search_len;         /* Logical range len we search for csum */
5779         unsigned int extent_type;
5780         unsigned int is_hole;
5781         int compressed = 0;
5782         int ret;
5783         int err = 0;
5784
5785         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5786
5787         /* Check inline extent */
5788         extent_type = btrfs_file_extent_type(node, fi);
5789         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5790                 struct btrfs_item *e = btrfs_item_nr(slot);
5791                 u32 item_inline_len;
5792
5793                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5794                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5795                 compressed = btrfs_file_extent_compression(node, fi);
5796                 if (extent_num_bytes == 0) {
5797                         error(
5798                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5799                                 root->objectid, fkey->objectid, fkey->offset);
5800                         err |= FILE_EXTENT_ERROR;
5801                 }
5802                 if (!compressed && extent_num_bytes != item_inline_len) {
5803                         error(
5804                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5805                                 root->objectid, fkey->objectid, fkey->offset,
5806                                 extent_num_bytes, item_inline_len);
5807                         err |= FILE_EXTENT_ERROR;
5808                 }
5809                 *end += extent_num_bytes;
5810                 *size += extent_num_bytes;
5811                 return err;
5812         }
5813
5814         /* Check extent type */
5815         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5816                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5817                 err |= FILE_EXTENT_ERROR;
5818                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5819                       root->objectid, fkey->objectid, fkey->offset);
5820                 return err;
5821         }
5822
5823         /* Check REG_EXTENT/PREALLOC_EXTENT */
5824         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5825         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5826         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5827         extent_offset = btrfs_file_extent_offset(node, fi);
5828         compressed = btrfs_file_extent_compression(node, fi);
5829         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5830
5831         /*
5832          * Check EXTENT_DATA csum
5833          *
5834          * For plain (uncompressed) extent, we should only check the range
5835          * we're referring to, as it's possible that part of prealloc extent
5836          * has been written, and has csum:
5837          *
5838          * |<--- Original large preallocated extent A ---->|
5839          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5840          *      No csum                         Has csum
5841          *
5842          * For compressed extent, we should check the whole range.
5843          */
5844         if (!compressed) {
5845                 search_start = disk_bytenr + extent_offset;
5846                 search_len = extent_num_bytes;
5847         } else {
5848                 search_start = disk_bytenr;
5849                 search_len = disk_num_bytes;
5850         }
5851         ret = count_csum_range(root, search_start, search_len, &csum_found);
5852         if (csum_found > 0 && nodatasum) {
5853                 err |= ODD_CSUM_ITEM;
5854                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5855                       root->objectid, fkey->objectid, fkey->offset);
5856         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5857                    !is_hole && (ret < 0 || csum_found < search_len)) {
5858                 err |= CSUM_ITEM_MISSING;
5859                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5860                       root->objectid, fkey->objectid, fkey->offset,
5861                       csum_found, search_len);
5862         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5863                 err |= ODD_CSUM_ITEM;
5864                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5865                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5866         }
5867
5868         /* Check EXTENT_DATA hole */
5869         if (!no_holes && *end != fkey->offset) {
5870                 if (repair)
5871                         ret = punch_extent_hole(root, fkey->objectid,
5872                                                 *end, fkey->offset - *end);
5873                 if (!repair || ret) {
5874                         err |= FILE_EXTENT_ERROR;
5875                         error(
5876 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5877                                 root->objectid, fkey->objectid, fkey->offset,
5878                                 fkey->objectid, *end);
5879                 }
5880         }
5881
5882         *end += extent_num_bytes;
5883         if (!is_hole)
5884                 *size += extent_num_bytes;
5885
5886         return err;
5887 }
5888
5889 /*
5890  * Set inode item nbytes to @nbytes
5891  *
5892  * Returns  0     on success
5893  * Returns  != 0  on error
5894  */
5895 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5896                                       struct btrfs_path *path,
5897                                       u64 ino, u64 nbytes)
5898 {
5899         struct btrfs_trans_handle *trans;
5900         struct btrfs_inode_item *ii;
5901         struct btrfs_key key;
5902         struct btrfs_key research_key;
5903         int err = 0;
5904         int ret;
5905
5906         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5907
5908         key.objectid = ino;
5909         key.type = BTRFS_INODE_ITEM_KEY;
5910         key.offset = 0;
5911
5912         trans = btrfs_start_transaction(root, 1);
5913         if (IS_ERR(trans)) {
5914                 ret = PTR_ERR(trans);
5915                 err |= ret;
5916                 goto out;
5917         }
5918
5919         btrfs_release_path(path);
5920         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5921         if (ret > 0)
5922                 ret = -ENOENT;
5923         if (ret) {
5924                 err |= ret;
5925                 goto fail;
5926         }
5927
5928         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5929                             struct btrfs_inode_item);
5930         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5931         btrfs_mark_buffer_dirty(path->nodes[0]);
5932 fail:
5933         btrfs_commit_transaction(trans, root);
5934 out:
5935         if (ret)
5936                 error("failed to set nbytes in inode %llu root %llu",
5937                       ino, root->root_key.objectid);
5938         else
5939                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5940                        root->root_key.objectid, nbytes);
5941
5942         /* research path */
5943         btrfs_release_path(path);
5944         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5945         err |= ret;
5946
5947         return err;
5948 }
5949
5950 /*
5951  * Set directory inode isize to @isize.
5952  *
5953  * Returns 0     on success.
5954  * Returns != 0  on error.
5955  */
5956 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5957                                    struct btrfs_path *path,
5958                                    u64 ino, u64 isize)
5959 {
5960         struct btrfs_trans_handle *trans;
5961         struct btrfs_inode_item *ii;
5962         struct btrfs_key key;
5963         struct btrfs_key research_key;
5964         int ret;
5965         int err = 0;
5966
5967         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5968
5969         key.objectid = ino;
5970         key.type = BTRFS_INODE_ITEM_KEY;
5971         key.offset = 0;
5972
5973         trans = btrfs_start_transaction(root, 1);
5974         if (IS_ERR(trans)) {
5975                 ret = PTR_ERR(trans);
5976                 err |= ret;
5977                 goto out;
5978         }
5979
5980         btrfs_release_path(path);
5981         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5982         if (ret > 0)
5983                 ret = -ENOENT;
5984         if (ret) {
5985                 err |= ret;
5986                 goto fail;
5987         }
5988
5989         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5990                             struct btrfs_inode_item);
5991         btrfs_set_inode_size(path->nodes[0], ii, isize);
5992         btrfs_mark_buffer_dirty(path->nodes[0]);
5993 fail:
5994         btrfs_commit_transaction(trans, root);
5995 out:
5996         if (ret)
5997                 error("failed to set isize in inode %llu root %llu",
5998                       ino, root->root_key.objectid);
5999         else
6000                 printf("Set isize in inode %llu root %llu to %llu\n",
6001                        ino, root->root_key.objectid, isize);
6002
6003         btrfs_release_path(path);
6004         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6005         err |= ret;
6006
6007         return err;
6008 }
6009
6010 /*
6011  * Wrapper function for btrfs_add_orphan_item().
6012  *
6013  * Returns 0     on success.
6014  * Returns != 0  on error.
6015  */
6016 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6017                                            struct btrfs_path *path, u64 ino)
6018 {
6019         struct btrfs_trans_handle *trans;
6020         struct btrfs_key research_key;
6021         int ret;
6022         int err = 0;
6023
6024         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6025
6026         trans = btrfs_start_transaction(root, 1);
6027         if (IS_ERR(trans)) {
6028                 ret = PTR_ERR(trans);
6029                 err |= ret;
6030                 goto out;
6031         }
6032
6033         btrfs_release_path(path);
6034         ret = btrfs_add_orphan_item(trans, root, path, ino);
6035         err |= ret;
6036         btrfs_commit_transaction(trans, root);
6037 out:
6038         if (ret)
6039                 error("failed to add inode %llu as orphan item root %llu",
6040                       ino, root->root_key.objectid);
6041         else
6042                 printf("Added inode %llu as orphan item root %llu\n",
6043                        ino, root->root_key.objectid);
6044
6045         btrfs_release_path(path);
6046         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6047         err |= ret;
6048
6049         return err;
6050 }
6051
6052 /* Set inode_item nlink to @ref_count.
6053  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6054  *
6055  * Returns 0 on success
6056  */
6057 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6058                                       struct btrfs_path *path, u64 ino,
6059                                       const char *name, u32 namelen,
6060                                       u64 ref_count, u8 filetype, u64 *nlink)
6061 {
6062         struct btrfs_trans_handle *trans;
6063         struct btrfs_inode_item *ii;
6064         struct btrfs_key key;
6065         struct btrfs_key old_key;
6066         char namebuf[BTRFS_NAME_LEN] = {0};
6067         int name_len;
6068         int ret;
6069         int ret2;
6070
6071         /* save the key */
6072         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6073
6074         if (name && namelen) {
6075                 ASSERT(namelen <= BTRFS_NAME_LEN);
6076                 memcpy(namebuf, name, namelen);
6077                 name_len = namelen;
6078         } else {
6079                 sprintf(namebuf, "%llu", ino);
6080                 name_len = count_digits(ino);
6081                 printf("Can't find file name for inode %llu, use %s instead\n",
6082                        ino, namebuf);
6083         }
6084
6085         trans = btrfs_start_transaction(root, 1);
6086         if (IS_ERR(trans)) {
6087                 ret = PTR_ERR(trans);
6088                 goto out;
6089         }
6090
6091         btrfs_release_path(path);
6092         /* if refs is 0, put it into lostfound */
6093         if (ref_count == 0) {
6094                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6095                                               name_len, filetype, &ref_count);
6096                 if (ret)
6097                         goto fail;
6098         }
6099
6100         /* reset inode_item's nlink to ref_count */
6101         key.objectid = ino;
6102         key.type = BTRFS_INODE_ITEM_KEY;
6103         key.offset = 0;
6104
6105         btrfs_release_path(path);
6106         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6107         if (ret > 0)
6108                 ret = -ENOENT;
6109         if (ret)
6110                 goto fail;
6111
6112         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6113                             struct btrfs_inode_item);
6114         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6115         btrfs_mark_buffer_dirty(path->nodes[0]);
6116
6117         if (nlink)
6118                 *nlink = ref_count;
6119 fail:
6120         btrfs_commit_transaction(trans, root);
6121 out:
6122         if (ret)
6123                 error(
6124         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6125                        root->objectid, ino, namebuf, filetype);
6126         else
6127                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6128                        root->objectid, ino, namebuf, filetype);
6129
6130         /* research */
6131         btrfs_release_path(path);
6132         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6133         if (ret2 < 0)
6134                 return ret |= ret2;
6135         return ret;
6136 }
6137
6138 /*
6139  * Check INODE_ITEM and related ITEMs (the same inode number)
6140  * 1. check link count
6141  * 2. check inode ref/extref
6142  * 3. check dir item/index
6143  *
6144  * @ext_ref:    the EXTENDED_IREF feature
6145  *
6146  * Return 0 if no error occurred.
6147  * Return >0 for error or hit the traversal is done(by error bitmap)
6148  */
6149 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6150                             unsigned int ext_ref)
6151 {
6152         struct extent_buffer *node;
6153         struct btrfs_inode_item *ii;
6154         struct btrfs_key key;
6155         struct btrfs_key last_key;
6156         u64 inode_id;
6157         u32 mode;
6158         u64 nlink;
6159         u64 nbytes;
6160         u64 isize;
6161         u64 size = 0;
6162         u64 refs = 0;
6163         u64 extent_end = 0;
6164         u64 extent_size = 0;
6165         unsigned int dir;
6166         unsigned int nodatasum;
6167         int slot;
6168         int ret;
6169         int err = 0;
6170         char namebuf[BTRFS_NAME_LEN] = {0};
6171         u32 name_len = 0;
6172
6173         node = path->nodes[0];
6174         slot = path->slots[0];
6175
6176         btrfs_item_key_to_cpu(node, &key, slot);
6177         inode_id = key.objectid;
6178
6179         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6180                 ret = btrfs_next_item(root, path);
6181                 if (ret > 0)
6182                         err |= LAST_ITEM;
6183                 return err;
6184         }
6185
6186         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6187         isize = btrfs_inode_size(node, ii);
6188         nbytes = btrfs_inode_nbytes(node, ii);
6189         mode = btrfs_inode_mode(node, ii);
6190         dir = imode_to_type(mode) == BTRFS_FT_DIR;
6191         nlink = btrfs_inode_nlink(node, ii);
6192         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
6193
6194         while (1) {
6195                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6196                 ret = btrfs_next_item(root, path);
6197                 if (ret < 0) {
6198                         /* out will fill 'err' rusing current statistics */
6199                         goto out;
6200                 } else if (ret > 0) {
6201                         err |= LAST_ITEM;
6202                         goto out;
6203                 }
6204
6205                 node = path->nodes[0];
6206                 slot = path->slots[0];
6207                 btrfs_item_key_to_cpu(node, &key, slot);
6208                 if (key.objectid != inode_id)
6209                         goto out;
6210
6211                 switch (key.type) {
6212                 case BTRFS_INODE_REF_KEY:
6213                         ret = check_inode_ref(root, &key, path, namebuf,
6214                                               &name_len, &refs, mode);
6215                         err |= ret;
6216                         break;
6217                 case BTRFS_INODE_EXTREF_KEY:
6218                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6219                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
6220                                         root->objectid, key.objectid,
6221                                         key.offset);
6222                         ret = check_inode_extref(root, &key, node, slot, &refs,
6223                                                  mode);
6224                         err |= ret;
6225                         break;
6226                 case BTRFS_DIR_ITEM_KEY:
6227                 case BTRFS_DIR_INDEX_KEY:
6228                         if (!dir) {
6229                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6230                                         root->objectid, inode_id,
6231                                         imode_to_type(mode), key.objectid,
6232                                         key.offset);
6233                         }
6234                         ret = check_dir_item(root, &key, path, &size, ext_ref);
6235                         err |= ret;
6236                         break;
6237                 case BTRFS_EXTENT_DATA_KEY:
6238                         if (dir) {
6239                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6240                                         root->objectid, inode_id, key.objectid,
6241                                         key.offset);
6242                         }
6243                         ret = check_file_extent(root, &key, node, slot,
6244                                                 nodatasum, &extent_size,
6245                                                 &extent_end);
6246                         err |= ret;
6247                         break;
6248                 case BTRFS_XATTR_ITEM_KEY:
6249                         break;
6250                 default:
6251                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6252                               key.objectid, key.type, key.offset);
6253                 }
6254         }
6255
6256 out:
6257         if (err & LAST_ITEM) {
6258                 btrfs_release_path(path);
6259                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6260                 if (ret)
6261                         return err;
6262         }
6263
6264         /* verify INODE_ITEM nlink/isize/nbytes */
6265         if (dir) {
6266                 if (repair && (err & DIR_COUNT_AGAIN)) {
6267                         err &= ~DIR_COUNT_AGAIN;
6268                         count_dir_isize(root, inode_id, &size);
6269                 }
6270
6271                 if ((nlink != 1 || refs != 1) && repair) {
6272                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6273                                 namebuf, name_len, refs, imode_to_type(mode),
6274                                 &nlink);
6275                 }
6276
6277                 if (nlink != 1) {
6278                         err |= LINK_COUNT_ERROR;
6279                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6280                               root->objectid, inode_id, nlink);
6281                 }
6282
6283                 /*
6284                  * Just a warning, as dir inode nbytes is just an
6285                  * instructive value.
6286                  */
6287                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6288                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6289                                 root->objectid, inode_id,
6290                                 root->fs_info->nodesize);
6291                 }
6292
6293                 if (isize != size) {
6294                         if (repair)
6295                                 ret = repair_dir_isize_lowmem(root, path,
6296                                                               inode_id, size);
6297                         if (!repair || ret) {
6298                                 err |= ISIZE_ERROR;
6299                                 error(
6300                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6301                                       root->objectid, inode_id, isize, size);
6302                         }
6303                 }
6304         } else {
6305                 if (nlink != refs) {
6306                         if (repair)
6307                                 ret = repair_inode_nlinks_lowmem(root, path,
6308                                          inode_id, namebuf, name_len, refs,
6309                                          imode_to_type(mode), &nlink);
6310                         if (!repair || ret) {
6311                                 err |= LINK_COUNT_ERROR;
6312                                 error(
6313                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6314                                       root->objectid, inode_id, nlink, refs);
6315                         }
6316                 } else if (!nlink) {
6317                         if (repair)
6318                                 ret = repair_inode_orphan_item_lowmem(root,
6319                                                               path, inode_id);
6320                         if (!repair || ret) {
6321                                 err |= ORPHAN_ITEM;
6322                                 error("root %llu INODE[%llu] is orphan item",
6323                                       root->objectid, inode_id);
6324                         }
6325                 }
6326
6327                 if (!nbytes && !no_holes && extent_end < isize) {
6328                         if (repair)
6329                                 ret = punch_extent_hole(root, inode_id,
6330                                                 extent_end, isize - extent_end);
6331                         if (!repair || ret) {
6332                                 err |= NBYTES_ERROR;
6333                                 error(
6334         "root %llu INODE[%llu] size %llu should have a file extent hole",
6335                                       root->objectid, inode_id, isize);
6336                         }
6337                 }
6338
6339                 if (nbytes != extent_size) {
6340                         if (repair)
6341                                 ret = repair_inode_nbytes_lowmem(root, path,
6342                                                          inode_id, extent_size);
6343                         if (!repair || ret) {
6344                                 err |= NBYTES_ERROR;
6345                                 error(
6346         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6347                                       root->objectid, inode_id, nbytes,
6348                                       extent_size);
6349                         }
6350                 }
6351         }
6352
6353         if (err & LAST_ITEM)
6354                 btrfs_next_item(root, path);
6355         return err;
6356 }
6357
6358 /*
6359  * Insert the missing inode item and inode ref.
6360  *
6361  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6362  * Root dir should be handled specially because root dir is the root of fs.
6363  *
6364  * returns err (>0 or 0) after repair
6365  */
6366 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6367 {
6368         struct btrfs_trans_handle *trans;
6369         struct btrfs_key key;
6370         struct btrfs_path path;
6371         int filetype = BTRFS_FT_DIR;
6372         int ret = 0;
6373
6374         btrfs_init_path(&path);
6375
6376         if (err & INODE_REF_MISSING) {
6377                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6378                 key.type = BTRFS_INODE_REF_KEY;
6379                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6380
6381                 trans = btrfs_start_transaction(root, 1);
6382                 if (IS_ERR(trans)) {
6383                         ret = PTR_ERR(trans);
6384                         goto out;
6385                 }
6386
6387                 btrfs_release_path(&path);
6388                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6389                 if (ret)
6390                         goto trans_fail;
6391
6392                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6393                                              BTRFS_FIRST_FREE_OBJECTID,
6394                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6395                 if (ret)
6396                         goto trans_fail;
6397
6398                 printf("Add INODE_REF[%llu %llu] name %s\n",
6399                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6400                        "..");
6401                 err &= ~INODE_REF_MISSING;
6402 trans_fail:
6403                 if (ret)
6404                         error("fail to insert first inode's ref");
6405                 btrfs_commit_transaction(trans, root);
6406         }
6407
6408         if (err & INODE_ITEM_MISSING) {
6409                 ret = repair_inode_item_missing(root,
6410                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6411                 if (ret)
6412                         goto out;
6413                 err &= ~INODE_ITEM_MISSING;
6414         }
6415 out:
6416         if (ret)
6417                 error("fail to repair first inode");
6418         btrfs_release_path(&path);
6419         return err;
6420 }
6421
6422 /*
6423  * check first root dir's inode_item and inode_ref
6424  *
6425  * returns 0 means no error
6426  * returns >0 means error
6427  * returns <0 means fatal error
6428  */
6429 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6430 {
6431         struct btrfs_path path;
6432         struct btrfs_key key;
6433         struct btrfs_inode_item *ii;
6434         u64 index;
6435         u32 mode;
6436         int err = 0;
6437         int ret;
6438
6439         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6440         key.type = BTRFS_INODE_ITEM_KEY;
6441         key.offset = 0;
6442
6443         /* For root being dropped, we don't need to check first inode */
6444         if (btrfs_root_refs(&root->root_item) == 0 &&
6445             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6446             BTRFS_FIRST_FREE_OBJECTID)
6447                 return 0;
6448
6449         btrfs_init_path(&path);
6450         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6451         if (ret < 0)
6452                 goto out;
6453         if (ret > 0) {
6454                 ret = 0;
6455                 err |= INODE_ITEM_MISSING;
6456         } else {
6457                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6458                                     struct btrfs_inode_item);
6459                 mode = btrfs_inode_mode(path.nodes[0], ii);
6460                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6461                         err |= INODE_ITEM_MISMATCH;
6462         }
6463
6464         /* lookup first inode ref */
6465         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6466         key.type = BTRFS_INODE_REF_KEY;
6467         /* special index value */
6468         index = 0;
6469
6470         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6471         if (ret < 0)
6472                 goto out;
6473         err |= ret;
6474
6475 out:
6476         btrfs_release_path(&path);
6477
6478         if (err && repair)
6479                 err = repair_fs_first_inode(root, err);
6480
6481         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6482                 error("root dir INODE_ITEM is %s",
6483                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6484         if (err & INODE_REF_MISSING)
6485                 error("root dir INODE_REF is missing");
6486
6487         return ret < 0 ? ret : err;
6488 }
6489
6490 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6491                                                 u64 parent, u64 root)
6492 {
6493         struct rb_node *node;
6494         struct tree_backref *back = NULL;
6495         struct tree_backref match = {
6496                 .node = {
6497                         .is_data = 0,
6498                 },
6499         };
6500
6501         if (parent) {
6502                 match.parent = parent;
6503                 match.node.full_backref = 1;
6504         } else {
6505                 match.root = root;
6506         }
6507
6508         node = rb_search(&rec->backref_tree, &match.node.node,
6509                          (rb_compare_keys)compare_extent_backref, NULL);
6510         if (node)
6511                 back = to_tree_backref(rb_node_to_extent_backref(node));
6512
6513         return back;
6514 }
6515
6516 static struct data_backref *find_data_backref(struct extent_record *rec,
6517                                                 u64 parent, u64 root,
6518                                                 u64 owner, u64 offset,
6519                                                 int found_ref,
6520                                                 u64 disk_bytenr, u64 bytes)
6521 {
6522         struct rb_node *node;
6523         struct data_backref *back = NULL;
6524         struct data_backref match = {
6525                 .node = {
6526                         .is_data = 1,
6527                 },
6528                 .owner = owner,
6529                 .offset = offset,
6530                 .bytes = bytes,
6531                 .found_ref = found_ref,
6532                 .disk_bytenr = disk_bytenr,
6533         };
6534
6535         if (parent) {
6536                 match.parent = parent;
6537                 match.node.full_backref = 1;
6538         } else {
6539                 match.root = root;
6540         }
6541
6542         node = rb_search(&rec->backref_tree, &match.node.node,
6543                          (rb_compare_keys)compare_extent_backref, NULL);
6544         if (node)
6545                 back = to_data_backref(rb_node_to_extent_backref(node));
6546
6547         return back;
6548 }
6549 /*
6550  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6551  * blocks and integrity of fs tree items.
6552  *
6553  * @root:         the root of the tree to be checked.
6554  * @ext_ref       feature EXTENDED_IREF is enable or not.
6555  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6556  *                otherwise means check fs tree(s) items relationship and
6557  *                @root MUST be a fs tree root.
6558  * Returns 0      represents OK.
6559  * Returns not 0  represents error.
6560  */
6561 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6562                             struct btrfs_root *root, unsigned int ext_ref,
6563                             int check_all)
6564
6565 {
6566         struct btrfs_path path;
6567         struct node_refs nrefs;
6568         struct btrfs_root_item *root_item = &root->root_item;
6569         int ret;
6570         int level;
6571         int err = 0;
6572
6573         memset(&nrefs, 0, sizeof(nrefs));
6574         if (!check_all) {
6575                 /*
6576                  * We need to manually check the first inode item (256)
6577                  * As the following traversal function will only start from
6578                  * the first inode item in the leaf, if inode item (256) is
6579                  * missing we will skip it forever.
6580                  */
6581                 ret = check_fs_first_inode(root, ext_ref);
6582                 if (ret < 0)
6583                         return ret;
6584         }
6585
6586
6587         level = btrfs_header_level(root->node);
6588         btrfs_init_path(&path);
6589
6590         if (btrfs_root_refs(root_item) > 0 ||
6591             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6592                 path.nodes[level] = root->node;
6593                 path.slots[level] = 0;
6594                 extent_buffer_get(root->node);
6595         } else {
6596                 struct btrfs_key key;
6597
6598                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6599                 level = root_item->drop_level;
6600                 path.lowest_level = level;
6601                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6602                 if (ret < 0)
6603                         goto out;
6604                 ret = 0;
6605         }
6606
6607         while (1) {
6608                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6609                                         ext_ref, check_all);
6610
6611                 err |= !!ret;
6612
6613                 /* if ret is negative, walk shall stop */
6614                 if (ret < 0) {
6615                         ret = err;
6616                         break;
6617                 }
6618
6619                 ret = walk_up_tree_v2(root, &path, &level);
6620                 if (ret != 0) {
6621                         /* Normal exit, reset ret to err */
6622                         ret = err;
6623                         break;
6624                 }
6625         }
6626
6627 out:
6628         btrfs_release_path(&path);
6629         return ret;
6630 }
6631
6632 /*
6633  * Iterate all items in the tree and call check_inode_item() to check.
6634  *
6635  * @root:       the root of the tree to be checked.
6636  * @ext_ref:    the EXTENDED_IREF feature
6637  *
6638  * Return 0 if no error found.
6639  * Return <0 for error.
6640  */
6641 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6642 {
6643         reset_cached_block_groups(root->fs_info);
6644         return check_btrfs_root(NULL, root, ext_ref, 0);
6645 }
6646
6647 /*
6648  * Find the relative ref for root_ref and root_backref.
6649  *
6650  * @root:       the root of the root tree.
6651  * @ref_key:    the key of the root ref.
6652  *
6653  * Return 0 if no error occurred.
6654  */
6655 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6656                           struct extent_buffer *node, int slot)
6657 {
6658         struct btrfs_path path;
6659         struct btrfs_key key;
6660         struct btrfs_root_ref *ref;
6661         struct btrfs_root_ref *backref;
6662         char ref_name[BTRFS_NAME_LEN] = {0};
6663         char backref_name[BTRFS_NAME_LEN] = {0};
6664         u64 ref_dirid;
6665         u64 ref_seq;
6666         u32 ref_namelen;
6667         u64 backref_dirid;
6668         u64 backref_seq;
6669         u32 backref_namelen;
6670         u32 len;
6671         int ret;
6672         int err = 0;
6673
6674         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6675         ref_dirid = btrfs_root_ref_dirid(node, ref);
6676         ref_seq = btrfs_root_ref_sequence(node, ref);
6677         ref_namelen = btrfs_root_ref_name_len(node, ref);
6678
6679         if (ref_namelen <= BTRFS_NAME_LEN) {
6680                 len = ref_namelen;
6681         } else {
6682                 len = BTRFS_NAME_LEN;
6683                 warning("%s[%llu %llu] ref_name too long",
6684                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6685                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6686                         ref_key->offset);
6687         }
6688         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6689
6690         /* Find relative root_ref */
6691         key.objectid = ref_key->offset;
6692         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6693         key.offset = ref_key->objectid;
6694
6695         btrfs_init_path(&path);
6696         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6697         if (ret) {
6698                 err |= ROOT_REF_MISSING;
6699                 error("%s[%llu %llu] couldn't find relative ref",
6700                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6701                       "ROOT_REF" : "ROOT_BACKREF",
6702                       ref_key->objectid, ref_key->offset);
6703                 goto out;
6704         }
6705
6706         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6707                                  struct btrfs_root_ref);
6708         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6709         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6710         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6711
6712         if (backref_namelen <= BTRFS_NAME_LEN) {
6713                 len = backref_namelen;
6714         } else {
6715                 len = BTRFS_NAME_LEN;
6716                 warning("%s[%llu %llu] ref_name too long",
6717                         key.type == BTRFS_ROOT_REF_KEY ?
6718                         "ROOT_REF" : "ROOT_BACKREF",
6719                         key.objectid, key.offset);
6720         }
6721         read_extent_buffer(path.nodes[0], backref_name,
6722                            (unsigned long)(backref + 1), len);
6723
6724         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6725             ref_namelen != backref_namelen ||
6726             strncmp(ref_name, backref_name, len)) {
6727                 err |= ROOT_REF_MISMATCH;
6728                 error("%s[%llu %llu] mismatch relative ref",
6729                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6730                       "ROOT_REF" : "ROOT_BACKREF",
6731                       ref_key->objectid, ref_key->offset);
6732         }
6733 out:
6734         btrfs_release_path(&path);
6735         return err;
6736 }
6737
6738 /*
6739  * Check all fs/file tree in low_memory mode.
6740  *
6741  * 1. for fs tree root item, call check_fs_root_v2()
6742  * 2. for fs tree root ref/backref, call check_root_ref()
6743  *
6744  * Return 0 if no error occurred.
6745  */
6746 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6747 {
6748         struct btrfs_root *tree_root = fs_info->tree_root;
6749         struct btrfs_root *cur_root = NULL;
6750         struct btrfs_path path;
6751         struct btrfs_key key;
6752         struct extent_buffer *node;
6753         unsigned int ext_ref;
6754         int slot;
6755         int ret;
6756         int err = 0;
6757
6758         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6759
6760         btrfs_init_path(&path);
6761         key.objectid = BTRFS_FS_TREE_OBJECTID;
6762         key.offset = 0;
6763         key.type = BTRFS_ROOT_ITEM_KEY;
6764
6765         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6766         if (ret < 0) {
6767                 err = ret;
6768                 goto out;
6769         } else if (ret > 0) {
6770                 err = -ENOENT;
6771                 goto out;
6772         }
6773
6774         while (1) {
6775                 node = path.nodes[0];
6776                 slot = path.slots[0];
6777                 btrfs_item_key_to_cpu(node, &key, slot);
6778                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6779                         goto out;
6780                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6781                     fs_root_objectid(key.objectid)) {
6782                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6783                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6784                                                                        &key);
6785                         } else {
6786                                 key.offset = (u64)-1;
6787                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6788                         }
6789
6790                         if (IS_ERR(cur_root)) {
6791                                 error("Fail to read fs/subvol tree: %lld",
6792                                       key.objectid);
6793                                 err = -EIO;
6794                                 goto next;
6795                         }
6796
6797                         ret = check_fs_root_v2(cur_root, ext_ref);
6798                         err |= ret;
6799
6800                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6801                                 btrfs_free_fs_root(cur_root);
6802                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6803                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6804                         ret = check_root_ref(tree_root, &key, node, slot);
6805                         err |= ret;
6806                 }
6807 next:
6808                 ret = btrfs_next_item(tree_root, &path);
6809                 if (ret > 0)
6810                         goto out;
6811                 if (ret < 0) {
6812                         err = ret;
6813                         goto out;
6814                 }
6815         }
6816
6817 out:
6818         btrfs_release_path(&path);
6819         return err;
6820 }
6821
6822 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6823                           struct cache_tree *root_cache)
6824 {
6825         int ret;
6826
6827         if (!ctx.progress_enabled)
6828                 fprintf(stderr, "checking fs roots\n");
6829         if (check_mode == CHECK_MODE_LOWMEM)
6830                 ret = check_fs_roots_v2(fs_info);
6831         else
6832                 ret = check_fs_roots(fs_info, root_cache);
6833
6834         return ret;
6835 }
6836
6837 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6838 {
6839         struct extent_backref *back, *tmp;
6840         struct tree_backref *tback;
6841         struct data_backref *dback;
6842         u64 found = 0;
6843         int err = 0;
6844
6845         rbtree_postorder_for_each_entry_safe(back, tmp,
6846                                              &rec->backref_tree, node) {
6847                 if (!back->found_extent_tree) {
6848                         err = 1;
6849                         if (!print_errs)
6850                                 goto out;
6851                         if (back->is_data) {
6852                                 dback = to_data_backref(back);
6853                                 fprintf(stderr, "Data backref %llu %s %llu"
6854                                         " owner %llu offset %llu num_refs %lu"
6855                                         " not found in extent tree\n",
6856                                         (unsigned long long)rec->start,
6857                                         back->full_backref ?
6858                                         "parent" : "root",
6859                                         back->full_backref ?
6860                                         (unsigned long long)dback->parent:
6861                                         (unsigned long long)dback->root,
6862                                         (unsigned long long)dback->owner,
6863                                         (unsigned long long)dback->offset,
6864                                         (unsigned long)dback->num_refs);
6865                         } else {
6866                                 tback = to_tree_backref(back);
6867                                 fprintf(stderr, "Tree backref %llu parent %llu"
6868                                         " root %llu not found in extent tree\n",
6869                                         (unsigned long long)rec->start,
6870                                         (unsigned long long)tback->parent,
6871                                         (unsigned long long)tback->root);
6872                         }
6873                 }
6874                 if (!back->is_data && !back->found_ref) {
6875                         err = 1;
6876                         if (!print_errs)
6877                                 goto out;
6878                         tback = to_tree_backref(back);
6879                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6880                                 (unsigned long long)rec->start,
6881                                 back->full_backref ? "parent" : "root",
6882                                 back->full_backref ?
6883                                 (unsigned long long)tback->parent :
6884                                 (unsigned long long)tback->root, back);
6885                 }
6886                 if (back->is_data) {
6887                         dback = to_data_backref(back);
6888                         if (dback->found_ref != dback->num_refs) {
6889                                 err = 1;
6890                                 if (!print_errs)
6891                                         goto out;
6892                                 fprintf(stderr, "Incorrect local backref count"
6893                                         " on %llu %s %llu owner %llu"
6894                                         " offset %llu found %u wanted %u back %p\n",
6895                                         (unsigned long long)rec->start,
6896                                         back->full_backref ?
6897                                         "parent" : "root",
6898                                         back->full_backref ?
6899                                         (unsigned long long)dback->parent:
6900                                         (unsigned long long)dback->root,
6901                                         (unsigned long long)dback->owner,
6902                                         (unsigned long long)dback->offset,
6903                                         dback->found_ref, dback->num_refs, back);
6904                         }
6905                         if (dback->disk_bytenr != rec->start) {
6906                                 err = 1;
6907                                 if (!print_errs)
6908                                         goto out;
6909                                 fprintf(stderr, "Backref disk bytenr does not"
6910                                         " match extent record, bytenr=%llu, "
6911                                         "ref bytenr=%llu\n",
6912                                         (unsigned long long)rec->start,
6913                                         (unsigned long long)dback->disk_bytenr);
6914                         }
6915
6916                         if (dback->bytes != rec->nr) {
6917                                 err = 1;
6918                                 if (!print_errs)
6919                                         goto out;
6920                                 fprintf(stderr, "Backref bytes do not match "
6921                                         "extent backref, bytenr=%llu, ref "
6922                                         "bytes=%llu, backref bytes=%llu\n",
6923                                         (unsigned long long)rec->start,
6924                                         (unsigned long long)rec->nr,
6925                                         (unsigned long long)dback->bytes);
6926                         }
6927                 }
6928                 if (!back->is_data) {
6929                         found += 1;
6930                 } else {
6931                         dback = to_data_backref(back);
6932                         found += dback->found_ref;
6933                 }
6934         }
6935         if (found != rec->refs) {
6936                 err = 1;
6937                 if (!print_errs)
6938                         goto out;
6939                 fprintf(stderr, "Incorrect global backref count "
6940                         "on %llu found %llu wanted %llu\n",
6941                         (unsigned long long)rec->start,
6942                         (unsigned long long)found,
6943                         (unsigned long long)rec->refs);
6944         }
6945 out:
6946         return err;
6947 }
6948
6949 static void __free_one_backref(struct rb_node *node)
6950 {
6951         struct extent_backref *back = rb_node_to_extent_backref(node);
6952
6953         free(back);
6954 }
6955
6956 static void free_all_extent_backrefs(struct extent_record *rec)
6957 {
6958         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6959 }
6960
6961 static void free_extent_record_cache(struct cache_tree *extent_cache)
6962 {
6963         struct cache_extent *cache;
6964         struct extent_record *rec;
6965
6966         while (1) {
6967                 cache = first_cache_extent(extent_cache);
6968                 if (!cache)
6969                         break;
6970                 rec = container_of(cache, struct extent_record, cache);
6971                 remove_cache_extent(extent_cache, cache);
6972                 free_all_extent_backrefs(rec);
6973                 free(rec);
6974         }
6975 }
6976
6977 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6978                                  struct extent_record *rec)
6979 {
6980         if (rec->content_checked && rec->owner_ref_checked &&
6981             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6982             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6983             !rec->bad_full_backref && !rec->crossing_stripes &&
6984             !rec->wrong_chunk_type) {
6985                 remove_cache_extent(extent_cache, &rec->cache);
6986                 free_all_extent_backrefs(rec);
6987                 list_del_init(&rec->list);
6988                 free(rec);
6989         }
6990         return 0;
6991 }
6992
6993 static int check_owner_ref(struct btrfs_root *root,
6994                             struct extent_record *rec,
6995                             struct extent_buffer *buf)
6996 {
6997         struct extent_backref *node, *tmp;
6998         struct tree_backref *back;
6999         struct btrfs_root *ref_root;
7000         struct btrfs_key key;
7001         struct btrfs_path path;
7002         struct extent_buffer *parent;
7003         int level;
7004         int found = 0;
7005         int ret;
7006
7007         rbtree_postorder_for_each_entry_safe(node, tmp,
7008                                              &rec->backref_tree, node) {
7009                 if (node->is_data)
7010                         continue;
7011                 if (!node->found_ref)
7012                         continue;
7013                 if (node->full_backref)
7014                         continue;
7015                 back = to_tree_backref(node);
7016                 if (btrfs_header_owner(buf) == back->root)
7017                         return 0;
7018         }
7019         BUG_ON(rec->is_root);
7020
7021         /* try to find the block by search corresponding fs tree */
7022         key.objectid = btrfs_header_owner(buf);
7023         key.type = BTRFS_ROOT_ITEM_KEY;
7024         key.offset = (u64)-1;
7025
7026         ref_root = btrfs_read_fs_root(root->fs_info, &key);
7027         if (IS_ERR(ref_root))
7028                 return 1;
7029
7030         level = btrfs_header_level(buf);
7031         if (level == 0)
7032                 btrfs_item_key_to_cpu(buf, &key, 0);
7033         else
7034                 btrfs_node_key_to_cpu(buf, &key, 0);
7035
7036         btrfs_init_path(&path);
7037         path.lowest_level = level + 1;
7038         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7039         if (ret < 0)
7040                 return 0;
7041
7042         parent = path.nodes[level + 1];
7043         if (parent && buf->start == btrfs_node_blockptr(parent,
7044                                                         path.slots[level + 1]))
7045                 found = 1;
7046
7047         btrfs_release_path(&path);
7048         return found ? 0 : 1;
7049 }
7050
7051 static int is_extent_tree_record(struct extent_record *rec)
7052 {
7053         struct extent_backref *node, *tmp;
7054         struct tree_backref *back;
7055         int is_extent = 0;
7056
7057         rbtree_postorder_for_each_entry_safe(node, tmp,
7058                                              &rec->backref_tree, node) {
7059                 if (node->is_data)
7060                         return 0;
7061                 back = to_tree_backref(node);
7062                 if (node->full_backref)
7063                         return 0;
7064                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
7065                         is_extent = 1;
7066         }
7067         return is_extent;
7068 }
7069
7070
7071 static int record_bad_block_io(struct btrfs_fs_info *info,
7072                                struct cache_tree *extent_cache,
7073                                u64 start, u64 len)
7074 {
7075         struct extent_record *rec;
7076         struct cache_extent *cache;
7077         struct btrfs_key key;
7078
7079         cache = lookup_cache_extent(extent_cache, start, len);
7080         if (!cache)
7081                 return 0;
7082
7083         rec = container_of(cache, struct extent_record, cache);
7084         if (!is_extent_tree_record(rec))
7085                 return 0;
7086
7087         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7088         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
7089 }
7090
7091 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
7092                        struct extent_buffer *buf, int slot)
7093 {
7094         if (btrfs_header_level(buf)) {
7095                 struct btrfs_key_ptr ptr1, ptr2;
7096
7097                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
7098                                    sizeof(struct btrfs_key_ptr));
7099                 read_extent_buffer(buf, &ptr2,
7100                                    btrfs_node_key_ptr_offset(slot + 1),
7101                                    sizeof(struct btrfs_key_ptr));
7102                 write_extent_buffer(buf, &ptr1,
7103                                     btrfs_node_key_ptr_offset(slot + 1),
7104                                     sizeof(struct btrfs_key_ptr));
7105                 write_extent_buffer(buf, &ptr2,
7106                                     btrfs_node_key_ptr_offset(slot),
7107                                     sizeof(struct btrfs_key_ptr));
7108                 if (slot == 0) {
7109                         struct btrfs_disk_key key;
7110                         btrfs_node_key(buf, &key, 0);
7111                         btrfs_fixup_low_keys(root, path, &key,
7112                                              btrfs_header_level(buf) + 1);
7113                 }
7114         } else {
7115                 struct btrfs_item *item1, *item2;
7116                 struct btrfs_key k1, k2;
7117                 char *item1_data, *item2_data;
7118                 u32 item1_offset, item2_offset, item1_size, item2_size;
7119
7120                 item1 = btrfs_item_nr(slot);
7121                 item2 = btrfs_item_nr(slot + 1);
7122                 btrfs_item_key_to_cpu(buf, &k1, slot);
7123                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
7124                 item1_offset = btrfs_item_offset(buf, item1);
7125                 item2_offset = btrfs_item_offset(buf, item2);
7126                 item1_size = btrfs_item_size(buf, item1);
7127                 item2_size = btrfs_item_size(buf, item2);
7128
7129                 item1_data = malloc(item1_size);
7130                 if (!item1_data)
7131                         return -ENOMEM;
7132                 item2_data = malloc(item2_size);
7133                 if (!item2_data) {
7134                         free(item1_data);
7135                         return -ENOMEM;
7136                 }
7137
7138                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
7139                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
7140
7141                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
7142                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
7143                 free(item1_data);
7144                 free(item2_data);
7145
7146                 btrfs_set_item_offset(buf, item1, item2_offset);
7147                 btrfs_set_item_offset(buf, item2, item1_offset);
7148                 btrfs_set_item_size(buf, item1, item2_size);
7149                 btrfs_set_item_size(buf, item2, item1_size);
7150
7151                 path->slots[0] = slot;
7152                 btrfs_set_item_key_unsafe(root, path, &k2);
7153                 path->slots[0] = slot + 1;
7154                 btrfs_set_item_key_unsafe(root, path, &k1);
7155         }
7156         return 0;
7157 }
7158
7159 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7160 {
7161         struct extent_buffer *buf;
7162         struct btrfs_key k1, k2;
7163         int i;
7164         int level = path->lowest_level;
7165         int ret = -EIO;
7166
7167         buf = path->nodes[level];
7168         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7169                 if (level) {
7170                         btrfs_node_key_to_cpu(buf, &k1, i);
7171                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
7172                 } else {
7173                         btrfs_item_key_to_cpu(buf, &k1, i);
7174                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
7175                 }
7176                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7177                         continue;
7178                 ret = swap_values(root, path, buf, i);
7179                 if (ret)
7180                         break;
7181                 btrfs_mark_buffer_dirty(buf);
7182                 i = 0;
7183         }
7184         return ret;
7185 }
7186
7187 static int delete_bogus_item(struct btrfs_root *root,
7188                              struct btrfs_path *path,
7189                              struct extent_buffer *buf, int slot)
7190 {
7191         struct btrfs_key key;
7192         int nritems = btrfs_header_nritems(buf);
7193
7194         btrfs_item_key_to_cpu(buf, &key, slot);
7195
7196         /* These are all the keys we can deal with missing. */
7197         if (key.type != BTRFS_DIR_INDEX_KEY &&
7198             key.type != BTRFS_EXTENT_ITEM_KEY &&
7199             key.type != BTRFS_METADATA_ITEM_KEY &&
7200             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7201             key.type != BTRFS_EXTENT_DATA_REF_KEY)
7202                 return -1;
7203
7204         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7205                (unsigned long long)key.objectid, key.type,
7206                (unsigned long long)key.offset, slot, buf->start);
7207         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7208                               btrfs_item_nr_offset(slot + 1),
7209                               sizeof(struct btrfs_item) *
7210                               (nritems - slot - 1));
7211         btrfs_set_header_nritems(buf, nritems - 1);
7212         if (slot == 0) {
7213                 struct btrfs_disk_key disk_key;
7214
7215                 btrfs_item_key(buf, &disk_key, 0);
7216                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7217         }
7218         btrfs_mark_buffer_dirty(buf);
7219         return 0;
7220 }
7221
7222 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7223 {
7224         struct extent_buffer *buf;
7225         int i;
7226         int ret = 0;
7227
7228         /* We should only get this for leaves */
7229         BUG_ON(path->lowest_level);
7230         buf = path->nodes[0];
7231 again:
7232         for (i = 0; i < btrfs_header_nritems(buf); i++) {
7233                 unsigned int shift = 0, offset;
7234
7235                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7236                     BTRFS_LEAF_DATA_SIZE(root)) {
7237                         if (btrfs_item_end_nr(buf, i) >
7238                             BTRFS_LEAF_DATA_SIZE(root)) {
7239                                 ret = delete_bogus_item(root, path, buf, i);
7240                                 if (!ret)
7241                                         goto again;
7242                                 fprintf(stderr, "item is off the end of the "
7243                                         "leaf, can't fix\n");
7244                                 ret = -EIO;
7245                                 break;
7246                         }
7247                         shift = BTRFS_LEAF_DATA_SIZE(root) -
7248                                 btrfs_item_end_nr(buf, i);
7249                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7250                            btrfs_item_offset_nr(buf, i - 1)) {
7251                         if (btrfs_item_end_nr(buf, i) >
7252                             btrfs_item_offset_nr(buf, i - 1)) {
7253                                 ret = delete_bogus_item(root, path, buf, i);
7254                                 if (!ret)
7255                                         goto again;
7256                                 fprintf(stderr, "items overlap, can't fix\n");
7257                                 ret = -EIO;
7258                                 break;
7259                         }
7260                         shift = btrfs_item_offset_nr(buf, i - 1) -
7261                                 btrfs_item_end_nr(buf, i);
7262                 }
7263                 if (!shift)
7264                         continue;
7265
7266                 printf("Shifting item nr %d by %u bytes in block %llu\n",
7267                        i, shift, (unsigned long long)buf->start);
7268                 offset = btrfs_item_offset_nr(buf, i);
7269                 memmove_extent_buffer(buf,
7270                                       btrfs_leaf_data(buf) + offset + shift,
7271                                       btrfs_leaf_data(buf) + offset,
7272                                       btrfs_item_size_nr(buf, i));
7273                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7274                                       offset + shift);
7275                 btrfs_mark_buffer_dirty(buf);
7276         }
7277
7278         /*
7279          * We may have moved things, in which case we want to exit so we don't
7280          * write those changes out.  Once we have proper abort functionality in
7281          * progs this can be changed to something nicer.
7282          */
7283         BUG_ON(ret);
7284         return ret;
7285 }
7286
7287 /*
7288  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
7289  * then just return -EIO.
7290  */
7291 static int try_to_fix_bad_block(struct btrfs_root *root,
7292                                 struct extent_buffer *buf,
7293                                 enum btrfs_tree_block_status status)
7294 {
7295         struct btrfs_trans_handle *trans;
7296         struct ulist *roots;
7297         struct ulist_node *node;
7298         struct btrfs_root *search_root;
7299         struct btrfs_path path;
7300         struct ulist_iterator iter;
7301         struct btrfs_key root_key, key;
7302         int ret;
7303
7304         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7305             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7306                 return -EIO;
7307
7308         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7309         if (ret)
7310                 return -EIO;
7311
7312         btrfs_init_path(&path);
7313         ULIST_ITER_INIT(&iter);
7314         while ((node = ulist_next(roots, &iter))) {
7315                 root_key.objectid = node->val;
7316                 root_key.type = BTRFS_ROOT_ITEM_KEY;
7317                 root_key.offset = (u64)-1;
7318
7319                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7320                 if (IS_ERR(root)) {
7321                         ret = -EIO;
7322                         break;
7323                 }
7324
7325
7326                 trans = btrfs_start_transaction(search_root, 0);
7327                 if (IS_ERR(trans)) {
7328                         ret = PTR_ERR(trans);
7329                         break;
7330                 }
7331
7332                 path.lowest_level = btrfs_header_level(buf);
7333                 path.skip_check_block = 1;
7334                 if (path.lowest_level)
7335                         btrfs_node_key_to_cpu(buf, &key, 0);
7336                 else
7337                         btrfs_item_key_to_cpu(buf, &key, 0);
7338                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7339                 if (ret) {
7340                         ret = -EIO;
7341                         btrfs_commit_transaction(trans, search_root);
7342                         break;
7343                 }
7344                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7345                         ret = fix_key_order(search_root, &path);
7346                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7347                         ret = fix_item_offset(search_root, &path);
7348                 if (ret) {
7349                         btrfs_commit_transaction(trans, search_root);
7350                         break;
7351                 }
7352                 btrfs_release_path(&path);
7353                 btrfs_commit_transaction(trans, search_root);
7354         }
7355         ulist_free(roots);
7356         btrfs_release_path(&path);
7357         return ret;
7358 }
7359
7360 static int check_block(struct btrfs_root *root,
7361                        struct cache_tree *extent_cache,
7362                        struct extent_buffer *buf, u64 flags)
7363 {
7364         struct extent_record *rec;
7365         struct cache_extent *cache;
7366         struct btrfs_key key;
7367         enum btrfs_tree_block_status status;
7368         int ret = 0;
7369         int level;
7370
7371         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7372         if (!cache)
7373                 return 1;
7374         rec = container_of(cache, struct extent_record, cache);
7375         rec->generation = btrfs_header_generation(buf);
7376
7377         level = btrfs_header_level(buf);
7378         if (btrfs_header_nritems(buf) > 0) {
7379
7380                 if (level == 0)
7381                         btrfs_item_key_to_cpu(buf, &key, 0);
7382                 else
7383                         btrfs_node_key_to_cpu(buf, &key, 0);
7384
7385                 rec->info_objectid = key.objectid;
7386         }
7387         rec->info_level = level;
7388
7389         if (btrfs_is_leaf(buf))
7390                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7391         else
7392                 status = btrfs_check_node(root, &rec->parent_key, buf);
7393
7394         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7395                 if (repair)
7396                         status = try_to_fix_bad_block(root, buf, status);
7397                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7398                         ret = -EIO;
7399                         fprintf(stderr, "bad block %llu\n",
7400                                 (unsigned long long)buf->start);
7401                 } else {
7402                         /*
7403                          * Signal to callers we need to start the scan over
7404                          * again since we'll have cowed blocks.
7405                          */
7406                         ret = -EAGAIN;
7407                 }
7408         } else {
7409                 rec->content_checked = 1;
7410                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7411                         rec->owner_ref_checked = 1;
7412                 else {
7413                         ret = check_owner_ref(root, rec, buf);
7414                         if (!ret)
7415                                 rec->owner_ref_checked = 1;
7416                 }
7417         }
7418         if (!ret)
7419                 maybe_free_extent_rec(extent_cache, rec);
7420         return ret;
7421 }
7422
#if 0
/*
 * Compiled out.  List-based lookup of a tree backref on rec->backrefs:
 * with @parent > 0 it matches a full backref by parent, otherwise a
 * non-full backref by @root.  Returns the match or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
7452
7453 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7454                                                 u64 parent, u64 root)
7455 {
7456         struct tree_backref *ref = malloc(sizeof(*ref));
7457
7458         if (!ref)
7459                 return NULL;
7460         memset(&ref->node, 0, sizeof(ref->node));
7461         if (parent > 0) {
7462                 ref->parent = parent;
7463                 ref->node.full_backref = 1;
7464         } else {
7465                 ref->root = root;
7466                 ref->node.full_backref = 0;
7467         }
7468
7469         return ref;
7470 }
7471
#if 0
/*
 * Compiled out.  List-based lookup of a data backref on rec->backrefs:
 * with @parent > 0 it matches a full backref by parent, otherwise a
 * non-full backref by (@root, @owner, @offset).  When both @found_ref and
 * the candidate's found_ref are set, the candidate must also agree on
 * @bytes and @disk_bytenr.  Returns the match or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;
		back = to_data_backref(node);

		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		    back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
7510
7511 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7512                                                 u64 parent, u64 root,
7513                                                 u64 owner, u64 offset,
7514                                                 u64 max_size)
7515 {
7516         struct data_backref *ref = malloc(sizeof(*ref));
7517
7518         if (!ref)
7519                 return NULL;
7520         memset(&ref->node, 0, sizeof(ref->node));
7521         ref->node.is_data = 1;
7522
7523         if (parent > 0) {
7524                 ref->parent = parent;
7525                 ref->owner = 0;
7526                 ref->offset = 0;
7527                 ref->node.full_backref = 1;
7528         } else {
7529                 ref->root = root;
7530                 ref->owner = owner;
7531                 ref->offset = offset;
7532                 ref->node.full_backref = 0;
7533         }
7534         ref->bytes = max_size;
7535         ref->found_ref = 0;
7536         ref->num_refs = 0;
7537         if (max_size > rec->max_size)
7538                 rec->max_size = max_size;
7539         return ref;
7540 }
7541
7542 /* Check if the type of extent matches with its chunk */
7543 static void check_extent_type(struct extent_record *rec)
7544 {
7545         struct btrfs_block_group_cache *bg_cache;
7546
7547         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7548         if (!bg_cache)
7549                 return;
7550
7551         /* data extent, check chunk directly*/
7552         if (!rec->metadata) {
7553                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7554                         rec->wrong_chunk_type = 1;
7555                 return;
7556         }
7557
7558         /* metadata extent, check the obvious case first */
7559         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7560                                  BTRFS_BLOCK_GROUP_METADATA))) {
7561                 rec->wrong_chunk_type = 1;
7562                 return;
7563         }
7564
7565         /*
7566          * Check SYSTEM extent, as it's also marked as metadata, we can only
7567          * make sure it's a SYSTEM extent by its backref
7568          */
7569         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7570                 struct extent_backref *node;
7571                 struct tree_backref *tback;
7572                 u64 bg_type;
7573
7574                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7575                 if (node->is_data) {
7576                         /* tree block shouldn't have data backref */
7577                         rec->wrong_chunk_type = 1;
7578                         return;
7579                 }
7580                 tback = container_of(node, struct tree_backref, node);
7581
7582                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7583                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7584                 else
7585                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7586                 if (!(bg_cache->flags & bg_type))
7587                         rec->wrong_chunk_type = 1;
7588         }
7589 }
7590
7591 /*
7592  * Allocate a new extent record, fill default values from @tmpl and insert int
7593  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7594  * the cache, otherwise it fails.
7595  */
7596 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7597                 struct extent_record *tmpl)
7598 {
7599         struct extent_record *rec;
7600         int ret = 0;
7601
7602         BUG_ON(tmpl->max_size == 0);
7603         rec = malloc(sizeof(*rec));
7604         if (!rec)
7605                 return -ENOMEM;
7606         rec->start = tmpl->start;
7607         rec->max_size = tmpl->max_size;
7608         rec->nr = max(tmpl->nr, tmpl->max_size);
7609         rec->found_rec = tmpl->found_rec;
7610         rec->content_checked = tmpl->content_checked;
7611         rec->owner_ref_checked = tmpl->owner_ref_checked;
7612         rec->num_duplicates = 0;
7613         rec->metadata = tmpl->metadata;
7614         rec->flag_block_full_backref = FLAG_UNSET;
7615         rec->bad_full_backref = 0;
7616         rec->crossing_stripes = 0;
7617         rec->wrong_chunk_type = 0;
7618         rec->is_root = tmpl->is_root;
7619         rec->refs = tmpl->refs;
7620         rec->extent_item_refs = tmpl->extent_item_refs;
7621         rec->parent_generation = tmpl->parent_generation;
7622         INIT_LIST_HEAD(&rec->backrefs);
7623         INIT_LIST_HEAD(&rec->dups);
7624         INIT_LIST_HEAD(&rec->list);
7625         rec->backref_tree = RB_ROOT;
7626         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7627         rec->cache.start = tmpl->start;
7628         rec->cache.size = tmpl->nr;
7629         ret = insert_cache_extent(extent_cache, &rec->cache);
7630         if (ret) {
7631                 free(rec);
7632                 return ret;
7633         }
7634         bytes_used += rec->nr;
7635
7636         if (tmpl->metadata)
7637                 rec->crossing_stripes = check_crossing_stripes(global_info,
7638                                 rec->start, global_info->nodesize);
7639         check_extent_type(rec);
7640         return ret;
7641 }
7642
7643 /*
7644  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7645  * some are hints:
7646  * - refs              - if found, increase refs
7647  * - is_root           - if found, set
7648  * - content_checked   - if found, set
7649  * - owner_ref_checked - if found, set
7650  *
7651  * If not found, create a new one, initialize and insert.
7652  */
7653 static int add_extent_rec(struct cache_tree *extent_cache,
7654                 struct extent_record *tmpl)
7655 {
7656         struct extent_record *rec;
7657         struct cache_extent *cache;
7658         int ret = 0;
7659         int dup = 0;
7660
7661         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7662         if (cache) {
7663                 rec = container_of(cache, struct extent_record, cache);
7664                 if (tmpl->refs)
7665                         rec->refs++;
7666                 if (rec->nr == 1)
7667                         rec->nr = max(tmpl->nr, tmpl->max_size);
7668
7669                 /*
7670                  * We need to make sure to reset nr to whatever the extent
7671                  * record says was the real size, this way we can compare it to
7672                  * the backrefs.
7673                  */
7674                 if (tmpl->found_rec) {
7675                         if (tmpl->start != rec->start || rec->found_rec) {
7676                                 struct extent_record *tmp;
7677
7678                                 dup = 1;
7679                                 if (list_empty(&rec->list))
7680                                         list_add_tail(&rec->list,
7681                                                       &duplicate_extents);
7682
7683                                 /*
7684                                  * We have to do this song and dance in case we
7685                                  * find an extent record that falls inside of
7686                                  * our current extent record but does not have
7687                                  * the same objectid.
7688                                  */
7689                                 tmp = malloc(sizeof(*tmp));
7690                                 if (!tmp)
7691                                         return -ENOMEM;
7692                                 tmp->start = tmpl->start;
7693                                 tmp->max_size = tmpl->max_size;
7694                                 tmp->nr = tmpl->nr;
7695                                 tmp->found_rec = 1;
7696                                 tmp->metadata = tmpl->metadata;
7697                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7698                                 INIT_LIST_HEAD(&tmp->list);
7699                                 list_add_tail(&tmp->list, &rec->dups);
7700                                 rec->num_duplicates++;
7701                         } else {
7702                                 rec->nr = tmpl->nr;
7703                                 rec->found_rec = 1;
7704                         }
7705                 }
7706
7707                 if (tmpl->extent_item_refs && !dup) {
7708                         if (rec->extent_item_refs) {
7709                                 fprintf(stderr, "block %llu rec "
7710                                         "extent_item_refs %llu, passed %llu\n",
7711                                         (unsigned long long)tmpl->start,
7712                                         (unsigned long long)
7713                                                         rec->extent_item_refs,
7714                                         (unsigned long long)tmpl->extent_item_refs);
7715                         }
7716                         rec->extent_item_refs = tmpl->extent_item_refs;
7717                 }
7718                 if (tmpl->is_root)
7719                         rec->is_root = 1;
7720                 if (tmpl->content_checked)
7721                         rec->content_checked = 1;
7722                 if (tmpl->owner_ref_checked)
7723                         rec->owner_ref_checked = 1;
7724                 memcpy(&rec->parent_key, &tmpl->parent_key,
7725                                 sizeof(tmpl->parent_key));
7726                 if (tmpl->parent_generation)
7727                         rec->parent_generation = tmpl->parent_generation;
7728                 if (rec->max_size < tmpl->max_size)
7729                         rec->max_size = tmpl->max_size;
7730
7731                 /*
7732                  * A metadata extent can't cross stripe_len boundary, otherwise
7733                  * kernel scrub won't be able to handle it.
7734                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7735                  * it.
7736                  */
7737                 if (tmpl->metadata)
7738                         rec->crossing_stripes = check_crossing_stripes(
7739                                         global_info, rec->start,
7740                                         global_info->nodesize);
7741                 check_extent_type(rec);
7742                 maybe_free_extent_rec(extent_cache, rec);
7743                 return ret;
7744         }
7745
7746         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7747
7748         return ret;
7749 }
7750
7751 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7752                             u64 parent, u64 root, int found_ref)
7753 {
7754         struct extent_record *rec;
7755         struct tree_backref *back;
7756         struct cache_extent *cache;
7757         int ret;
7758         bool insert = false;
7759
7760         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7761         if (!cache) {
7762                 struct extent_record tmpl;
7763
7764                 memset(&tmpl, 0, sizeof(tmpl));
7765                 tmpl.start = bytenr;
7766                 tmpl.nr = 1;
7767                 tmpl.metadata = 1;
7768                 tmpl.max_size = 1;
7769
7770                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7771                 if (ret)
7772                         return ret;
7773
7774                 /* really a bug in cache_extent implement now */
7775                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7776                 if (!cache)
7777                         return -ENOENT;
7778         }
7779
7780         rec = container_of(cache, struct extent_record, cache);
7781         if (rec->start != bytenr) {
7782                 /*
7783                  * Several cause, from unaligned bytenr to over lapping extents
7784                  */
7785                 return -EEXIST;
7786         }
7787
7788         back = find_tree_backref(rec, parent, root);
7789         if (!back) {
7790                 back = alloc_tree_backref(rec, parent, root);
7791                 if (!back)
7792                         return -ENOMEM;
7793                 insert = true;
7794         }
7795
7796         if (found_ref) {
7797                 if (back->node.found_ref) {
7798                         fprintf(stderr, "Extent back ref already exists "
7799                                 "for %llu parent %llu root %llu \n",
7800                                 (unsigned long long)bytenr,
7801                                 (unsigned long long)parent,
7802                                 (unsigned long long)root);
7803                 }
7804                 back->node.found_ref = 1;
7805         } else {
7806                 if (back->node.found_extent_tree) {
7807                         fprintf(stderr, "Extent back ref already exists "
7808                                 "for %llu parent %llu root %llu \n",
7809                                 (unsigned long long)bytenr,
7810                                 (unsigned long long)parent,
7811                                 (unsigned long long)root);
7812                 }
7813                 back->node.found_extent_tree = 1;
7814         }
7815         if (insert)
7816                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7817                         compare_extent_backref));
7818         check_extent_type(rec);
7819         maybe_free_extent_rec(extent_cache, rec);
7820         return 0;
7821 }
7822
7823 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7824                             u64 parent, u64 root, u64 owner, u64 offset,
7825                             u32 num_refs, int found_ref, u64 max_size)
7826 {
7827         struct extent_record *rec;
7828         struct data_backref *back;
7829         struct cache_extent *cache;
7830         int ret;
7831         bool insert = false;
7832
7833         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7834         if (!cache) {
7835                 struct extent_record tmpl;
7836
7837                 memset(&tmpl, 0, sizeof(tmpl));
7838                 tmpl.start = bytenr;
7839                 tmpl.nr = 1;
7840                 tmpl.max_size = max_size;
7841
7842                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7843                 if (ret)
7844                         return ret;
7845
7846                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7847                 if (!cache)
7848                         abort();
7849         }
7850
7851         rec = container_of(cache, struct extent_record, cache);
7852         if (rec->max_size < max_size)
7853                 rec->max_size = max_size;
7854
7855         /*
7856          * If found_ref is set then max_size is the real size and must match the
7857          * existing refs.  So if we have already found a ref then we need to
7858          * make sure that this ref matches the existing one, otherwise we need
7859          * to add a new backref so we can notice that the backrefs don't match
7860          * and we need to figure out who is telling the truth.  This is to
7861          * account for that awful fsync bug I introduced where we'd end up with
7862          * a btrfs_file_extent_item that would have its length include multiple
7863          * prealloc extents or point inside of a prealloc extent.
7864          */
7865         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7866                                  bytenr, max_size);
7867         if (!back) {
7868                 back = alloc_data_backref(rec, parent, root, owner, offset,
7869                                           max_size);
7870                 BUG_ON(!back);
7871                 insert = true;
7872         }
7873
7874         if (found_ref) {
7875                 BUG_ON(num_refs != 1);
7876                 if (back->node.found_ref)
7877                         BUG_ON(back->bytes != max_size);
7878                 back->node.found_ref = 1;
7879                 back->found_ref += 1;
7880                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7881                         back->bytes = max_size;
7882                         back->disk_bytenr = bytenr;
7883
7884                         /* Need to reinsert if not already in the tree */
7885                         if (!insert) {
7886                                 rb_erase(&back->node.node, &rec->backref_tree);
7887                                 insert = true;
7888                         }
7889                 }
7890                 rec->refs += 1;
7891                 rec->content_checked = 1;
7892                 rec->owner_ref_checked = 1;
7893         } else {
7894                 if (back->node.found_extent_tree) {
7895                         fprintf(stderr, "Extent back ref already exists "
7896                                 "for %llu parent %llu root %llu "
7897                                 "owner %llu offset %llu num_refs %lu\n",
7898                                 (unsigned long long)bytenr,
7899                                 (unsigned long long)parent,
7900                                 (unsigned long long)root,
7901                                 (unsigned long long)owner,
7902                                 (unsigned long long)offset,
7903                                 (unsigned long)num_refs);
7904                 }
7905                 back->num_refs = num_refs;
7906                 back->node.found_extent_tree = 1;
7907         }
7908         if (insert)
7909                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7910                         compare_extent_backref));
7911
7912         maybe_free_extent_rec(extent_cache, rec);
7913         return 0;
7914 }
7915
7916 static int add_pending(struct cache_tree *pending,
7917                        struct cache_tree *seen, u64 bytenr, u32 size)
7918 {
7919         int ret;
7920         ret = add_cache_extent(seen, bytenr, size);
7921         if (ret)
7922                 return ret;
7923         add_cache_extent(pending, bytenr, size);
7924         return 0;
7925 }
7926
7927 static int pick_next_pending(struct cache_tree *pending,
7928                         struct cache_tree *reada,
7929                         struct cache_tree *nodes,
7930                         u64 last, struct block_info *bits, int bits_nr,
7931                         int *reada_bits)
7932 {
7933         unsigned long node_start = last;
7934         struct cache_extent *cache;
7935         int ret;
7936
7937         cache = search_cache_extent(reada, 0);
7938         if (cache) {
7939                 bits[0].start = cache->start;
7940                 bits[0].size = cache->size;
7941                 *reada_bits = 1;
7942                 return 1;
7943         }
7944         *reada_bits = 0;
7945         if (node_start > 32768)
7946                 node_start -= 32768;
7947
7948         cache = search_cache_extent(nodes, node_start);
7949         if (!cache)
7950                 cache = search_cache_extent(nodes, 0);
7951
7952         if (!cache) {
7953                  cache = search_cache_extent(pending, 0);
7954                  if (!cache)
7955                          return 0;
7956                  ret = 0;
7957                  do {
7958                          bits[ret].start = cache->start;
7959                          bits[ret].size = cache->size;
7960                          cache = next_cache_extent(cache);
7961                          ret++;
7962                  } while (cache && ret < bits_nr);
7963                  return ret;
7964         }
7965
7966         ret = 0;
7967         do {
7968                 bits[ret].start = cache->start;
7969                 bits[ret].size = cache->size;
7970                 cache = next_cache_extent(cache);
7971                 ret++;
7972         } while (cache && ret < bits_nr);
7973
7974         if (bits_nr - ret > 8) {
7975                 u64 lookup = bits[0].start + bits[0].size;
7976                 struct cache_extent *next;
7977                 next = search_cache_extent(pending, lookup);
7978                 while(next) {
7979                         if (next->start - lookup > 32768)
7980                                 break;
7981                         bits[ret].start = next->start;
7982                         bits[ret].size = next->size;
7983                         lookup = next->start + next->size;
7984                         ret++;
7985                         if (ret == bits_nr)
7986                                 break;
7987                         next = next_cache_extent(next);
7988                         if (!next)
7989                                 break;
7990                 }
7991         }
7992         return ret;
7993 }
7994
7995 static void free_chunk_record(struct cache_extent *cache)
7996 {
7997         struct chunk_record *rec;
7998
7999         rec = container_of(cache, struct chunk_record, cache);
8000         list_del_init(&rec->list);
8001         list_del_init(&rec->dextents);
8002         free(rec);
8003 }
8004
8005 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8006 {
8007         cache_tree_free_extents(chunk_cache, free_chunk_record);
8008 }
8009
8010 static void free_device_record(struct rb_node *node)
8011 {
8012         struct device_record *rec;
8013
8014         rec = container_of(node, struct device_record, node);
8015         free(rec);
8016 }
8017
8018 FREE_RB_BASED_TREE(device_cache, free_device_record);
8019
8020 int insert_block_group_record(struct block_group_tree *tree,
8021                               struct block_group_record *bg_rec)
8022 {
8023         int ret;
8024
8025         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8026         if (ret)
8027                 return ret;
8028
8029         list_add_tail(&bg_rec->list, &tree->block_groups);
8030         return 0;
8031 }
8032
8033 static void free_block_group_record(struct cache_extent *cache)
8034 {
8035         struct block_group_record *rec;
8036
8037         rec = container_of(cache, struct block_group_record, cache);
8038         list_del_init(&rec->list);
8039         free(rec);
8040 }
8041
8042 void free_block_group_tree(struct block_group_tree *tree)
8043 {
8044         cache_tree_free_extents(&tree->tree, free_block_group_record);
8045 }
8046
8047 int insert_device_extent_record(struct device_extent_tree *tree,
8048                                 struct device_extent_record *de_rec)
8049 {
8050         int ret;
8051
8052         /*
8053          * Device extent is a bit different from the other extents, because
8054          * the extents which belong to the different devices may have the
8055          * same start and size, so we need use the special extent cache
8056          * search/insert functions.
8057          */
8058         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8059         if (ret)
8060                 return ret;
8061
8062         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8063         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
8064         return 0;
8065 }
8066
8067 static void free_device_extent_record(struct cache_extent *cache)
8068 {
8069         struct device_extent_record *rec;
8070
8071         rec = container_of(cache, struct device_extent_record, cache);
8072         if (!list_empty(&rec->chunk_list))
8073                 list_del_init(&rec->chunk_list);
8074         if (!list_empty(&rec->device_list))
8075                 list_del_init(&rec->device_list);
8076         free(rec);
8077 }
8078
8079 void free_device_extent_tree(struct device_extent_tree *tree)
8080 {
8081         cache_tree_free_extents(&tree->tree, free_device_extent_record);
8082 }
8083
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID becomes a tree
 * backref, any other ref a data backref carrying the item's ref count.  The
 * key's objectid is used as the extent start and its offset as the parent.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
8104
8105 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8106                                             struct btrfs_key *key,
8107                                             int slot)
8108 {
8109         struct btrfs_chunk *ptr;
8110         struct chunk_record *rec;
8111         int num_stripes, i;
8112
8113         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8114         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8115
8116         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8117         if (!rec) {
8118                 fprintf(stderr, "memory allocation failed\n");
8119                 exit(-1);
8120         }
8121
8122         INIT_LIST_HEAD(&rec->list);
8123         INIT_LIST_HEAD(&rec->dextents);
8124         rec->bg_rec = NULL;
8125
8126         rec->cache.start = key->offset;
8127         rec->cache.size = btrfs_chunk_length(leaf, ptr);
8128
8129         rec->generation = btrfs_header_generation(leaf);
8130
8131         rec->objectid = key->objectid;
8132         rec->type = key->type;
8133         rec->offset = key->offset;
8134
8135         rec->length = rec->cache.size;
8136         rec->owner = btrfs_chunk_owner(leaf, ptr);
8137         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8138         rec->type_flags = btrfs_chunk_type(leaf, ptr);
8139         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8140         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8141         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8142         rec->num_stripes = num_stripes;
8143         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8144
8145         for (i = 0; i < rec->num_stripes; ++i) {
8146                 rec->stripes[i].devid =
8147                         btrfs_stripe_devid_nr(leaf, ptr, i);
8148                 rec->stripes[i].offset =
8149                         btrfs_stripe_offset_nr(leaf, ptr, i);
8150                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8151                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
8152                                 BTRFS_UUID_SIZE);
8153         }
8154
8155         return rec;
8156 }
8157
8158 static int process_chunk_item(struct cache_tree *chunk_cache,
8159                               struct btrfs_key *key, struct extent_buffer *eb,
8160                               int slot)
8161 {
8162         struct chunk_record *rec;
8163         struct btrfs_chunk *chunk;
8164         int ret = 0;
8165
8166         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8167         /*
8168          * Do extra check for this chunk item,
8169          *
8170          * It's still possible one can craft a leaf with CHUNK_ITEM, with
8171          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8172          * and owner<->key_type check.
8173          */
8174         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8175                                       key->offset);
8176         if (ret < 0) {
8177                 error("chunk(%llu, %llu) is not valid, ignore it",
8178                       key->offset, btrfs_chunk_length(eb, chunk));
8179                 return 0;
8180         }
8181         rec = btrfs_new_chunk_record(eb, key, slot);
8182         ret = insert_cache_extent(chunk_cache, &rec->cache);
8183         if (ret) {
8184                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8185                         rec->offset, rec->length);
8186                 free(rec);
8187         }
8188
8189         return ret;
8190 }
8191
8192 static int process_device_item(struct rb_root *dev_cache,
8193                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8194 {
8195         struct btrfs_dev_item *ptr;
8196         struct device_record *rec;
8197         int ret = 0;
8198
8199         ptr = btrfs_item_ptr(eb,
8200                 slot, struct btrfs_dev_item);
8201
8202         rec = malloc(sizeof(*rec));
8203         if (!rec) {
8204                 fprintf(stderr, "memory allocation failed\n");
8205                 return -ENOMEM;
8206         }
8207
8208         rec->devid = key->offset;
8209         rec->generation = btrfs_header_generation(eb);
8210
8211         rec->objectid = key->objectid;
8212         rec->type = key->type;
8213         rec->offset = key->offset;
8214
8215         rec->devid = btrfs_device_id(eb, ptr);
8216         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8217         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8218
8219         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8220         if (ret) {
8221                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
8222                 free(rec);
8223         }
8224
8225         return ret;
8226 }
8227
8228 struct block_group_record *
8229 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8230                              int slot)
8231 {
8232         struct btrfs_block_group_item *ptr;
8233         struct block_group_record *rec;
8234
8235         rec = calloc(1, sizeof(*rec));
8236         if (!rec) {
8237                 fprintf(stderr, "memory allocation failed\n");
8238                 exit(-1);
8239         }
8240
8241         rec->cache.start = key->objectid;
8242         rec->cache.size = key->offset;
8243
8244         rec->generation = btrfs_header_generation(leaf);
8245
8246         rec->objectid = key->objectid;
8247         rec->type = key->type;
8248         rec->offset = key->offset;
8249
8250         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8251         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8252
8253         INIT_LIST_HEAD(&rec->list);
8254
8255         return rec;
8256 }
8257
8258 static int process_block_group_item(struct block_group_tree *block_group_cache,
8259                                     struct btrfs_key *key,
8260                                     struct extent_buffer *eb, int slot)
8261 {
8262         struct block_group_record *rec;
8263         int ret = 0;
8264
8265         rec = btrfs_new_block_group_record(eb, key, slot);
8266         ret = insert_block_group_record(block_group_cache, rec);
8267         if (ret) {
8268                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8269                         rec->objectid, rec->offset);
8270                 free(rec);
8271         }
8272
8273         return ret;
8274 }
8275
8276 struct device_extent_record *
8277 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8278                                struct btrfs_key *key, int slot)
8279 {
8280         struct device_extent_record *rec;
8281         struct btrfs_dev_extent *ptr;
8282
8283         rec = calloc(1, sizeof(*rec));
8284         if (!rec) {
8285                 fprintf(stderr, "memory allocation failed\n");
8286                 exit(-1);
8287         }
8288
8289         rec->cache.objectid = key->objectid;
8290         rec->cache.start = key->offset;
8291
8292         rec->generation = btrfs_header_generation(leaf);
8293
8294         rec->objectid = key->objectid;
8295         rec->type = key->type;
8296         rec->offset = key->offset;
8297
8298         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8299         rec->chunk_objecteid =
8300                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8301         rec->chunk_offset =
8302                 btrfs_dev_extent_chunk_offset(leaf, ptr);
8303         rec->length = btrfs_dev_extent_length(leaf, ptr);
8304         rec->cache.size = rec->length;
8305
8306         INIT_LIST_HEAD(&rec->chunk_list);
8307         INIT_LIST_HEAD(&rec->device_list);
8308
8309         return rec;
8310 }
8311
8312 static int
8313 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8314                            struct btrfs_key *key, struct extent_buffer *eb,
8315                            int slot)
8316 {
8317         struct device_extent_record *rec;
8318         int ret;
8319
8320         rec = btrfs_new_device_extent_record(eb, key, slot);
8321         ret = insert_device_extent_record(dev_extent_cache, rec);
8322         if (ret) {
8323                 fprintf(stderr,
8324                         "Device extent[%llu, %llu, %llu] existed.\n",
8325                         rec->objectid, rec->offset, rec->length);
8326                 free(rec);
8327         }
8328
8329         return ret;
8330 }
8331
8332 static int process_extent_item(struct btrfs_root *root,
8333                                struct cache_tree *extent_cache,
8334                                struct extent_buffer *eb, int slot)
8335 {
8336         struct btrfs_extent_item *ei;
8337         struct btrfs_extent_inline_ref *iref;
8338         struct btrfs_extent_data_ref *dref;
8339         struct btrfs_shared_data_ref *sref;
8340         struct btrfs_key key;
8341         struct extent_record tmpl;
8342         unsigned long end;
8343         unsigned long ptr;
8344         int ret;
8345         int type;
8346         u32 item_size = btrfs_item_size_nr(eb, slot);
8347         u64 refs = 0;
8348         u64 offset;
8349         u64 num_bytes;
8350         int metadata = 0;
8351
8352         btrfs_item_key_to_cpu(eb, &key, slot);
8353
8354         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8355                 metadata = 1;
8356                 num_bytes = root->fs_info->nodesize;
8357         } else {
8358                 num_bytes = key.offset;
8359         }
8360
8361         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8362                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8363                       key.objectid, root->fs_info->sectorsize);
8364                 return -EIO;
8365         }
8366         if (item_size < sizeof(*ei)) {
8367 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8368                 struct btrfs_extent_item_v0 *ei0;
8369                 BUG_ON(item_size != sizeof(*ei0));
8370                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8371                 refs = btrfs_extent_refs_v0(eb, ei0);
8372 #else
8373                 BUG();
8374 #endif
8375                 memset(&tmpl, 0, sizeof(tmpl));
8376                 tmpl.start = key.objectid;
8377                 tmpl.nr = num_bytes;
8378                 tmpl.extent_item_refs = refs;
8379                 tmpl.metadata = metadata;
8380                 tmpl.found_rec = 1;
8381                 tmpl.max_size = num_bytes;
8382
8383                 return add_extent_rec(extent_cache, &tmpl);
8384         }
8385
8386         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8387         refs = btrfs_extent_refs(eb, ei);
8388         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8389                 metadata = 1;
8390         else
8391                 metadata = 0;
8392         if (metadata && num_bytes != root->fs_info->nodesize) {
8393                 error("ignore invalid metadata extent, length %llu does not equal to %u",
8394                       num_bytes, root->fs_info->nodesize);
8395                 return -EIO;
8396         }
8397         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8398                 error("ignore invalid data extent, length %llu is not aligned to %u",
8399                       num_bytes, root->fs_info->sectorsize);
8400                 return -EIO;
8401         }
8402
8403         memset(&tmpl, 0, sizeof(tmpl));
8404         tmpl.start = key.objectid;
8405         tmpl.nr = num_bytes;
8406         tmpl.extent_item_refs = refs;
8407         tmpl.metadata = metadata;
8408         tmpl.found_rec = 1;
8409         tmpl.max_size = num_bytes;
8410         add_extent_rec(extent_cache, &tmpl);
8411
8412         ptr = (unsigned long)(ei + 1);
8413         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8414             key.type == BTRFS_EXTENT_ITEM_KEY)
8415                 ptr += sizeof(struct btrfs_tree_block_info);
8416
8417         end = (unsigned long)ei + item_size;
8418         while (ptr < end) {
8419                 iref = (struct btrfs_extent_inline_ref *)ptr;
8420                 type = btrfs_extent_inline_ref_type(eb, iref);
8421                 offset = btrfs_extent_inline_ref_offset(eb, iref);
8422                 switch (type) {
8423                 case BTRFS_TREE_BLOCK_REF_KEY:
8424                         ret = add_tree_backref(extent_cache, key.objectid,
8425                                         0, offset, 0);
8426                         if (ret < 0)
8427                                 error(
8428                         "add_tree_backref failed (extent items tree block): %s",
8429                                       strerror(-ret));
8430                         break;
8431                 case BTRFS_SHARED_BLOCK_REF_KEY:
8432                         ret = add_tree_backref(extent_cache, key.objectid,
8433                                         offset, 0, 0);
8434                         if (ret < 0)
8435                                 error(
8436                         "add_tree_backref failed (extent items shared block): %s",
8437                                       strerror(-ret));
8438                         break;
8439                 case BTRFS_EXTENT_DATA_REF_KEY:
8440                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8441                         add_data_backref(extent_cache, key.objectid, 0,
8442                                         btrfs_extent_data_ref_root(eb, dref),
8443                                         btrfs_extent_data_ref_objectid(eb,
8444                                                                        dref),
8445                                         btrfs_extent_data_ref_offset(eb, dref),
8446                                         btrfs_extent_data_ref_count(eb, dref),
8447                                         0, num_bytes);
8448                         break;
8449                 case BTRFS_SHARED_DATA_REF_KEY:
8450                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8451                         add_data_backref(extent_cache, key.objectid, offset,
8452                                         0, 0, 0,
8453                                         btrfs_shared_data_ref_count(eb, sref),
8454                                         0, num_bytes);
8455                         break;
8456                 default:
8457                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8458                                 key.objectid, key.type, num_bytes);
8459                         goto out;
8460                 }
8461                 ptr += btrfs_extent_inline_ref_size(type);
8462         }
8463         WARN_ON(ptr > end);
8464 out:
8465         return 0;
8466 }
8467
8468 static int check_cache_range(struct btrfs_root *root,
8469                              struct btrfs_block_group_cache *cache,
8470                              u64 offset, u64 bytes)
8471 {
8472         struct btrfs_free_space *entry;
8473         u64 *logical;
8474         u64 bytenr;
8475         int stripe_len;
8476         int i, nr, ret;
8477
8478         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8479                 bytenr = btrfs_sb_offset(i);
8480                 ret = btrfs_rmap_block(root->fs_info,
8481                                        cache->key.objectid, bytenr, 0,
8482                                        &logical, &nr, &stripe_len);
8483                 if (ret)
8484                         return ret;
8485
8486                 while (nr--) {
8487                         if (logical[nr] + stripe_len <= offset)
8488                                 continue;
8489                         if (offset + bytes <= logical[nr])
8490                                 continue;
8491                         if (logical[nr] == offset) {
8492                                 if (stripe_len >= bytes) {
8493                                         free(logical);
8494                                         return 0;
8495                                 }
8496                                 bytes -= stripe_len;
8497                                 offset += stripe_len;
8498                         } else if (logical[nr] < offset) {
8499                                 if (logical[nr] + stripe_len >=
8500                                     offset + bytes) {
8501                                         free(logical);
8502                                         return 0;
8503                                 }
8504                                 bytes = (offset + bytes) -
8505                                         (logical[nr] + stripe_len);
8506                                 offset = logical[nr] + stripe_len;
8507                         } else {
8508                                 /*
8509                                  * Could be tricky, the super may land in the
8510                                  * middle of the area we're checking.  First
8511                                  * check the easiest case, it's at the end.
8512                                  */
8513                                 if (logical[nr] + stripe_len >=
8514                                     bytes + offset) {
8515                                         bytes = logical[nr] - offset;
8516                                         continue;
8517                                 }
8518
8519                                 /* Check the left side */
8520                                 ret = check_cache_range(root, cache,
8521                                                         offset,
8522                                                         logical[nr] - offset);
8523                                 if (ret) {
8524                                         free(logical);
8525                                         return ret;
8526                                 }
8527
8528                                 /* Now we continue with the right side */
8529                                 bytes = (offset + bytes) -
8530                                         (logical[nr] + stripe_len);
8531                                 offset = logical[nr] + stripe_len;
8532                         }
8533                 }
8534
8535                 free(logical);
8536         }
8537
8538         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8539         if (!entry) {
8540                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8541                         offset, offset+bytes);
8542                 return -EINVAL;
8543         }
8544
8545         if (entry->offset != offset) {
8546                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8547                         entry->offset);
8548                 return -EINVAL;
8549         }
8550
8551         if (entry->bytes != bytes) {
8552                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8553                         bytes, entry->bytes, offset);
8554                 return -EINVAL;
8555         }
8556
8557         unlink_free_space(cache->free_space_ctl, entry);
8558         free(entry);
8559         return 0;
8560 }
8561
8562 static int verify_space_cache(struct btrfs_root *root,
8563                               struct btrfs_block_group_cache *cache)
8564 {
8565         struct btrfs_path path;
8566         struct extent_buffer *leaf;
8567         struct btrfs_key key;
8568         u64 last;
8569         int ret = 0;
8570
8571         root = root->fs_info->extent_root;
8572
8573         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8574
8575         btrfs_init_path(&path);
8576         key.objectid = last;
8577         key.offset = 0;
8578         key.type = BTRFS_EXTENT_ITEM_KEY;
8579         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8580         if (ret < 0)
8581                 goto out;
8582         ret = 0;
8583         while (1) {
8584                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8585                         ret = btrfs_next_leaf(root, &path);
8586                         if (ret < 0)
8587                                 goto out;
8588                         if (ret > 0) {
8589                                 ret = 0;
8590                                 break;
8591                         }
8592                 }
8593                 leaf = path.nodes[0];
8594                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8595                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8596                         break;
8597                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8598                     key.type != BTRFS_METADATA_ITEM_KEY) {
8599                         path.slots[0]++;
8600                         continue;
8601                 }
8602
8603                 if (last == key.objectid) {
8604                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8605                                 last = key.objectid + key.offset;
8606                         else
8607                                 last = key.objectid + root->fs_info->nodesize;
8608                         path.slots[0]++;
8609                         continue;
8610                 }
8611
8612                 ret = check_cache_range(root, cache, last,
8613                                         key.objectid - last);
8614                 if (ret)
8615                         break;
8616                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8617                         last = key.objectid + key.offset;
8618                 else
8619                         last = key.objectid + root->fs_info->nodesize;
8620                 path.slots[0]++;
8621         }
8622
8623         if (last < cache->key.objectid + cache->key.offset)
8624                 ret = check_cache_range(root, cache, last,
8625                                         cache->key.objectid +
8626                                         cache->key.offset - last);
8627
8628 out:
8629         btrfs_release_path(&path);
8630
8631         if (!ret &&
8632             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8633                 fprintf(stderr, "There are still entries left in the space "
8634                         "cache\n");
8635                 ret = -EINVAL;
8636         }
8637
8638         return ret;
8639 }
8640
8641 static int check_space_cache(struct btrfs_root *root)
8642 {
8643         struct btrfs_block_group_cache *cache;
8644         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8645         int ret;
8646         int error = 0;
8647
8648         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8649             btrfs_super_generation(root->fs_info->super_copy) !=
8650             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8651                 printf("cache and super generation don't match, space cache "
8652                        "will be invalidated\n");
8653                 return 0;
8654         }
8655
8656         if (ctx.progress_enabled) {
8657                 ctx.tp = TASK_FREE_SPACE;
8658                 task_start(ctx.info);
8659         }
8660
8661         while (1) {
8662                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8663                 if (!cache)
8664                         break;
8665
8666                 start = cache->key.objectid + cache->key.offset;
8667                 if (!cache->free_space_ctl) {
8668                         if (btrfs_init_free_space_ctl(cache,
8669                                                 root->fs_info->sectorsize)) {
8670                                 ret = -ENOMEM;
8671                                 break;
8672                         }
8673                 } else {
8674                         btrfs_remove_free_space_cache(cache);
8675                 }
8676
8677                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8678                         ret = exclude_super_stripes(root, cache);
8679                         if (ret) {
8680                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8681                                         strerror(-ret));
8682                                 error++;
8683                                 continue;
8684                         }
8685                         ret = load_free_space_tree(root->fs_info, cache);
8686                         free_excluded_extents(root, cache);
8687                         if (ret < 0) {
8688                                 fprintf(stderr, "could not load free space tree: %s\n",
8689                                         strerror(-ret));
8690                                 error++;
8691                                 continue;
8692                         }
8693                         error += ret;
8694                 } else {
8695                         ret = load_free_space_cache(root->fs_info, cache);
8696                         if (!ret)
8697                                 continue;
8698                 }
8699
8700                 ret = verify_space_cache(root, cache);
8701                 if (ret) {
8702                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8703                                 cache->key.objectid);
8704                         error++;
8705                 }
8706         }
8707
8708         task_stop(ctx.info);
8709
8710         return error ? -EINVAL : 0;
8711 }
8712
8713 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8714                         u64 num_bytes, unsigned long leaf_offset,
8715                         struct extent_buffer *eb) {
8716
8717         struct btrfs_fs_info *fs_info = root->fs_info;
8718         u64 offset = 0;
8719         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8720         char *data;
8721         unsigned long csum_offset;
8722         u32 csum;
8723         u32 csum_expected;
8724         u64 read_len;
8725         u64 data_checked = 0;
8726         u64 tmp;
8727         int ret = 0;
8728         int mirror;
8729         int num_copies;
8730
8731         if (num_bytes % fs_info->sectorsize)
8732                 return -EINVAL;
8733
8734         data = malloc(num_bytes);
8735         if (!data)
8736                 return -ENOMEM;
8737
8738         while (offset < num_bytes) {
8739                 mirror = 0;
8740 again:
8741                 read_len = num_bytes - offset;
8742                 /* read as much space once a time */
8743                 ret = read_extent_data(fs_info, data + offset,
8744                                 bytenr + offset, &read_len, mirror);
8745                 if (ret)
8746                         goto out;
8747                 data_checked = 0;
8748                 /* verify every 4k data's checksum */
8749                 while (data_checked < read_len) {
8750                         csum = ~(u32)0;
8751                         tmp = offset + data_checked;
8752
8753                         csum = btrfs_csum_data((char *)data + tmp,
8754                                                csum, fs_info->sectorsize);
8755                         btrfs_csum_final(csum, (u8 *)&csum);
8756
8757                         csum_offset = leaf_offset +
8758                                  tmp / fs_info->sectorsize * csum_size;
8759                         read_extent_buffer(eb, (char *)&csum_expected,
8760                                            csum_offset, csum_size);
8761                         /* try another mirror */
8762                         if (csum != csum_expected) {
8763                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8764                                                 mirror, bytenr + tmp,
8765                                                 csum, csum_expected);
8766                                 num_copies = btrfs_num_copies(root->fs_info,
8767                                                 bytenr, num_bytes);
8768                                 if (mirror < num_copies - 1) {
8769                                         mirror += 1;
8770                                         goto again;
8771                                 }
8772                         }
8773                         data_checked += fs_info->sectorsize;
8774                 }
8775                 offset += read_len;
8776         }
8777 out:
8778         free(data);
8779         return ret;
8780 }
8781
8782 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8783                                u64 num_bytes)
8784 {
8785         struct btrfs_path path;
8786         struct extent_buffer *leaf;
8787         struct btrfs_key key;
8788         int ret;
8789
8790         btrfs_init_path(&path);
8791         key.objectid = bytenr;
8792         key.type = BTRFS_EXTENT_ITEM_KEY;
8793         key.offset = (u64)-1;
8794
8795 again:
8796         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8797                                 0, 0);
8798         if (ret < 0) {
8799                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8800                 btrfs_release_path(&path);
8801                 return ret;
8802         } else if (ret) {
8803                 if (path.slots[0] > 0) {
8804                         path.slots[0]--;
8805                 } else {
8806                         ret = btrfs_prev_leaf(root, &path);
8807                         if (ret < 0) {
8808                                 goto out;
8809                         } else if (ret > 0) {
8810                                 ret = 0;
8811                                 goto out;
8812                         }
8813                 }
8814         }
8815
8816         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8817
8818         /*
8819          * Block group items come before extent items if they have the same
8820          * bytenr, so walk back one more just in case.  Dear future traveller,
8821          * first congrats on mastering time travel.  Now if it's not too much
8822          * trouble could you go back to 2006 and tell Chris to make the
8823          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8824          * EXTENT_ITEM_KEY please?
8825          */
8826         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8827                 if (path.slots[0] > 0) {
8828                         path.slots[0]--;
8829                 } else {
8830                         ret = btrfs_prev_leaf(root, &path);
8831                         if (ret < 0) {
8832                                 goto out;
8833                         } else if (ret > 0) {
8834                                 ret = 0;
8835                                 goto out;
8836                         }
8837                 }
8838                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8839         }
8840
8841         while (num_bytes) {
8842                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8843                         ret = btrfs_next_leaf(root, &path);
8844                         if (ret < 0) {
8845                                 fprintf(stderr, "Error going to next leaf "
8846                                         "%d\n", ret);
8847                                 btrfs_release_path(&path);
8848                                 return ret;
8849                         } else if (ret) {
8850                                 break;
8851                         }
8852                 }
8853                 leaf = path.nodes[0];
8854                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8855                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8856                         path.slots[0]++;
8857                         continue;
8858                 }
8859                 if (key.objectid + key.offset < bytenr) {
8860                         path.slots[0]++;
8861                         continue;
8862                 }
8863                 if (key.objectid > bytenr + num_bytes)
8864                         break;
8865
8866                 if (key.objectid == bytenr) {
8867                         if (key.offset >= num_bytes) {
8868                                 num_bytes = 0;
8869                                 break;
8870                         }
8871                         num_bytes -= key.offset;
8872                         bytenr += key.offset;
8873                 } else if (key.objectid < bytenr) {
8874                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8875                                 num_bytes = 0;
8876                                 break;
8877                         }
8878                         num_bytes = (bytenr + num_bytes) -
8879                                 (key.objectid + key.offset);
8880                         bytenr = key.objectid + key.offset;
8881                 } else {
8882                         if (key.objectid + key.offset < bytenr + num_bytes) {
8883                                 u64 new_start = key.objectid + key.offset;
8884                                 u64 new_bytes = bytenr + num_bytes - new_start;
8885
8886                                 /*
8887                                  * Weird case, the extent is in the middle of
8888                                  * our range, we'll have to search one side
8889                                  * and then the other.  Not sure if this happens
8890                                  * in real life, but no harm in coding it up
8891                                  * anyway just in case.
8892                                  */
8893                                 btrfs_release_path(&path);
8894                                 ret = check_extent_exists(root, new_start,
8895                                                           new_bytes);
8896                                 if (ret) {
8897                                         fprintf(stderr, "Right section didn't "
8898                                                 "have a record\n");
8899                                         break;
8900                                 }
8901                                 num_bytes = key.objectid - bytenr;
8902                                 goto again;
8903                         }
8904                         num_bytes = key.objectid - bytenr;
8905                 }
8906                 path.slots[0]++;
8907         }
8908         ret = 0;
8909
8910 out:
8911         if (num_bytes && !ret) {
8912                 fprintf(stderr, "There are no extents for csum range "
8913                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8914                 ret = 1;
8915         }
8916
8917         btrfs_release_path(&path);
8918         return ret;
8919 }
8920
8921 static int check_csums(struct btrfs_root *root)
8922 {
8923         struct btrfs_path path;
8924         struct extent_buffer *leaf;
8925         struct btrfs_key key;
8926         u64 offset = 0, num_bytes = 0;
8927         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8928         int errors = 0;
8929         int ret;
8930         u64 data_len;
8931         unsigned long leaf_offset;
8932
8933         root = root->fs_info->csum_root;
8934         if (!extent_buffer_uptodate(root->node)) {
8935                 fprintf(stderr, "No valid csum tree found\n");
8936                 return -ENOENT;
8937         }
8938
8939         btrfs_init_path(&path);
8940         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8941         key.type = BTRFS_EXTENT_CSUM_KEY;
8942         key.offset = 0;
8943         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8944         if (ret < 0) {
8945                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8946                 btrfs_release_path(&path);
8947                 return ret;
8948         }
8949
8950         if (ret > 0 && path.slots[0])
8951                 path.slots[0]--;
8952         ret = 0;
8953
8954         while (1) {
8955                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8956                         ret = btrfs_next_leaf(root, &path);
8957                         if (ret < 0) {
8958                                 fprintf(stderr, "Error going to next leaf "
8959                                         "%d\n", ret);
8960                                 break;
8961                         }
8962                         if (ret)
8963                                 break;
8964                 }
8965                 leaf = path.nodes[0];
8966
8967                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8968                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8969                         path.slots[0]++;
8970                         continue;
8971                 }
8972
8973                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8974                               csum_size) * root->fs_info->sectorsize;
8975                 if (!check_data_csum)
8976                         goto skip_csum_check;
8977                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8978                 ret = check_extent_csums(root, key.offset, data_len,
8979                                          leaf_offset, leaf);
8980                 if (ret)
8981                         break;
8982 skip_csum_check:
8983                 if (!num_bytes) {
8984                         offset = key.offset;
8985                 } else if (key.offset != offset + num_bytes) {
8986                         ret = check_extent_exists(root, offset, num_bytes);
8987                         if (ret) {
8988                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8989                                         "there is no extent record\n",
8990                                         offset, offset+num_bytes);
8991                                 errors++;
8992                         }
8993                         offset = key.offset;
8994                         num_bytes = 0;
8995                 }
8996                 num_bytes += data_len;
8997                 path.slots[0]++;
8998         }
8999
9000         btrfs_release_path(&path);
9001         return errors;
9002 }
9003
9004 static int is_dropped_key(struct btrfs_key *key,
9005                           struct btrfs_key *drop_key) {
9006         if (key->objectid < drop_key->objectid)
9007                 return 1;
9008         else if (key->objectid == drop_key->objectid) {
9009                 if (key->type < drop_key->type)
9010                         return 1;
9011                 else if (key->type == drop_key->type) {
9012                         if (key->offset < drop_key->offset)
9013                                 return 1;
9014                 }
9015         }
9016         return 0;
9017 }
9018
9019 /*
9020  * Here are the rules for FULL_BACKREF.
9021  *
9022  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9023  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9024  *      FULL_BACKREF set.
9025  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
9026  *    if it happened after the relocation occurred since we'll have dropped the
9027  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9028  *    have no real way to know for sure.
9029  *
9030  * We process the blocks one root at a time, and we start from the lowest root
9031  * objectid and go to the highest.  So we can just lookup the owner backref for
9032  * the record and if we don't find it then we know it doesn't exist and we have
9033  * a FULL BACKREF.
9034  *
9035  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
9036  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9037  * be set or not and then we can check later once we've gathered all the refs.
9038  */
9039 static int calc_extent_flag(struct cache_tree *extent_cache,
9040                            struct extent_buffer *buf,
9041                            struct root_item_record *ri,
9042                            u64 *flags)
9043 {
9044         struct extent_record *rec;
9045         struct cache_extent *cache;
9046         struct tree_backref *tback;
9047         u64 owner = 0;
9048
9049         cache = lookup_cache_extent(extent_cache, buf->start, 1);
9050         /* we have added this extent before */
9051         if (!cache)
9052                 return -ENOENT;
9053
9054         rec = container_of(cache, struct extent_record, cache);
9055
9056         /*
9057          * Except file/reloc tree, we can not have
9058          * FULL BACKREF MODE
9059          */
9060         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9061                 goto normal;
9062         /*
9063          * root node
9064          */
9065         if (buf->start == ri->bytenr)
9066                 goto normal;
9067
9068         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9069                 goto full_backref;
9070
9071         owner = btrfs_header_owner(buf);
9072         if (owner == ri->objectid)
9073                 goto normal;
9074
9075         tback = find_tree_backref(rec, 0, owner);
9076         if (!tback)
9077                 goto full_backref;
9078 normal:
9079         *flags = 0;
9080         if (rec->flag_block_full_backref != FLAG_UNSET &&
9081             rec->flag_block_full_backref != 0)
9082                 rec->bad_full_backref = 1;
9083         return 0;
9084 full_backref:
9085         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9086         if (rec->flag_block_full_backref != FLAG_UNSET &&
9087             rec->flag_block_full_backref != 1)
9088                 rec->bad_full_backref = 1;
9089         return 0;
9090 }
9091
9092 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9093 {
9094         fprintf(stderr, "Invalid key type(");
9095         print_key_type(stderr, 0, key_type);
9096         fprintf(stderr, ") found in root(");
9097         print_objectid(stderr, rootid, 0);
9098         fprintf(stderr, ")\n");
9099 }
9100
9101 /*
9102  * Check if the key is valid with its extent buffer.
9103  *
9104  * This is a early check in case invalid key exists in a extent buffer
9105  * This is not comprehensive yet, but should prevent wrong key/item passed
9106  * further
9107  */
9108 static int check_type_with_root(u64 rootid, u8 key_type)
9109 {
9110         switch (key_type) {
9111         /* Only valid in chunk tree */
9112         case BTRFS_DEV_ITEM_KEY:
9113         case BTRFS_CHUNK_ITEM_KEY:
9114                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9115                         goto err;
9116                 break;
9117         /* valid in csum and log tree */
9118         case BTRFS_CSUM_TREE_OBJECTID:
9119                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9120                       is_fstree(rootid)))
9121                         goto err;
9122                 break;
9123         case BTRFS_EXTENT_ITEM_KEY:
9124         case BTRFS_METADATA_ITEM_KEY:
9125         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9126                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9127                         goto err;
9128                 break;
9129         case BTRFS_ROOT_ITEM_KEY:
9130                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9131                         goto err;
9132                 break;
9133         case BTRFS_DEV_EXTENT_KEY:
9134                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9135                         goto err;
9136                 break;
9137         }
9138         return 0;
9139 err:
9140         report_mismatch_key_root(key_type, rootid);
9141         return -EINVAL;
9142 }
9143
9144 static int run_next_block(struct btrfs_root *root,
9145                           struct block_info *bits,
9146                           int bits_nr,
9147                           u64 *last,
9148                           struct cache_tree *pending,
9149                           struct cache_tree *seen,
9150                           struct cache_tree *reada,
9151                           struct cache_tree *nodes,
9152                           struct cache_tree *extent_cache,
9153                           struct cache_tree *chunk_cache,
9154                           struct rb_root *dev_cache,
9155                           struct block_group_tree *block_group_cache,
9156                           struct device_extent_tree *dev_extent_cache,
9157                           struct root_item_record *ri)
9158 {
9159         struct btrfs_fs_info *fs_info = root->fs_info;
9160         struct extent_buffer *buf;
9161         struct extent_record *rec = NULL;
9162         u64 bytenr;
9163         u32 size;
9164         u64 parent;
9165         u64 owner;
9166         u64 flags;
9167         u64 ptr;
9168         u64 gen = 0;
9169         int ret = 0;
9170         int i;
9171         int nritems;
9172         struct btrfs_key key;
9173         struct cache_extent *cache;
9174         int reada_bits;
9175
9176         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9177                                     bits_nr, &reada_bits);
9178         if (nritems == 0)
9179                 return 1;
9180
9181         if (!reada_bits) {
9182                 for(i = 0; i < nritems; i++) {
9183                         ret = add_cache_extent(reada, bits[i].start,
9184                                                bits[i].size);
9185                         if (ret == -EEXIST)
9186                                 continue;
9187
9188                         /* fixme, get the parent transid */
9189                         readahead_tree_block(fs_info, bits[i].start, 0);
9190                 }
9191         }
9192         *last = bits[0].start;
9193         bytenr = bits[0].start;
9194         size = bits[0].size;
9195
9196         cache = lookup_cache_extent(pending, bytenr, size);
9197         if (cache) {
9198                 remove_cache_extent(pending, cache);
9199                 free(cache);
9200         }
9201         cache = lookup_cache_extent(reada, bytenr, size);
9202         if (cache) {
9203                 remove_cache_extent(reada, cache);
9204                 free(cache);
9205         }
9206         cache = lookup_cache_extent(nodes, bytenr, size);
9207         if (cache) {
9208                 remove_cache_extent(nodes, cache);
9209                 free(cache);
9210         }
9211         cache = lookup_cache_extent(extent_cache, bytenr, size);
9212         if (cache) {
9213                 rec = container_of(cache, struct extent_record, cache);
9214                 gen = rec->parent_generation;
9215         }
9216
9217         /* fixme, get the real parent transid */
9218         buf = read_tree_block(root->fs_info, bytenr, gen);
9219         if (!extent_buffer_uptodate(buf)) {
9220                 record_bad_block_io(root->fs_info,
9221                                     extent_cache, bytenr, size);
9222                 goto out;
9223         }
9224
9225         nritems = btrfs_header_nritems(buf);
9226
9227         flags = 0;
9228         if (!init_extent_tree) {
9229                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9230                                        btrfs_header_level(buf), 1, NULL,
9231                                        &flags);
9232                 if (ret < 0) {
9233                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9234                         if (ret < 0) {
9235                                 fprintf(stderr, "Couldn't calc extent flags\n");
9236                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9237                         }
9238                 }
9239         } else {
9240                 flags = 0;
9241                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9242                 if (ret < 0) {
9243                         fprintf(stderr, "Couldn't calc extent flags\n");
9244                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9245                 }
9246         }
9247
9248         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9249                 if (ri != NULL &&
9250                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9251                     ri->objectid == btrfs_header_owner(buf)) {
9252                         /*
9253                          * Ok we got to this block from it's original owner and
9254                          * we have FULL_BACKREF set.  Relocation can leave
9255                          * converted blocks over so this is altogether possible,
9256                          * however it's not possible if the generation > the
9257                          * last snapshot, so check for this case.
9258                          */
9259                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9260                             btrfs_header_generation(buf) > ri->last_snapshot) {
9261                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9262                                 rec->bad_full_backref = 1;
9263                         }
9264                 }
9265         } else {
9266                 if (ri != NULL &&
9267                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9268                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9269                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9270                         rec->bad_full_backref = 1;
9271                 }
9272         }
9273
9274         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9275                 rec->flag_block_full_backref = 1;
9276                 parent = bytenr;
9277                 owner = 0;
9278         } else {
9279                 rec->flag_block_full_backref = 0;
9280                 parent = 0;
9281                 owner = btrfs_header_owner(buf);
9282         }
9283
9284         ret = check_block(root, extent_cache, buf, flags);
9285         if (ret)
9286                 goto out;
9287
9288         if (btrfs_is_leaf(buf)) {
9289                 btree_space_waste += btrfs_leaf_free_space(root, buf);
9290                 for (i = 0; i < nritems; i++) {
9291                         struct btrfs_file_extent_item *fi;
9292                         btrfs_item_key_to_cpu(buf, &key, i);
9293                         /*
9294                          * Check key type against the leaf owner.
9295                          * Could filter quite a lot of early error if
9296                          * owner is correct
9297                          */
9298                         if (check_type_with_root(btrfs_header_owner(buf),
9299                                                  key.type)) {
9300                                 fprintf(stderr, "ignoring invalid key\n");
9301                                 continue;
9302                         }
9303                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9304                                 process_extent_item(root, extent_cache, buf,
9305                                                     i);
9306                                 continue;
9307                         }
9308                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9309                                 process_extent_item(root, extent_cache, buf,
9310                                                     i);
9311                                 continue;
9312                         }
9313                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9314                                 total_csum_bytes +=
9315                                         btrfs_item_size_nr(buf, i);
9316                                 continue;
9317                         }
9318                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9319                                 process_chunk_item(chunk_cache, &key, buf, i);
9320                                 continue;
9321                         }
9322                         if (key.type == BTRFS_DEV_ITEM_KEY) {
9323                                 process_device_item(dev_cache, &key, buf, i);
9324                                 continue;
9325                         }
9326                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9327                                 process_block_group_item(block_group_cache,
9328                                         &key, buf, i);
9329                                 continue;
9330                         }
9331                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
9332                                 process_device_extent_item(dev_extent_cache,
9333                                         &key, buf, i);
9334                                 continue;
9335
9336                         }
9337                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9338 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9339                                 process_extent_ref_v0(extent_cache, buf, i);
9340 #else
9341                                 BUG();
9342 #endif
9343                                 continue;
9344                         }
9345
9346                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9347                                 ret = add_tree_backref(extent_cache,
9348                                                 key.objectid, 0, key.offset, 0);
9349                                 if (ret < 0)
9350                                         error(
9351                                 "add_tree_backref failed (leaf tree block): %s",
9352                                               strerror(-ret));
9353                                 continue;
9354                         }
9355                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9356                                 ret = add_tree_backref(extent_cache,
9357                                                 key.objectid, key.offset, 0, 0);
9358                                 if (ret < 0)
9359                                         error(
9360                                 "add_tree_backref failed (leaf shared block): %s",
9361                                               strerror(-ret));
9362                                 continue;
9363                         }
9364                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9365                                 struct btrfs_extent_data_ref *ref;
9366                                 ref = btrfs_item_ptr(buf, i,
9367                                                 struct btrfs_extent_data_ref);
9368                                 add_data_backref(extent_cache,
9369                                         key.objectid, 0,
9370                                         btrfs_extent_data_ref_root(buf, ref),
9371                                         btrfs_extent_data_ref_objectid(buf,
9372                                                                        ref),
9373                                         btrfs_extent_data_ref_offset(buf, ref),
9374                                         btrfs_extent_data_ref_count(buf, ref),
9375                                         0, root->fs_info->sectorsize);
9376                                 continue;
9377                         }
9378                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9379                                 struct btrfs_shared_data_ref *ref;
9380                                 ref = btrfs_item_ptr(buf, i,
9381                                                 struct btrfs_shared_data_ref);
9382                                 add_data_backref(extent_cache,
9383                                         key.objectid, key.offset, 0, 0, 0,
9384                                         btrfs_shared_data_ref_count(buf, ref),
9385                                         0, root->fs_info->sectorsize);
9386                                 continue;
9387                         }
9388                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9389                                 struct bad_item *bad;
9390
9391                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9392                                         continue;
9393                                 if (!owner)
9394                                         continue;
9395                                 bad = malloc(sizeof(struct bad_item));
9396                                 if (!bad)
9397                                         continue;
9398                                 INIT_LIST_HEAD(&bad->list);
9399                                 memcpy(&bad->key, &key,
9400                                        sizeof(struct btrfs_key));
9401                                 bad->root_id = owner;
9402                                 list_add_tail(&bad->list, &delete_items);
9403                                 continue;
9404                         }
9405                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9406                                 continue;
9407                         fi = btrfs_item_ptr(buf, i,
9408                                             struct btrfs_file_extent_item);
9409                         if (btrfs_file_extent_type(buf, fi) ==
9410                             BTRFS_FILE_EXTENT_INLINE)
9411                                 continue;
9412                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9413                                 continue;
9414
9415                         data_bytes_allocated +=
9416                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9417                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9418                                 abort();
9419                         }
9420                         data_bytes_referenced +=
9421                                 btrfs_file_extent_num_bytes(buf, fi);
9422                         add_data_backref(extent_cache,
9423                                 btrfs_file_extent_disk_bytenr(buf, fi),
9424                                 parent, owner, key.objectid, key.offset -
9425                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9426                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9427                 }
9428         } else {
9429                 int level;
9430                 struct btrfs_key first_key;
9431
9432                 first_key.objectid = 0;
9433
9434                 if (nritems > 0)
9435                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9436                 level = btrfs_header_level(buf);
9437                 for (i = 0; i < nritems; i++) {
9438                         struct extent_record tmpl;
9439
9440                         ptr = btrfs_node_blockptr(buf, i);
9441                         size = root->fs_info->nodesize;
9442                         btrfs_node_key_to_cpu(buf, &key, i);
9443                         if (ri != NULL) {
9444                                 if ((level == ri->drop_level)
9445                                     && is_dropped_key(&key, &ri->drop_key)) {
9446                                         continue;
9447                                 }
9448                         }
9449
9450                         memset(&tmpl, 0, sizeof(tmpl));
9451                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9452                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9453                         tmpl.start = ptr;
9454                         tmpl.nr = size;
9455                         tmpl.refs = 1;
9456                         tmpl.metadata = 1;
9457                         tmpl.max_size = size;
9458                         ret = add_extent_rec(extent_cache, &tmpl);
9459                         if (ret < 0)
9460                                 goto out;
9461
9462                         ret = add_tree_backref(extent_cache, ptr, parent,
9463                                         owner, 1);
9464                         if (ret < 0) {
9465                                 error(
9466                                 "add_tree_backref failed (non-leaf block): %s",
9467                                       strerror(-ret));
9468                                 continue;
9469                         }
9470
9471                         if (level > 1) {
9472                                 add_pending(nodes, seen, ptr, size);
9473                         } else {
9474                                 add_pending(pending, seen, ptr, size);
9475                         }
9476                 }
9477                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9478                                       nritems) * sizeof(struct btrfs_key_ptr);
9479         }
9480         total_btree_bytes += buf->len;
9481         if (fs_root_objectid(btrfs_header_owner(buf)))
9482                 total_fs_tree_bytes += buf->len;
9483         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9484                 total_extent_tree_bytes += buf->len;
9485 out:
9486         free_extent_buffer(buf);
9487         return ret;
9488 }
9489
9490 static int add_root_to_pending(struct extent_buffer *buf,
9491                                struct cache_tree *extent_cache,
9492                                struct cache_tree *pending,
9493                                struct cache_tree *seen,
9494                                struct cache_tree *nodes,
9495                                u64 objectid)
9496 {
9497         struct extent_record tmpl;
9498         int ret;
9499
9500         if (btrfs_header_level(buf) > 0)
9501                 add_pending(nodes, seen, buf->start, buf->len);
9502         else
9503                 add_pending(pending, seen, buf->start, buf->len);
9504
9505         memset(&tmpl, 0, sizeof(tmpl));
9506         tmpl.start = buf->start;
9507         tmpl.nr = buf->len;
9508         tmpl.is_root = 1;
9509         tmpl.refs = 1;
9510         tmpl.metadata = 1;
9511         tmpl.max_size = buf->len;
9512         add_extent_rec(extent_cache, &tmpl);
9513
9514         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9515             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9516                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9517                                 0, 1);
9518         else
9519                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9520                                 1);
9521         return ret;
9522 }
9523
9524 /* as we fix the tree, we might be deleting blocks that
9525  * we're tracking for repair.  This hook makes sure we
9526  * remove any backrefs for blocks as we are fixing them.
9527  */
9528 static int free_extent_hook(struct btrfs_trans_handle *trans,
9529                             struct btrfs_root *root,
9530                             u64 bytenr, u64 num_bytes, u64 parent,
9531                             u64 root_objectid, u64 owner, u64 offset,
9532                             int refs_to_drop)
9533 {
9534         struct extent_record *rec;
9535         struct cache_extent *cache;
9536         int is_data;
9537         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9538
9539         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9540         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9541         if (!cache)
9542                 return 0;
9543
9544         rec = container_of(cache, struct extent_record, cache);
9545         if (is_data) {
9546                 struct data_backref *back;
9547                 back = find_data_backref(rec, parent, root_objectid, owner,
9548                                          offset, 1, bytenr, num_bytes);
9549                 if (!back)
9550                         goto out;
9551                 if (back->node.found_ref) {
9552                         back->found_ref -= refs_to_drop;
9553                         if (rec->refs)
9554                                 rec->refs -= refs_to_drop;
9555                 }
9556                 if (back->node.found_extent_tree) {
9557                         back->num_refs -= refs_to_drop;
9558                         if (rec->extent_item_refs)
9559                                 rec->extent_item_refs -= refs_to_drop;
9560                 }
9561                 if (back->found_ref == 0)
9562                         back->node.found_ref = 0;
9563                 if (back->num_refs == 0)
9564                         back->node.found_extent_tree = 0;
9565
9566                 if (!back->node.found_extent_tree && back->node.found_ref) {
9567                         rb_erase(&back->node.node, &rec->backref_tree);
9568                         free(back);
9569                 }
9570         } else {
9571                 struct tree_backref *back;
9572                 back = find_tree_backref(rec, parent, root_objectid);
9573                 if (!back)
9574                         goto out;
9575                 if (back->node.found_ref) {
9576                         if (rec->refs)
9577                                 rec->refs--;
9578                         back->node.found_ref = 0;
9579                 }
9580                 if (back->node.found_extent_tree) {
9581                         if (rec->extent_item_refs)
9582                                 rec->extent_item_refs--;
9583                         back->node.found_extent_tree = 0;
9584                 }
9585                 if (!back->node.found_extent_tree && back->node.found_ref) {
9586                         rb_erase(&back->node.node, &rec->backref_tree);
9587                         free(back);
9588                 }
9589         }
9590         maybe_free_extent_rec(extent_cache, rec);
9591 out:
9592         return 0;
9593 }
9594
9595 static int delete_extent_records(struct btrfs_trans_handle *trans,
9596                                  struct btrfs_root *root,
9597                                  struct btrfs_path *path,
9598                                  u64 bytenr)
9599 {
9600         struct btrfs_key key;
9601         struct btrfs_key found_key;
9602         struct extent_buffer *leaf;
9603         int ret;
9604         int slot;
9605
9606
9607         key.objectid = bytenr;
9608         key.type = (u8)-1;
9609         key.offset = (u64)-1;
9610
9611         while(1) {
9612                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9613                                         &key, path, 0, 1);
9614                 if (ret < 0)
9615                         break;
9616
9617                 if (ret > 0) {
9618                         ret = 0;
9619                         if (path->slots[0] == 0)
9620                                 break;
9621                         path->slots[0]--;
9622                 }
9623                 ret = 0;
9624
9625                 leaf = path->nodes[0];
9626                 slot = path->slots[0];
9627
9628                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9629                 if (found_key.objectid != bytenr)
9630                         break;
9631
9632                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9633                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9634                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9635                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9636                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9637                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9638                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9639                         btrfs_release_path(path);
9640                         if (found_key.type == 0) {
9641                                 if (found_key.offset == 0)
9642                                         break;
9643                                 key.offset = found_key.offset - 1;
9644                                 key.type = found_key.type;
9645                         }
9646                         key.type = found_key.type - 1;
9647                         key.offset = (u64)-1;
9648                         continue;
9649                 }
9650
9651                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9652                         found_key.objectid, found_key.type, found_key.offset);
9653
9654                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9655                 if (ret)
9656                         break;
9657                 btrfs_release_path(path);
9658
9659                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9660                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9661                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9662                                 found_key.offset : root->fs_info->nodesize;
9663
9664                         ret = btrfs_update_block_group(trans, root, bytenr,
9665                                                        bytes, 0, 0);
9666                         if (ret)
9667                                 break;
9668                 }
9669         }
9670
9671         btrfs_release_path(path);
9672         return ret;
9673 }
9674
9675 /*
9676  * for a single backref, this will allocate a new extent
9677  * and add the backref to it.
9678  */
9679 static int record_extent(struct btrfs_trans_handle *trans,
9680                          struct btrfs_fs_info *info,
9681                          struct btrfs_path *path,
9682                          struct extent_record *rec,
9683                          struct extent_backref *back,
9684                          int allocated, u64 flags)
9685 {
9686         int ret = 0;
9687         struct btrfs_root *extent_root = info->extent_root;
9688         struct extent_buffer *leaf;
9689         struct btrfs_key ins_key;
9690         struct btrfs_extent_item *ei;
9691         struct data_backref *dback;
9692         struct btrfs_tree_block_info *bi;
9693
9694         if (!back->is_data)
9695                 rec->max_size = max_t(u64, rec->max_size,
9696                                     info->nodesize);
9697
9698         if (!allocated) {
9699                 u32 item_size = sizeof(*ei);
9700
9701                 if (!back->is_data)
9702                         item_size += sizeof(*bi);
9703
9704                 ins_key.objectid = rec->start;
9705                 ins_key.offset = rec->max_size;
9706                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9707
9708                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9709                                         &ins_key, item_size);
9710                 if (ret)
9711                         goto fail;
9712
9713                 leaf = path->nodes[0];
9714                 ei = btrfs_item_ptr(leaf, path->slots[0],
9715                                     struct btrfs_extent_item);
9716
9717                 btrfs_set_extent_refs(leaf, ei, 0);
9718                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9719
9720                 if (back->is_data) {
9721                         btrfs_set_extent_flags(leaf, ei,
9722                                                BTRFS_EXTENT_FLAG_DATA);
9723                 } else {
9724                         struct btrfs_disk_key copy_key;;
9725
9726                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9727                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9728                                              sizeof(*bi));
9729
9730                         btrfs_set_disk_key_objectid(&copy_key,
9731                                                     rec->info_objectid);
9732                         btrfs_set_disk_key_type(&copy_key, 0);
9733                         btrfs_set_disk_key_offset(&copy_key, 0);
9734
9735                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9736                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9737
9738                         btrfs_set_extent_flags(leaf, ei,
9739                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9740                 }
9741
9742                 btrfs_mark_buffer_dirty(leaf);
9743                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9744                                                rec->max_size, 1, 0);
9745                 if (ret)
9746                         goto fail;
9747                 btrfs_release_path(path);
9748         }
9749
9750         if (back->is_data) {
9751                 u64 parent;
9752                 int i;
9753
9754                 dback = to_data_backref(back);
9755                 if (back->full_backref)
9756                         parent = dback->parent;
9757                 else
9758                         parent = 0;
9759
9760                 for (i = 0; i < dback->found_ref; i++) {
9761                         /* if parent != 0, we're doing a full backref
9762                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9763                          * just makes the backref allocator create a data
9764                          * backref
9765                          */
9766                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9767                                                    rec->start, rec->max_size,
9768                                                    parent,
9769                                                    dback->root,
9770                                                    parent ?
9771                                                    BTRFS_FIRST_FREE_OBJECTID :
9772                                                    dback->owner,
9773                                                    dback->offset);
9774                         if (ret)
9775                                 break;
9776                 }
9777                 fprintf(stderr, "adding new data backref"
9778                                 " on %llu %s %llu owner %llu"
9779                                 " offset %llu found %d\n",
9780                                 (unsigned long long)rec->start,
9781                                 back->full_backref ?
9782                                 "parent" : "root",
9783                                 back->full_backref ?
9784                                 (unsigned long long)parent :
9785                                 (unsigned long long)dback->root,
9786                                 (unsigned long long)dback->owner,
9787                                 (unsigned long long)dback->offset,
9788                                 dback->found_ref);
9789         } else {
9790                 u64 parent;
9791                 struct tree_backref *tback;
9792
9793                 tback = to_tree_backref(back);
9794                 if (back->full_backref)
9795                         parent = tback->parent;
9796                 else
9797                         parent = 0;
9798
9799                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9800                                            rec->start, rec->max_size,
9801                                            parent, tback->root, 0, 0);
9802                 fprintf(stderr, "adding new tree backref on "
9803                         "start %llu len %llu parent %llu root %llu\n",
9804                         rec->start, rec->max_size, parent, tback->root);
9805         }
9806 fail:
9807         btrfs_release_path(path);
9808         return ret;
9809 }
9810
9811 static struct extent_entry *find_entry(struct list_head *entries,
9812                                        u64 bytenr, u64 bytes)
9813 {
9814         struct extent_entry *entry = NULL;
9815
9816         list_for_each_entry(entry, entries, list) {
9817                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9818                         return entry;
9819         }
9820
9821         return NULL;
9822 }
9823
9824 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9825 {
9826         struct extent_entry *entry, *best = NULL, *prev = NULL;
9827
9828         list_for_each_entry(entry, entries, list) {
9829                 /*
9830                  * If there are as many broken entries as entries then we know
9831                  * not to trust this particular entry.
9832                  */
9833                 if (entry->broken == entry->count)
9834                         continue;
9835
9836                 /*
9837                  * Special case, when there are only two entries and 'best' is
9838                  * the first one
9839                  */
9840                 if (!prev) {
9841                         best = entry;
9842                         prev = entry;
9843                         continue;
9844                 }
9845
9846                 /*
9847                  * If our current entry == best then we can't be sure our best
9848                  * is really the best, so we need to keep searching.
9849                  */
9850                 if (best && best->count == entry->count) {
9851                         prev = entry;
9852                         best = NULL;
9853                         continue;
9854                 }
9855
9856                 /* Prev == entry, not good enough, have to keep searching */
9857                 if (!prev->broken && prev->count == entry->count)
9858                         continue;
9859
9860                 if (!best)
9861                         best = (prev->count > entry->count) ? prev : entry;
9862                 else if (best->count < entry->count)
9863                         best = entry;
9864                 prev = entry;
9865         }
9866
9867         return best;
9868 }
9869
9870 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9871                       struct data_backref *dback, struct extent_entry *entry)
9872 {
9873         struct btrfs_trans_handle *trans;
9874         struct btrfs_root *root;
9875         struct btrfs_file_extent_item *fi;
9876         struct extent_buffer *leaf;
9877         struct btrfs_key key;
9878         u64 bytenr, bytes;
9879         int ret, err;
9880
9881         key.objectid = dback->root;
9882         key.type = BTRFS_ROOT_ITEM_KEY;
9883         key.offset = (u64)-1;
9884         root = btrfs_read_fs_root(info, &key);
9885         if (IS_ERR(root)) {
9886                 fprintf(stderr, "Couldn't find root for our ref\n");
9887                 return -EINVAL;
9888         }
9889
9890         /*
9891          * The backref points to the original offset of the extent if it was
9892          * split, so we need to search down to the offset we have and then walk
9893          * forward until we find the backref we're looking for.
9894          */
9895         key.objectid = dback->owner;
9896         key.type = BTRFS_EXTENT_DATA_KEY;
9897         key.offset = dback->offset;
9898         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9899         if (ret < 0) {
9900                 fprintf(stderr, "Error looking up ref %d\n", ret);
9901                 return ret;
9902         }
9903
9904         while (1) {
9905                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9906                         ret = btrfs_next_leaf(root, path);
9907                         if (ret) {
9908                                 fprintf(stderr, "Couldn't find our ref, next\n");
9909                                 return -EINVAL;
9910                         }
9911                 }
9912                 leaf = path->nodes[0];
9913                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9914                 if (key.objectid != dback->owner ||
9915                     key.type != BTRFS_EXTENT_DATA_KEY) {
9916                         fprintf(stderr, "Couldn't find our ref, search\n");
9917                         return -EINVAL;
9918                 }
9919                 fi = btrfs_item_ptr(leaf, path->slots[0],
9920                                     struct btrfs_file_extent_item);
9921                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9922                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9923
9924                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9925                         break;
9926                 path->slots[0]++;
9927         }
9928
9929         btrfs_release_path(path);
9930
9931         trans = btrfs_start_transaction(root, 1);
9932         if (IS_ERR(trans))
9933                 return PTR_ERR(trans);
9934
9935         /*
9936          * Ok we have the key of the file extent we want to fix, now we can cow
9937          * down to the thing and fix it.
9938          */
9939         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9940         if (ret < 0) {
9941                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9942                         key.objectid, key.type, key.offset, ret);
9943                 goto out;
9944         }
9945         if (ret > 0) {
9946                 fprintf(stderr, "Well that's odd, we just found this key "
9947                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9948                         key.offset);
9949                 ret = -EINVAL;
9950                 goto out;
9951         }
9952         leaf = path->nodes[0];
9953         fi = btrfs_item_ptr(leaf, path->slots[0],
9954                             struct btrfs_file_extent_item);
9955
9956         if (btrfs_file_extent_compression(leaf, fi) &&
9957             dback->disk_bytenr != entry->bytenr) {
9958                 fprintf(stderr, "Ref doesn't match the record start and is "
9959                         "compressed, please take a btrfs-image of this file "
9960                         "system and send it to a btrfs developer so they can "
9961                         "complete this functionality for bytenr %Lu\n",
9962                         dback->disk_bytenr);
9963                 ret = -EINVAL;
9964                 goto out;
9965         }
9966
9967         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9968                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9969         } else if (dback->disk_bytenr > entry->bytenr) {
9970                 u64 off_diff, offset;
9971
9972                 off_diff = dback->disk_bytenr - entry->bytenr;
9973                 offset = btrfs_file_extent_offset(leaf, fi);
9974                 if (dback->disk_bytenr + offset +
9975                     btrfs_file_extent_num_bytes(leaf, fi) >
9976                     entry->bytenr + entry->bytes) {
9977                         fprintf(stderr, "Ref is past the entry end, please "
9978                                 "take a btrfs-image of this file system and "
9979                                 "send it to a btrfs developer, ref %Lu\n",
9980                                 dback->disk_bytenr);
9981                         ret = -EINVAL;
9982                         goto out;
9983                 }
9984                 offset += off_diff;
9985                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9986                 btrfs_set_file_extent_offset(leaf, fi, offset);
9987         } else if (dback->disk_bytenr < entry->bytenr) {
9988                 u64 offset;
9989
9990                 offset = btrfs_file_extent_offset(leaf, fi);
9991                 if (dback->disk_bytenr + offset < entry->bytenr) {
9992                         fprintf(stderr, "Ref is before the entry start, please"
9993                                 " take a btrfs-image of this file system and "
9994                                 "send it to a btrfs developer, ref %Lu\n",
9995                                 dback->disk_bytenr);
9996                         ret = -EINVAL;
9997                         goto out;
9998                 }
9999
10000                 offset += dback->disk_bytenr;
10001                 offset -= entry->bytenr;
10002                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
10003                 btrfs_set_file_extent_offset(leaf, fi, offset);
10004         }
10005
10006         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10007
10008         /*
10009          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10010          * only do this if we aren't using compression, otherwise it's a
10011          * trickier case.
10012          */
10013         if (!btrfs_file_extent_compression(leaf, fi))
10014                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10015         else
10016                 printf("ram bytes may be wrong?\n");
10017         btrfs_mark_buffer_dirty(leaf);
10018 out:
10019         err = btrfs_commit_transaction(trans, root);
10020         btrfs_release_path(path);
10021         return ret ? ret : err;
10022 }
10023
10024 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10025                            struct extent_record *rec)
10026 {
10027         struct extent_backref *back, *tmp;
10028         struct data_backref *dback;
10029         struct extent_entry *entry, *best = NULL;
10030         LIST_HEAD(entries);
10031         int nr_entries = 0;
10032         int broken_entries = 0;
10033         int ret = 0;
10034         short mismatch = 0;
10035
10036         /*
10037          * Metadata is easy and the backrefs should always agree on bytenr and
10038          * size, if not we've got bigger issues.
10039          */
10040         if (rec->metadata)
10041                 return 0;
10042
10043         rbtree_postorder_for_each_entry_safe(back, tmp,
10044                                              &rec->backref_tree, node) {
10045                 if (back->full_backref || !back->is_data)
10046                         continue;
10047
10048                 dback = to_data_backref(back);
10049
10050                 /*
10051                  * We only pay attention to backrefs that we found a real
10052                  * backref for.
10053                  */
10054                 if (dback->found_ref == 0)
10055                         continue;
10056
10057                 /*
10058                  * For now we only catch when the bytes don't match, not the
10059                  * bytenr.  We can easily do this at the same time, but I want
10060                  * to have a fs image to test on before we just add repair
10061                  * functionality willy-nilly so we know we won't screw up the
10062                  * repair.
10063                  */
10064
10065                 entry = find_entry(&entries, dback->disk_bytenr,
10066                                    dback->bytes);
10067                 if (!entry) {
10068                         entry = malloc(sizeof(struct extent_entry));
10069                         if (!entry) {
10070                                 ret = -ENOMEM;
10071                                 goto out;
10072                         }
10073                         memset(entry, 0, sizeof(*entry));
10074                         entry->bytenr = dback->disk_bytenr;
10075                         entry->bytes = dback->bytes;
10076                         list_add_tail(&entry->list, &entries);
10077                         nr_entries++;
10078                 }
10079
10080                 /*
10081                  * If we only have on entry we may think the entries agree when
10082                  * in reality they don't so we have to do some extra checking.
10083                  */
10084                 if (dback->disk_bytenr != rec->start ||
10085                     dback->bytes != rec->nr || back->broken)
10086                         mismatch = 1;
10087
10088                 if (back->broken) {
10089                         entry->broken++;
10090                         broken_entries++;
10091                 }
10092
10093                 entry->count++;
10094         }
10095
10096         /* Yay all the backrefs agree, carry on good sir */
10097         if (nr_entries <= 1 && !mismatch)
10098                 goto out;
10099
10100         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10101                 "%Lu\n", rec->start);
10102
10103         /*
10104          * First we want to see if the backrefs can agree amongst themselves who
10105          * is right, so figure out which one of the entries has the highest
10106          * count.
10107          */
10108         best = find_most_right_entry(&entries);
10109
10110         /*
10111          * Ok so we may have an even split between what the backrefs think, so
10112          * this is where we use the extent ref to see what it thinks.
10113          */
10114         if (!best) {
10115                 entry = find_entry(&entries, rec->start, rec->nr);
10116                 if (!entry && (!broken_entries || !rec->found_rec)) {
10117                         fprintf(stderr, "Backrefs don't agree with each other "
10118                                 "and extent record doesn't agree with anybody,"
10119                                 " so we can't fix bytenr %Lu bytes %Lu\n",
10120                                 rec->start, rec->nr);
10121                         ret = -EINVAL;
10122                         goto out;
10123                 } else if (!entry) {
10124                         /*
10125                          * Ok our backrefs were broken, we'll assume this is the
10126                          * correct value and add an entry for this range.
10127                          */
10128                         entry = malloc(sizeof(struct extent_entry));
10129                         if (!entry) {
10130                                 ret = -ENOMEM;
10131                                 goto out;
10132                         }
10133                         memset(entry, 0, sizeof(*entry));
10134                         entry->bytenr = rec->start;
10135                         entry->bytes = rec->nr;
10136                         list_add_tail(&entry->list, &entries);
10137                         nr_entries++;
10138                 }
10139                 entry->count++;
10140                 best = find_most_right_entry(&entries);
10141                 if (!best) {
10142                         fprintf(stderr, "Backrefs and extent record evenly "
10143                                 "split on who is right, this is going to "
10144                                 "require user input to fix bytenr %Lu bytes "
10145                                 "%Lu\n", rec->start, rec->nr);
10146                         ret = -EINVAL;
10147                         goto out;
10148                 }
10149         }
10150
10151         /*
10152          * I don't think this can happen currently as we'll abort() if we catch
10153          * this case higher up, but in case somebody removes that we still can't
10154          * deal with it properly here yet, so just bail out of that's the case.
10155          */
10156         if (best->bytenr != rec->start) {
10157                 fprintf(stderr, "Extent start and backref starts don't match, "
10158                         "please use btrfs-image on this file system and send "
10159                         "it to a btrfs developer so they can make fsck fix "
10160                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
10161                         rec->start, rec->nr);
10162                 ret = -EINVAL;
10163                 goto out;
10164         }
10165
10166         /*
10167          * Ok great we all agreed on an extent record, let's go find the real
10168          * references and fix up the ones that don't match.
10169          */
10170         rbtree_postorder_for_each_entry_safe(back, tmp,
10171                                              &rec->backref_tree, node) {
10172                 if (back->full_backref || !back->is_data)
10173                         continue;
10174
10175                 dback = to_data_backref(back);
10176
10177                 /*
10178                  * Still ignoring backrefs that don't have a real ref attached
10179                  * to them.
10180                  */
10181                 if (dback->found_ref == 0)
10182                         continue;
10183
10184                 if (dback->bytes == best->bytes &&
10185                     dback->disk_bytenr == best->bytenr)
10186                         continue;
10187
10188                 ret = repair_ref(info, path, dback, best);
10189                 if (ret)
10190                         goto out;
10191         }
10192
10193         /*
10194          * Ok we messed with the actual refs, which means we need to drop our
10195          * entire cache and go back and rescan.  I know this is a huge pain and
10196          * adds a lot of extra work, but it's the only way to be safe.  Once all
10197          * the backrefs agree we may not need to do anything to the extent
10198          * record itself.
10199          */
10200         ret = -EAGAIN;
10201 out:
10202         while (!list_empty(&entries)) {
10203                 entry = list_entry(entries.next, struct extent_entry, list);
10204                 list_del_init(&entry->list);
10205                 free(entry);
10206         }
10207         return ret;
10208 }
10209
10210 static int process_duplicates(struct cache_tree *extent_cache,
10211                               struct extent_record *rec)
10212 {
10213         struct extent_record *good, *tmp;
10214         struct cache_extent *cache;
10215         int ret;
10216
10217         /*
10218          * If we found a extent record for this extent then return, or if we
10219          * have more than one duplicate we are likely going to need to delete
10220          * something.
10221          */
10222         if (rec->found_rec || rec->num_duplicates > 1)
10223                 return 0;
10224
10225         /* Shouldn't happen but just in case */
10226         BUG_ON(!rec->num_duplicates);
10227
10228         /*
10229          * So this happens if we end up with a backref that doesn't match the
10230          * actual extent entry.  So either the backref is bad or the extent
10231          * entry is bad.  Either way we want to have the extent_record actually
10232          * reflect what we found in the extent_tree, so we need to take the
10233          * duplicate out and use that as the extent_record since the only way we
10234          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10235          */
10236         remove_cache_extent(extent_cache, &rec->cache);
10237
10238         good = to_extent_record(rec->dups.next);
10239         list_del_init(&good->list);
10240         INIT_LIST_HEAD(&good->backrefs);
10241         INIT_LIST_HEAD(&good->dups);
10242         good->cache.start = good->start;
10243         good->cache.size = good->nr;
10244         good->content_checked = 0;
10245         good->owner_ref_checked = 0;
10246         good->num_duplicates = 0;
10247         good->refs = rec->refs;
10248         list_splice_init(&rec->backrefs, &good->backrefs);
10249         while (1) {
10250                 cache = lookup_cache_extent(extent_cache, good->start,
10251                                             good->nr);
10252                 if (!cache)
10253                         break;
10254                 tmp = container_of(cache, struct extent_record, cache);
10255
10256                 /*
10257                  * If we find another overlapping extent and it's found_rec is
10258                  * set then it's a duplicate and we need to try and delete
10259                  * something.
10260                  */
10261                 if (tmp->found_rec || tmp->num_duplicates > 0) {
10262                         if (list_empty(&good->list))
10263                                 list_add_tail(&good->list,
10264                                               &duplicate_extents);
10265                         good->num_duplicates += tmp->num_duplicates + 1;
10266                         list_splice_init(&tmp->dups, &good->dups);
10267                         list_del_init(&tmp->list);
10268                         list_add_tail(&tmp->list, &good->dups);
10269                         remove_cache_extent(extent_cache, &tmp->cache);
10270                         continue;
10271                 }
10272
10273                 /*
10274                  * Ok we have another non extent item backed extent rec, so lets
10275                  * just add it to this extent and carry on like we did above.
10276                  */
10277                 good->refs += tmp->refs;
10278                 list_splice_init(&tmp->backrefs, &good->backrefs);
10279                 remove_cache_extent(extent_cache, &tmp->cache);
10280                 free(tmp);
10281         }
10282         ret = insert_cache_extent(extent_cache, &good->cache);
10283         BUG_ON(ret);
10284         free(rec);
10285         return good->num_duplicates ? 0 : 1;
10286 }
10287
10288 static int delete_duplicate_records(struct btrfs_root *root,
10289                                     struct extent_record *rec)
10290 {
10291         struct btrfs_trans_handle *trans;
10292         LIST_HEAD(delete_list);
10293         struct btrfs_path path;
10294         struct extent_record *tmp, *good, *n;
10295         int nr_del = 0;
10296         int ret = 0, err;
10297         struct btrfs_key key;
10298
10299         btrfs_init_path(&path);
10300
10301         good = rec;
10302         /* Find the record that covers all of the duplicates. */
10303         list_for_each_entry(tmp, &rec->dups, list) {
10304                 if (good->start < tmp->start)
10305                         continue;
10306                 if (good->nr > tmp->nr)
10307                         continue;
10308
10309                 if (tmp->start + tmp->nr < good->start + good->nr) {
10310                         fprintf(stderr, "Ok we have overlapping extents that "
10311                                 "aren't completely covered by each other, this "
10312                                 "is going to require more careful thought.  "
10313                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10314                                 tmp->start, tmp->nr, good->start, good->nr);
10315                         abort();
10316                 }
10317                 good = tmp;
10318         }
10319
10320         if (good != rec)
10321                 list_add_tail(&rec->list, &delete_list);
10322
10323         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10324                 if (tmp == good)
10325                         continue;
10326                 list_move_tail(&tmp->list, &delete_list);
10327         }
10328
10329         root = root->fs_info->extent_root;
10330         trans = btrfs_start_transaction(root, 1);
10331         if (IS_ERR(trans)) {
10332                 ret = PTR_ERR(trans);
10333                 goto out;
10334         }
10335
10336         list_for_each_entry(tmp, &delete_list, list) {
10337                 if (tmp->found_rec == 0)
10338                         continue;
10339                 key.objectid = tmp->start;
10340                 key.type = BTRFS_EXTENT_ITEM_KEY;
10341                 key.offset = tmp->nr;
10342
10343                 /* Shouldn't happen but just in case */
10344                 if (tmp->metadata) {
10345                         fprintf(stderr, "Well this shouldn't happen, extent "
10346                                 "record overlaps but is metadata? "
10347                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10348                         abort();
10349                 }
10350
10351                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10352                 if (ret) {
10353                         if (ret > 0)
10354                                 ret = -EINVAL;
10355                         break;
10356                 }
10357                 ret = btrfs_del_item(trans, root, &path);
10358                 if (ret)
10359                         break;
10360                 btrfs_release_path(&path);
10361                 nr_del++;
10362         }
10363         err = btrfs_commit_transaction(trans, root);
10364         if (err && !ret)
10365                 ret = err;
10366 out:
10367         while (!list_empty(&delete_list)) {
10368                 tmp = to_extent_record(delete_list.next);
10369                 list_del_init(&tmp->list);
10370                 if (tmp == rec)
10371                         continue;
10372                 free(tmp);
10373         }
10374
10375         while (!list_empty(&rec->dups)) {
10376                 tmp = to_extent_record(rec->dups.next);
10377                 list_del_init(&tmp->list);
10378                 free(tmp);
10379         }
10380
10381         btrfs_release_path(&path);
10382
10383         if (!ret && !nr_del)
10384                 rec->num_duplicates = 0;
10385
10386         return ret ? ret : nr_del;
10387 }
10388
10389 static int find_possible_backrefs(struct btrfs_fs_info *info,
10390                                   struct btrfs_path *path,
10391                                   struct cache_tree *extent_cache,
10392                                   struct extent_record *rec)
10393 {
10394         struct btrfs_root *root;
10395         struct extent_backref *back, *tmp;
10396         struct data_backref *dback;
10397         struct cache_extent *cache;
10398         struct btrfs_file_extent_item *fi;
10399         struct btrfs_key key;
10400         u64 bytenr, bytes;
10401         int ret;
10402
10403         rbtree_postorder_for_each_entry_safe(back, tmp,
10404                                              &rec->backref_tree, node) {
10405                 /* Don't care about full backrefs (poor unloved backrefs) */
10406                 if (back->full_backref || !back->is_data)
10407                         continue;
10408
10409                 dback = to_data_backref(back);
10410
10411                 /* We found this one, we don't need to do a lookup */
10412                 if (dback->found_ref)
10413                         continue;
10414
10415                 key.objectid = dback->root;
10416                 key.type = BTRFS_ROOT_ITEM_KEY;
10417                 key.offset = (u64)-1;
10418
10419                 root = btrfs_read_fs_root(info, &key);
10420
10421                 /* No root, definitely a bad ref, skip */
10422                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10423                         continue;
10424                 /* Other err, exit */
10425                 if (IS_ERR(root))
10426                         return PTR_ERR(root);
10427
10428                 key.objectid = dback->owner;
10429                 key.type = BTRFS_EXTENT_DATA_KEY;
10430                 key.offset = dback->offset;
10431                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10432                 if (ret) {
10433                         btrfs_release_path(path);
10434                         if (ret < 0)
10435                                 return ret;
10436                         /* Didn't find it, we can carry on */
10437                         ret = 0;
10438                         continue;
10439                 }
10440
10441                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10442                                     struct btrfs_file_extent_item);
10443                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10444                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10445                 btrfs_release_path(path);
10446                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10447                 if (cache) {
10448                         struct extent_record *tmp;
10449                         tmp = container_of(cache, struct extent_record, cache);
10450
10451                         /*
10452                          * If we found an extent record for the bytenr for this
10453                          * particular backref then we can't add it to our
10454                          * current extent record.  We only want to add backrefs
10455                          * that don't have a corresponding extent item in the
10456                          * extent tree since they likely belong to this record
10457                          * and we need to fix it if it doesn't match bytenrs.
10458                          */
10459                         if  (tmp->found_rec)
10460                                 continue;
10461                 }
10462
10463                 dback->found_ref += 1;
10464                 dback->disk_bytenr = bytenr;
10465                 dback->bytes = bytes;
10466
10467                 /*
10468                  * Set this so the verify backref code knows not to trust the
10469                  * values in this backref.
10470                  */
10471                 back->broken = 1;
10472         }
10473
10474         return 0;
10475 }
10476
10477 /*
10478  * Record orphan data ref into corresponding root.
10479  *
10480  * Return 0 if the extent item contains data ref and recorded.
10481  * Return 1 if the extent item contains no useful data ref
10482  *   On that case, it may contains only shared_dataref or metadata backref
10483  *   or the file extent exists(this should be handled by the extent bytenr
10484  *   recovery routine)
10485  * Return <0 if something goes wrong.
10486  */
10487 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10488                                       struct extent_record *rec)
10489 {
10490         struct btrfs_key key;
10491         struct btrfs_root *dest_root;
10492         struct extent_backref *back, *tmp;
10493         struct data_backref *dback;
10494         struct orphan_data_extent *orphan;
10495         struct btrfs_path path;
10496         int recorded_data_ref = 0;
10497         int ret = 0;
10498
10499         if (rec->metadata)
10500                 return 1;
10501         btrfs_init_path(&path);
10502         rbtree_postorder_for_each_entry_safe(back, tmp,
10503                                              &rec->backref_tree, node) {
10504                 if (back->full_backref || !back->is_data ||
10505                     !back->found_extent_tree)
10506                         continue;
10507                 dback = to_data_backref(back);
10508                 if (dback->found_ref)
10509                         continue;
10510                 key.objectid = dback->root;
10511                 key.type = BTRFS_ROOT_ITEM_KEY;
10512                 key.offset = (u64)-1;
10513
10514                 dest_root = btrfs_read_fs_root(fs_info, &key);
10515
10516                 /* For non-exist root we just skip it */
10517                 if (IS_ERR(dest_root) || !dest_root)
10518                         continue;
10519
10520                 key.objectid = dback->owner;
10521                 key.type = BTRFS_EXTENT_DATA_KEY;
10522                 key.offset = dback->offset;
10523
10524                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10525                 btrfs_release_path(&path);
10526                 /*
10527                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10528                  * we need to record it for inode/file extent rebuild.
10529                  * For ret > 0, we record it only for file extent rebuild.
10530                  * For ret == 0, the file extent exists but only bytenr
10531                  * mismatch, let the original bytenr fix routine to handle,
10532                  * don't record it.
10533                  */
10534                 if (ret == 0)
10535                         continue;
10536                 ret = 0;
10537                 orphan = malloc(sizeof(*orphan));
10538                 if (!orphan) {
10539                         ret = -ENOMEM;
10540                         goto out;
10541                 }
10542                 INIT_LIST_HEAD(&orphan->list);
10543                 orphan->root = dback->root;
10544                 orphan->objectid = dback->owner;
10545                 orphan->offset = dback->offset;
10546                 orphan->disk_bytenr = rec->cache.start;
10547                 orphan->disk_len = rec->cache.size;
10548                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10549                 recorded_data_ref = 1;
10550         }
10551 out:
10552         btrfs_release_path(&path);
10553         if (!ret)
10554                 return !recorded_data_ref;
10555         else
10556                 return ret;
10557 }
10558
10559 /*
10560  * when an incorrect extent item is found, this will delete
10561  * all of the existing entries for it and recreate them
10562  * based on what the tree scan found.
10563  */
10564 static int fixup_extent_refs(struct btrfs_fs_info *info,
10565                              struct cache_tree *extent_cache,
10566                              struct extent_record *rec)
10567 {
10568         struct btrfs_trans_handle *trans = NULL;
10569         int ret;
10570         struct btrfs_path path;
10571         struct cache_extent *cache;
10572         struct extent_backref *back, *tmp;
10573         int allocated = 0;
10574         u64 flags = 0;
10575
10576         if (rec->flag_block_full_backref)
10577                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10578
10579         btrfs_init_path(&path);
10580         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10581                 /*
10582                  * Sometimes the backrefs themselves are so broken they don't
10583                  * get attached to any meaningful rec, so first go back and
10584                  * check any of our backrefs that we couldn't find and throw
10585                  * them into the list if we find the backref so that
10586                  * verify_backrefs can figure out what to do.
10587                  */
10588                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10589                 if (ret < 0)
10590                         goto out;
10591         }
10592
10593         /* step one, make sure all of the backrefs agree */
10594         ret = verify_backrefs(info, &path, rec);
10595         if (ret < 0)
10596                 goto out;
10597
10598         trans = btrfs_start_transaction(info->extent_root, 1);
10599         if (IS_ERR(trans)) {
10600                 ret = PTR_ERR(trans);
10601                 goto out;
10602         }
10603
10604         /* step two, delete all the existing records */
10605         ret = delete_extent_records(trans, info->extent_root, &path,
10606                                     rec->start);
10607
10608         if (ret < 0)
10609                 goto out;
10610
10611         /* was this block corrupt?  If so, don't add references to it */
10612         cache = lookup_cache_extent(info->corrupt_blocks,
10613                                     rec->start, rec->max_size);
10614         if (cache) {
10615                 ret = 0;
10616                 goto out;
10617         }
10618
10619         /* step three, recreate all the refs we did find */
10620         rbtree_postorder_for_each_entry_safe(back, tmp,
10621                                              &rec->backref_tree, node) {
10622                 /*
10623                  * if we didn't find any references, don't create a
10624                  * new extent record
10625                  */
10626                 if (!back->found_ref)
10627                         continue;
10628
10629                 rec->bad_full_backref = 0;
10630                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10631                 allocated = 1;
10632
10633                 if (ret)
10634                         goto out;
10635         }
10636 out:
10637         if (trans) {
10638                 int err = btrfs_commit_transaction(trans, info->extent_root);
10639                 if (!ret)
10640                         ret = err;
10641         }
10642
10643         if (!ret)
10644                 fprintf(stderr, "Repaired extent references for %llu\n",
10645                                 (unsigned long long)rec->start);
10646
10647         btrfs_release_path(&path);
10648         return ret;
10649 }
10650
10651 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10652                               struct extent_record *rec)
10653 {
10654         struct btrfs_trans_handle *trans;
10655         struct btrfs_root *root = fs_info->extent_root;
10656         struct btrfs_path path;
10657         struct btrfs_extent_item *ei;
10658         struct btrfs_key key;
10659         u64 flags;
10660         int ret = 0;
10661
10662         key.objectid = rec->start;
10663         if (rec->metadata) {
10664                 key.type = BTRFS_METADATA_ITEM_KEY;
10665                 key.offset = rec->info_level;
10666         } else {
10667                 key.type = BTRFS_EXTENT_ITEM_KEY;
10668                 key.offset = rec->max_size;
10669         }
10670
10671         trans = btrfs_start_transaction(root, 0);
10672         if (IS_ERR(trans))
10673                 return PTR_ERR(trans);
10674
10675         btrfs_init_path(&path);
10676         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10677         if (ret < 0) {
10678                 btrfs_release_path(&path);
10679                 btrfs_commit_transaction(trans, root);
10680                 return ret;
10681         } else if (ret) {
10682                 fprintf(stderr, "Didn't find extent for %llu\n",
10683                         (unsigned long long)rec->start);
10684                 btrfs_release_path(&path);
10685                 btrfs_commit_transaction(trans, root);
10686                 return -ENOENT;
10687         }
10688
10689         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10690                             struct btrfs_extent_item);
10691         flags = btrfs_extent_flags(path.nodes[0], ei);
10692         if (rec->flag_block_full_backref) {
10693                 fprintf(stderr, "setting full backref on %llu\n",
10694                         (unsigned long long)key.objectid);
10695                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10696         } else {
10697                 fprintf(stderr, "clearing full backref on %llu\n",
10698                         (unsigned long long)key.objectid);
10699                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10700         }
10701         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10702         btrfs_mark_buffer_dirty(path.nodes[0]);
10703         btrfs_release_path(&path);
10704         ret = btrfs_commit_transaction(trans, root);
10705         if (!ret)
10706                 fprintf(stderr, "Repaired extent flags for %llu\n",
10707                                 (unsigned long long)rec->start);
10708
10709         return ret;
10710 }
10711
10712 /* right now we only prune from the extent allocation tree */
10713 static int prune_one_block(struct btrfs_trans_handle *trans,
10714                            struct btrfs_fs_info *info,
10715                            struct btrfs_corrupt_block *corrupt)
10716 {
10717         int ret;
10718         struct btrfs_path path;
10719         struct extent_buffer *eb;
10720         u64 found;
10721         int slot;
10722         int nritems;
10723         int level = corrupt->level + 1;
10724
10725         btrfs_init_path(&path);
10726 again:
10727         /* we want to stop at the parent to our busted block */
10728         path.lowest_level = level;
10729
10730         ret = btrfs_search_slot(trans, info->extent_root,
10731                                 &corrupt->key, &path, -1, 1);
10732
10733         if (ret < 0)
10734                 goto out;
10735
10736         eb = path.nodes[level];
10737         if (!eb) {
10738                 ret = -ENOENT;
10739                 goto out;
10740         }
10741
10742         /*
10743          * hopefully the search gave us the block we want to prune,
10744          * lets try that first
10745          */
10746         slot = path.slots[level];
10747         found =  btrfs_node_blockptr(eb, slot);
10748         if (found == corrupt->cache.start)
10749                 goto del_ptr;
10750
10751         nritems = btrfs_header_nritems(eb);
10752
10753         /* the search failed, lets scan this node and hope we find it */
10754         for (slot = 0; slot < nritems; slot++) {
10755                 found =  btrfs_node_blockptr(eb, slot);
10756                 if (found == corrupt->cache.start)
10757                         goto del_ptr;
10758         }
10759         /*
10760          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10761          * to this block
10762          */
10763         if (eb == info->extent_root->node) {
10764                 ret = -ENOENT;
10765                 goto out;
10766         } else {
10767                 level++;
10768                 btrfs_release_path(&path);
10769                 goto again;
10770         }
10771
10772 del_ptr:
10773         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10774         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10775
10776 out:
10777         btrfs_release_path(&path);
10778         return ret;
10779 }
10780
10781 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10782 {
10783         struct btrfs_trans_handle *trans = NULL;
10784         struct cache_extent *cache;
10785         struct btrfs_corrupt_block *corrupt;
10786
10787         while (1) {
10788                 cache = search_cache_extent(info->corrupt_blocks, 0);
10789                 if (!cache)
10790                         break;
10791                 if (!trans) {
10792                         trans = btrfs_start_transaction(info->extent_root, 1);
10793                         if (IS_ERR(trans))
10794                                 return PTR_ERR(trans);
10795                 }
10796                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10797                 prune_one_block(trans, info, corrupt);
10798                 remove_cache_extent(info->corrupt_blocks, cache);
10799         }
10800         if (trans)
10801                 return btrfs_commit_transaction(trans, info->extent_root);
10802         return 0;
10803 }
10804
10805 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10806 {
10807         struct btrfs_block_group_cache *cache;
10808         u64 start, end;
10809         int ret;
10810
10811         while (1) {
10812                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10813                                             &start, &end, EXTENT_DIRTY);
10814                 if (ret)
10815                         break;
10816                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10817         }
10818
10819         start = 0;
10820         while (1) {
10821                 cache = btrfs_lookup_first_block_group(fs_info, start);
10822                 if (!cache)
10823                         break;
10824                 if (cache->cached)
10825                         cache->cached = 0;
10826                 start = cache->key.objectid + cache->key.offset;
10827         }
10828 }
10829
10830 static int check_extent_refs(struct btrfs_root *root,
10831                              struct cache_tree *extent_cache)
10832 {
10833         struct extent_record *rec;
10834         struct cache_extent *cache;
10835         int ret = 0;
10836         int had_dups = 0;
10837         int err = 0;
10838
10839         if (repair) {
10840                 /*
10841                  * if we're doing a repair, we have to make sure
10842                  * we don't allocate from the problem extents.
10843                  * In the worst case, this will be all the
10844                  * extents in the FS
10845                  */
10846                 cache = search_cache_extent(extent_cache, 0);
10847                 while(cache) {
10848                         rec = container_of(cache, struct extent_record, cache);
10849                         set_extent_dirty(root->fs_info->excluded_extents,
10850                                          rec->start,
10851                                          rec->start + rec->max_size - 1);
10852                         cache = next_cache_extent(cache);
10853                 }
10854
10855                 /* pin down all the corrupted blocks too */
10856                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10857                 while(cache) {
10858                         set_extent_dirty(root->fs_info->excluded_extents,
10859                                          cache->start,
10860                                          cache->start + cache->size - 1);
10861                         cache = next_cache_extent(cache);
10862                 }
10863                 prune_corrupt_blocks(root->fs_info);
10864                 reset_cached_block_groups(root->fs_info);
10865         }
10866
10867         reset_cached_block_groups(root->fs_info);
10868
10869         /*
10870          * We need to delete any duplicate entries we find first otherwise we
10871          * could mess up the extent tree when we have backrefs that actually
10872          * belong to a different extent item and not the weird duplicate one.
10873          */
10874         while (repair && !list_empty(&duplicate_extents)) {
10875                 rec = to_extent_record(duplicate_extents.next);
10876                 list_del_init(&rec->list);
10877
10878                 /* Sometimes we can find a backref before we find an actual
10879                  * extent, so we need to process it a little bit to see if there
10880                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10881                  * if this is a backref screwup.  If we need to delete stuff
10882                  * process_duplicates() will return 0, otherwise it will return
10883                  * 1 and we
10884                  */
10885                 if (process_duplicates(extent_cache, rec))
10886                         continue;
10887                 ret = delete_duplicate_records(root, rec);
10888                 if (ret < 0)
10889                         return ret;
10890                 /*
10891                  * delete_duplicate_records will return the number of entries
10892                  * deleted, so if it's greater than 0 then we know we actually
10893                  * did something and we need to remove.
10894                  */
10895                 if (ret)
10896                         had_dups = 1;
10897         }
10898
10899         if (had_dups)
10900                 return -EAGAIN;
10901
10902         while(1) {
10903                 int cur_err = 0;
10904                 int fix = 0;
10905
10906                 cache = search_cache_extent(extent_cache, 0);
10907                 if (!cache)
10908                         break;
10909                 rec = container_of(cache, struct extent_record, cache);
10910                 if (rec->num_duplicates) {
10911                         fprintf(stderr, "extent item %llu has multiple extent "
10912                                 "items\n", (unsigned long long)rec->start);
10913                         cur_err = 1;
10914                 }
10915
10916                 if (rec->refs != rec->extent_item_refs) {
10917                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10918                                 (unsigned long long)rec->start,
10919                                 (unsigned long long)rec->nr);
10920                         fprintf(stderr, "extent item %llu, found %llu\n",
10921                                 (unsigned long long)rec->extent_item_refs,
10922                                 (unsigned long long)rec->refs);
10923                         ret = record_orphan_data_extents(root->fs_info, rec);
10924                         if (ret < 0)
10925                                 goto repair_abort;
10926                         fix = ret;
10927                         cur_err = 1;
10928                 }
10929                 if (all_backpointers_checked(rec, 1)) {
10930                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10931                                 (unsigned long long)rec->start,
10932                                 (unsigned long long)rec->nr);
10933                         fix = 1;
10934                         cur_err = 1;
10935                 }
10936                 if (!rec->owner_ref_checked) {
10937                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10938                                 (unsigned long long)rec->start,
10939                                 (unsigned long long)rec->nr);
10940                         fix = 1;
10941                         cur_err = 1;
10942                 }
10943
10944                 if (repair && fix) {
10945                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10946                         if (ret)
10947                                 goto repair_abort;
10948                 }
10949
10950
10951                 if (rec->bad_full_backref) {
10952                         fprintf(stderr, "bad full backref, on [%llu]\n",
10953                                 (unsigned long long)rec->start);
10954                         if (repair) {
10955                                 ret = fixup_extent_flags(root->fs_info, rec);
10956                                 if (ret)
10957                                         goto repair_abort;
10958                                 fix = 1;
10959                         }
10960                         cur_err = 1;
10961                 }
10962                 /*
10963                  * Although it's not a extent ref's problem, we reuse this
10964                  * routine for error reporting.
10965                  * No repair function yet.
10966                  */
10967                 if (rec->crossing_stripes) {
10968                         fprintf(stderr,
10969                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10970                                 rec->start, rec->start + rec->max_size);
10971                         cur_err = 1;
10972                 }
10973
10974                 if (rec->wrong_chunk_type) {
10975                         fprintf(stderr,
10976                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10977                                 rec->start, rec->start + rec->max_size);
10978                         cur_err = 1;
10979                 }
10980
10981                 err = cur_err;
10982                 remove_cache_extent(extent_cache, cache);
10983                 free_all_extent_backrefs(rec);
10984                 if (!init_extent_tree && repair && (!cur_err || fix))
10985                         clear_extent_dirty(root->fs_info->excluded_extents,
10986                                            rec->start,
10987                                            rec->start + rec->max_size - 1);
10988                 free(rec);
10989         }
10990 repair_abort:
10991         if (repair) {
10992                 if (ret && ret != -EAGAIN) {
10993                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10994                         exit(1);
10995                 } else if (!ret) {
10996                         struct btrfs_trans_handle *trans;
10997
10998                         root = root->fs_info->extent_root;
10999                         trans = btrfs_start_transaction(root, 1);
11000                         if (IS_ERR(trans)) {
11001                                 ret = PTR_ERR(trans);
11002                                 goto repair_abort;
11003                         }
11004
11005                         ret = btrfs_fix_block_accounting(trans, root);
11006                         if (ret)
11007                                 goto repair_abort;
11008                         ret = btrfs_commit_transaction(trans, root);
11009                         if (ret)
11010                                 goto repair_abort;
11011                 }
11012                 return ret;
11013         }
11014
11015         if (err)
11016                 err = -EIO;
11017         return err;
11018 }
11019
11020 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11021 {
11022         u64 stripe_size;
11023
11024         if (type & BTRFS_BLOCK_GROUP_RAID0) {
11025                 stripe_size = length;
11026                 stripe_size /= num_stripes;
11027         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11028                 stripe_size = length * 2;
11029                 stripe_size /= num_stripes;
11030         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11031                 stripe_size = length;
11032                 stripe_size /= (num_stripes - 1);
11033         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11034                 stripe_size = length;
11035                 stripe_size /= (num_stripes - 2);
11036         } else {
11037                 stripe_size = length;
11038         }
11039         return stripe_size;
11040 }
11041
11042 /*
11043  * Check the chunk with its block group/dev list ref:
11044  * Return 0 if all refs seems valid.
11045  * Return 1 if part of refs seems valid, need later check for rebuild ref
11046  * like missing block group and needs to search extent tree to rebuild them.
11047  * Return -1 if essential refs are missing and unable to rebuild.
11048  */
11049 static int check_chunk_refs(struct chunk_record *chunk_rec,
11050                             struct block_group_tree *block_group_cache,
11051                             struct device_extent_tree *dev_extent_cache,
11052                             int silent)
11053 {
11054         struct cache_extent *block_group_item;
11055         struct block_group_record *block_group_rec;
11056         struct cache_extent *dev_extent_item;
11057         struct device_extent_record *dev_extent_rec;
11058         u64 devid;
11059         u64 offset;
11060         u64 length;
11061         int metadump_v2 = 0;
11062         int i;
11063         int ret = 0;
11064
11065         block_group_item = lookup_cache_extent(&block_group_cache->tree,
11066                                                chunk_rec->offset,
11067                                                chunk_rec->length);
11068         if (block_group_item) {
11069                 block_group_rec = container_of(block_group_item,
11070                                                struct block_group_record,
11071                                                cache);
11072                 if (chunk_rec->length != block_group_rec->offset ||
11073                     chunk_rec->offset != block_group_rec->objectid ||
11074                     (!metadump_v2 &&
11075                      chunk_rec->type_flags != block_group_rec->flags)) {
11076                         if (!silent)
11077                                 fprintf(stderr,
11078                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11079                                         chunk_rec->objectid,
11080                                         chunk_rec->type,
11081                                         chunk_rec->offset,
11082                                         chunk_rec->length,
11083                                         chunk_rec->offset,
11084                                         chunk_rec->type_flags,
11085                                         block_group_rec->objectid,
11086                                         block_group_rec->type,
11087                                         block_group_rec->offset,
11088                                         block_group_rec->offset,
11089                                         block_group_rec->objectid,
11090                                         block_group_rec->flags);
11091                         ret = -1;
11092                 } else {
11093                         list_del_init(&block_group_rec->list);
11094                         chunk_rec->bg_rec = block_group_rec;
11095                 }
11096         } else {
11097                 if (!silent)
11098                         fprintf(stderr,
11099                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11100                                 chunk_rec->objectid,
11101                                 chunk_rec->type,
11102                                 chunk_rec->offset,
11103                                 chunk_rec->length,
11104                                 chunk_rec->offset,
11105                                 chunk_rec->type_flags);
11106                 ret = 1;
11107         }
11108
11109         if (metadump_v2)
11110                 return ret;
11111
11112         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11113                                     chunk_rec->num_stripes);
11114         for (i = 0; i < chunk_rec->num_stripes; ++i) {
11115                 devid = chunk_rec->stripes[i].devid;
11116                 offset = chunk_rec->stripes[i].offset;
11117                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11118                                                        devid, offset, length);
11119                 if (dev_extent_item) {
11120                         dev_extent_rec = container_of(dev_extent_item,
11121                                                 struct device_extent_record,
11122                                                 cache);
11123                         if (dev_extent_rec->objectid != devid ||
11124                             dev_extent_rec->offset != offset ||
11125                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
11126                             dev_extent_rec->length != length) {
11127                                 if (!silent)
11128                                         fprintf(stderr,
11129                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11130                                                 chunk_rec->objectid,
11131                                                 chunk_rec->type,
11132                                                 chunk_rec->offset,
11133                                                 chunk_rec->stripes[i].devid,
11134                                                 chunk_rec->stripes[i].offset,
11135                                                 dev_extent_rec->objectid,
11136                                                 dev_extent_rec->offset,
11137                                                 dev_extent_rec->length);
11138                                 ret = -1;
11139                         } else {
11140                                 list_move(&dev_extent_rec->chunk_list,
11141                                           &chunk_rec->dextents);
11142                         }
11143                 } else {
11144                         if (!silent)
11145                                 fprintf(stderr,
11146                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11147                                         chunk_rec->objectid,
11148                                         chunk_rec->type,
11149                                         chunk_rec->offset,
11150                                         chunk_rec->stripes[i].devid,
11151                                         chunk_rec->stripes[i].offset);
11152                         ret = -1;
11153                 }
11154         }
11155         return ret;
11156 }
11157
11158 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
11159 int check_chunks(struct cache_tree *chunk_cache,
11160                  struct block_group_tree *block_group_cache,
11161                  struct device_extent_tree *dev_extent_cache,
11162                  struct list_head *good, struct list_head *bad,
11163                  struct list_head *rebuild, int silent)
11164 {
11165         struct cache_extent *chunk_item;
11166         struct chunk_record *chunk_rec;
11167         struct block_group_record *bg_rec;
11168         struct device_extent_record *dext_rec;
11169         int err;
11170         int ret = 0;
11171
11172         chunk_item = first_cache_extent(chunk_cache);
11173         while (chunk_item) {
11174                 chunk_rec = container_of(chunk_item, struct chunk_record,
11175                                          cache);
11176                 err = check_chunk_refs(chunk_rec, block_group_cache,
11177                                        dev_extent_cache, silent);
11178                 if (err < 0)
11179                         ret = err;
11180                 if (err == 0 && good)
11181                         list_add_tail(&chunk_rec->list, good);
11182                 if (err > 0 && rebuild)
11183                         list_add_tail(&chunk_rec->list, rebuild);
11184                 if (err < 0 && bad)
11185                         list_add_tail(&chunk_rec->list, bad);
11186                 chunk_item = next_cache_extent(chunk_item);
11187         }
11188
11189         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11190                 if (!silent)
11191                         fprintf(stderr,
11192                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11193                                 bg_rec->objectid,
11194                                 bg_rec->offset,
11195                                 bg_rec->flags);
11196                 if (!ret)
11197                         ret = 1;
11198         }
11199
11200         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11201                             chunk_list) {
11202                 if (!silent)
11203                         fprintf(stderr,
11204                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11205                                 dext_rec->objectid,
11206                                 dext_rec->offset,
11207                                 dext_rec->length);
11208                 if (!ret)
11209                         ret = 1;
11210         }
11211         return ret;
11212 }
11213
11214
11215 static int check_device_used(struct device_record *dev_rec,
11216                              struct device_extent_tree *dext_cache)
11217 {
11218         struct cache_extent *cache;
11219         struct device_extent_record *dev_extent_rec;
11220         u64 total_byte = 0;
11221
11222         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11223         while (cache) {
11224                 dev_extent_rec = container_of(cache,
11225                                               struct device_extent_record,
11226                                               cache);
11227                 if (dev_extent_rec->objectid != dev_rec->devid)
11228                         break;
11229
11230                 list_del_init(&dev_extent_rec->device_list);
11231                 total_byte += dev_extent_rec->length;
11232                 cache = next_cache_extent(cache);
11233         }
11234
11235         if (total_byte != dev_rec->byte_used) {
11236                 fprintf(stderr,
11237                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11238                         total_byte, dev_rec->byte_used, dev_rec->objectid,
11239                         dev_rec->type, dev_rec->offset);
11240                 return -1;
11241         } else {
11242                 return 0;
11243         }
11244 }
11245
11246 /*
11247  * Extra (optional) check for dev_item size to report possbile problem on a new
11248  * kernel.
11249  */
11250 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
11251 {
11252         if (!IS_ALIGNED(total_bytes, sectorsize)) {
11253                 warning(
11254 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
11255                         devid, total_bytes, sectorsize);
11256                 warning(
11257 "this is OK for older kernel, but may cause kernel warning for newer kernels");
11258                 warning("this can be fixed by 'btrfs rescue fix-device-size'");
11259         }
11260 }
11261
11262 /*
11263  * Unlike device size alignment check above, some super total_bytes check
11264  * failure can lead to mount failure for newer kernel.
11265  *
11266  * So this function will return the error for a fatal super total_bytes problem.
11267  */
11268 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
11269 {
11270         struct btrfs_device *dev;
11271         struct list_head *dev_list = &fs_info->fs_devices->devices;
11272         u64 total_bytes = 0;
11273         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
11274
11275         list_for_each_entry(dev, dev_list, dev_list)
11276                 total_bytes += dev->total_bytes;
11277
11278         /* Important check, which can cause unmountable fs */
11279         if (super_bytes < total_bytes) {
11280                 error("super total bytes %llu smaller than real device(s) size %llu",
11281                         super_bytes, total_bytes);
11282                 error("mounting this fs may fail for newer kernels");
11283                 error("this can be fixed by 'btrfs rescue fix-device-size'");
11284                 return false;
11285         }
11286
11287         /*
11288          * Optional check, just to make everything aligned and match with each
11289          * other.
11290          *
11291          * For a btrfs-image restored fs, we don't need to check it anyway.
11292          */
11293         if (btrfs_super_flags(fs_info->super_copy) &
11294             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
11295                 return true;
11296         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
11297             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
11298             super_bytes != total_bytes) {
11299                 warning("minor unaligned/mismatch device size detected");
11300                 warning(
11301                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
11302         }
11303         return true;
11304 }
11305
11306 /* check btrfs_dev_item -> btrfs_dev_extent */
11307 static int check_devices(struct rb_root *dev_cache,
11308                          struct device_extent_tree *dev_extent_cache)
11309 {
11310         struct rb_node *dev_node;
11311         struct device_record *dev_rec;
11312         struct device_extent_record *dext_rec;
11313         int err;
11314         int ret = 0;
11315
11316         dev_node = rb_first(dev_cache);
11317         while (dev_node) {
11318                 dev_rec = container_of(dev_node, struct device_record, node);
11319                 err = check_device_used(dev_rec, dev_extent_cache);
11320                 if (err)
11321                         ret = err;
11322
11323                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11324                                          global_info->sectorsize);
11325                 dev_node = rb_next(dev_node);
11326         }
11327         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11328                             device_list) {
11329                 fprintf(stderr,
11330                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11331                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
11332                 if (!ret)
11333                         ret = 1;
11334         }
11335         return ret;
11336 }
11337
11338 static int add_root_item_to_list(struct list_head *head,
11339                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11340                                   u8 level, u8 drop_level,
11341                                   struct btrfs_key *drop_key)
11342 {
11343
11344         struct root_item_record *ri_rec;
11345         ri_rec = malloc(sizeof(*ri_rec));
11346         if (!ri_rec)
11347                 return -ENOMEM;
11348         ri_rec->bytenr = bytenr;
11349         ri_rec->objectid = objectid;
11350         ri_rec->level = level;
11351         ri_rec->drop_level = drop_level;
11352         ri_rec->last_snapshot = last_snapshot;
11353         if (drop_key)
11354                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11355         list_add_tail(&ri_rec->list, head);
11356
11357         return 0;
11358 }
11359
11360 static void free_root_item_list(struct list_head *list)
11361 {
11362         struct root_item_record *ri_rec;
11363
11364         while (!list_empty(list)) {
11365                 ri_rec = list_first_entry(list, struct root_item_record,
11366                                           list);
11367                 list_del_init(&ri_rec->list);
11368                 free(ri_rec);
11369         }
11370 }
11371
11372 static int deal_root_from_list(struct list_head *list,
11373                                struct btrfs_root *root,
11374                                struct block_info *bits,
11375                                int bits_nr,
11376                                struct cache_tree *pending,
11377                                struct cache_tree *seen,
11378                                struct cache_tree *reada,
11379                                struct cache_tree *nodes,
11380                                struct cache_tree *extent_cache,
11381                                struct cache_tree *chunk_cache,
11382                                struct rb_root *dev_cache,
11383                                struct block_group_tree *block_group_cache,
11384                                struct device_extent_tree *dev_extent_cache)
11385 {
11386         int ret = 0;
11387         u64 last;
11388
11389         while (!list_empty(list)) {
11390                 struct root_item_record *rec;
11391                 struct extent_buffer *buf;
11392                 rec = list_entry(list->next,
11393                                  struct root_item_record, list);
11394                 last = 0;
11395                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11396                 if (!extent_buffer_uptodate(buf)) {
11397                         free_extent_buffer(buf);
11398                         ret = -EIO;
11399                         break;
11400                 }
11401                 ret = add_root_to_pending(buf, extent_cache, pending,
11402                                     seen, nodes, rec->objectid);
11403                 if (ret < 0)
11404                         break;
11405                 /*
11406                  * To rebuild extent tree, we need deal with snapshot
11407                  * one by one, otherwise we deal with node firstly which
11408                  * can maximize readahead.
11409                  */
11410                 while (1) {
11411                         ret = run_next_block(root, bits, bits_nr, &last,
11412                                              pending, seen, reada, nodes,
11413                                              extent_cache, chunk_cache,
11414                                              dev_cache, block_group_cache,
11415                                              dev_extent_cache, rec);
11416                         if (ret != 0)
11417                                 break;
11418                 }
11419                 free_extent_buffer(buf);
11420                 list_del(&rec->list);
11421                 free(rec);
11422                 if (ret < 0)
11423                         break;
11424         }
11425         while (ret >= 0) {
11426                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11427                                      reada, nodes, extent_cache, chunk_cache,
11428                                      dev_cache, block_group_cache,
11429                                      dev_extent_cache, NULL);
11430                 if (ret != 0) {
11431                         if (ret > 0)
11432                                 ret = 0;
11433                         break;
11434                 }
11435         }
11436         return ret;
11437 }
11438
11439 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11440 {
11441         struct rb_root dev_cache;
11442         struct cache_tree chunk_cache;
11443         struct block_group_tree block_group_cache;
11444         struct device_extent_tree dev_extent_cache;
11445         struct cache_tree extent_cache;
11446         struct cache_tree seen;
11447         struct cache_tree pending;
11448         struct cache_tree reada;
11449         struct cache_tree nodes;
11450         struct extent_io_tree excluded_extents;
11451         struct cache_tree corrupt_blocks;
11452         struct btrfs_path path;
11453         struct btrfs_key key;
11454         struct btrfs_key found_key;
11455         int ret, err = 0;
11456         struct block_info *bits;
11457         int bits_nr;
11458         struct extent_buffer *leaf;
11459         int slot;
11460         struct btrfs_root_item ri;
11461         struct list_head dropping_trees;
11462         struct list_head normal_trees;
11463         struct btrfs_root *root1;
11464         struct btrfs_root *root;
11465         u64 objectid;
11466         u8 level;
11467
11468         root = fs_info->fs_root;
11469         dev_cache = RB_ROOT;
11470         cache_tree_init(&chunk_cache);
11471         block_group_tree_init(&block_group_cache);
11472         device_extent_tree_init(&dev_extent_cache);
11473
11474         cache_tree_init(&extent_cache);
11475         cache_tree_init(&seen);
11476         cache_tree_init(&pending);
11477         cache_tree_init(&nodes);
11478         cache_tree_init(&reada);
11479         cache_tree_init(&corrupt_blocks);
11480         extent_io_tree_init(&excluded_extents);
11481         INIT_LIST_HEAD(&dropping_trees);
11482         INIT_LIST_HEAD(&normal_trees);
11483
11484         if (repair) {
11485                 fs_info->excluded_extents = &excluded_extents;
11486                 fs_info->fsck_extent_cache = &extent_cache;
11487                 fs_info->free_extent_hook = free_extent_hook;
11488                 fs_info->corrupt_blocks = &corrupt_blocks;
11489         }
11490
11491         bits_nr = 1024;
11492         bits = malloc(bits_nr * sizeof(struct block_info));
11493         if (!bits) {
11494                 perror("malloc");
11495                 exit(1);
11496         }
11497
11498         if (ctx.progress_enabled) {
11499                 ctx.tp = TASK_EXTENTS;
11500                 task_start(ctx.info);
11501         }
11502
11503 again:
11504         root1 = fs_info->tree_root;
11505         level = btrfs_header_level(root1->node);
11506         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11507                                     root1->node->start, 0, level, 0, NULL);
11508         if (ret < 0)
11509                 goto out;
11510         root1 = fs_info->chunk_root;
11511         level = btrfs_header_level(root1->node);
11512         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11513                                     root1->node->start, 0, level, 0, NULL);
11514         if (ret < 0)
11515                 goto out;
11516         btrfs_init_path(&path);
11517         key.offset = 0;
11518         key.objectid = 0;
11519         key.type = BTRFS_ROOT_ITEM_KEY;
11520         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11521         if (ret < 0)
11522                 goto out;
11523         while(1) {
11524                 leaf = path.nodes[0];
11525                 slot = path.slots[0];
11526                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11527                         ret = btrfs_next_leaf(root, &path);
11528                         if (ret != 0)
11529                                 break;
11530                         leaf = path.nodes[0];
11531                         slot = path.slots[0];
11532                 }
11533                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11534                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11535                         unsigned long offset;
11536                         u64 last_snapshot;
11537
11538                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11539                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11540                         last_snapshot = btrfs_root_last_snapshot(&ri);
11541                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11542                                 level = btrfs_root_level(&ri);
11543                                 ret = add_root_item_to_list(&normal_trees,
11544                                                 found_key.objectid,
11545                                                 btrfs_root_bytenr(&ri),
11546                                                 last_snapshot, level,
11547                                                 0, NULL);
11548                                 if (ret < 0)
11549                                         goto out;
11550                         } else {
11551                                 level = btrfs_root_level(&ri);
11552                                 objectid = found_key.objectid;
11553                                 btrfs_disk_key_to_cpu(&found_key,
11554                                                       &ri.drop_progress);
11555                                 ret = add_root_item_to_list(&dropping_trees,
11556                                                 objectid,
11557                                                 btrfs_root_bytenr(&ri),
11558                                                 last_snapshot, level,
11559                                                 ri.drop_level, &found_key);
11560                                 if (ret < 0)
11561                                         goto out;
11562                         }
11563                 }
11564                 path.slots[0]++;
11565         }
11566         btrfs_release_path(&path);
11567
11568         /*
11569          * check_block can return -EAGAIN if it fixes something, please keep
11570          * this in mind when dealing with return values from these functions, if
11571          * we get -EAGAIN we want to fall through and restart the loop.
11572          */
11573         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11574                                   &seen, &reada, &nodes, &extent_cache,
11575                                   &chunk_cache, &dev_cache, &block_group_cache,
11576                                   &dev_extent_cache);
11577         if (ret < 0) {
11578                 if (ret == -EAGAIN)
11579                         goto loop;
11580                 goto out;
11581         }
11582         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11583                                   &pending, &seen, &reada, &nodes,
11584                                   &extent_cache, &chunk_cache, &dev_cache,
11585                                   &block_group_cache, &dev_extent_cache);
11586         if (ret < 0) {
11587                 if (ret == -EAGAIN)
11588                         goto loop;
11589                 goto out;
11590         }
11591
11592         ret = check_chunks(&chunk_cache, &block_group_cache,
11593                            &dev_extent_cache, NULL, NULL, NULL, 0);
11594         if (ret) {
11595                 if (ret == -EAGAIN)
11596                         goto loop;
11597                 err = ret;
11598         }
11599
11600         ret = check_extent_refs(root, &extent_cache);
11601         if (ret < 0) {
11602                 if (ret == -EAGAIN)
11603                         goto loop;
11604                 goto out;
11605         }
11606
11607         ret = check_devices(&dev_cache, &dev_extent_cache);
11608         if (ret && err)
11609                 ret = err;
11610
11611 out:
11612         task_stop(ctx.info);
11613         if (repair) {
11614                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11615                 extent_io_tree_cleanup(&excluded_extents);
11616                 fs_info->fsck_extent_cache = NULL;
11617                 fs_info->free_extent_hook = NULL;
11618                 fs_info->corrupt_blocks = NULL;
11619                 fs_info->excluded_extents = NULL;
11620         }
11621         free(bits);
11622         free_chunk_cache_tree(&chunk_cache);
11623         free_device_cache_tree(&dev_cache);
11624         free_block_group_tree(&block_group_cache);
11625         free_device_extent_tree(&dev_extent_cache);
11626         free_extent_cache_tree(&seen);
11627         free_extent_cache_tree(&pending);
11628         free_extent_cache_tree(&reada);
11629         free_extent_cache_tree(&nodes);
11630         free_root_item_list(&normal_trees);
11631         free_root_item_list(&dropping_trees);
11632         return ret;
11633 loop:
11634         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11635         free_extent_cache_tree(&seen);
11636         free_extent_cache_tree(&pending);
11637         free_extent_cache_tree(&reada);
11638         free_extent_cache_tree(&nodes);
11639         free_chunk_cache_tree(&chunk_cache);
11640         free_block_group_tree(&block_group_cache);
11641         free_device_cache_tree(&dev_cache);
11642         free_device_extent_tree(&dev_extent_cache);
11643         free_extent_record_cache(&extent_cache);
11644         free_root_item_list(&normal_trees);
11645         free_root_item_list(&dropping_trees);
11646         extent_io_tree_cleanup(&excluded_extents);
11647         goto again;
11648 }
11649
11650 static int check_extent_inline_ref(struct extent_buffer *eb,
11651                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11652 {
11653         int ret;
11654         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11655
11656         switch (type) {
11657         case BTRFS_TREE_BLOCK_REF_KEY:
11658         case BTRFS_EXTENT_DATA_REF_KEY:
11659         case BTRFS_SHARED_BLOCK_REF_KEY:
11660         case BTRFS_SHARED_DATA_REF_KEY:
11661                 ret = 0;
11662                 break;
11663         default:
11664                 error("extent[%llu %u %llu] has unknown ref type: %d",
11665                       key->objectid, key->type, key->offset, type);
11666                 ret = UNKNOWN_TYPE;
11667                 break;
11668         }
11669
11670         return ret;
11671 }
11672
11673 /*
11674  * Check backrefs of a tree block given by @bytenr or @eb.
11675  *
11676  * @root:       the root containing the @bytenr or @eb
11677  * @eb:         tree block extent buffer, can be NULL
11678  * @bytenr:     bytenr of the tree block to search
11679  * @level:      tree level of the tree block
11680  * @owner:      owner of the tree block
11681  *
11682  * Return >0 for any error found and output error message
11683  * Return 0 for no error found
11684  */
11685 static int check_tree_block_ref(struct btrfs_root *root,
11686                                 struct extent_buffer *eb, u64 bytenr,
11687                                 int level, u64 owner, struct node_refs *nrefs)
11688 {
11689         struct btrfs_key key;
11690         struct btrfs_root *extent_root = root->fs_info->extent_root;
11691         struct btrfs_path path;
11692         struct btrfs_extent_item *ei;
11693         struct btrfs_extent_inline_ref *iref;
11694         struct extent_buffer *leaf;
11695         unsigned long end;
11696         unsigned long ptr;
11697         int slot;
11698         int skinny_level;
11699         int root_level = btrfs_header_level(root->node);
11700         int type;
11701         u32 nodesize = root->fs_info->nodesize;
11702         u32 item_size;
11703         u64 offset;
11704         int found_ref = 0;
11705         int err = 0;
11706         int ret;
11707         int strict = 1;
11708         int parent = 0;
11709
11710         btrfs_init_path(&path);
11711         key.objectid = bytenr;
11712         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11713                 key.type = BTRFS_METADATA_ITEM_KEY;
11714         else
11715                 key.type = BTRFS_EXTENT_ITEM_KEY;
11716         key.offset = (u64)-1;
11717
11718         /* Search for the backref in extent tree */
11719         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11720         if (ret < 0) {
11721                 err |= BACKREF_MISSING;
11722                 goto out;
11723         }
11724         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11725         if (ret) {
11726                 err |= BACKREF_MISSING;
11727                 goto out;
11728         }
11729
11730         leaf = path.nodes[0];
11731         slot = path.slots[0];
11732         btrfs_item_key_to_cpu(leaf, &key, slot);
11733
11734         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11735
11736         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11737                 skinny_level = (int)key.offset;
11738                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11739         } else {
11740                 struct btrfs_tree_block_info *info;
11741
11742                 info = (struct btrfs_tree_block_info *)(ei + 1);
11743                 skinny_level = btrfs_tree_block_level(leaf, info);
11744                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11745         }
11746
11747
11748         if (eb) {
11749                 u64 header_gen;
11750                 u64 extent_gen;
11751
11752                 /*
11753                  * Due to the feature of shared tree blocks, if the upper node
11754                  * is a fs root or shared node, the extent of checked node may
11755                  * not be updated until the next CoW.
11756                  */
11757                 if (nrefs)
11758                         strict = should_check_extent_strictly(root, nrefs,
11759                                         level);
11760                 if (!(btrfs_extent_flags(leaf, ei) &
11761                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11762                         error(
11763                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11764                                 key.objectid, nodesize,
11765                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11766                         err = BACKREF_MISMATCH;
11767                 }
11768                 header_gen = btrfs_header_generation(eb);
11769                 extent_gen = btrfs_extent_generation(leaf, ei);
11770                 if (header_gen != extent_gen) {
11771                         error(
11772         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11773                                 key.objectid, nodesize, header_gen,
11774                                 extent_gen);
11775                         err = BACKREF_MISMATCH;
11776                 }
11777                 if (level != skinny_level) {
11778                         error(
11779                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11780                                 key.objectid, nodesize, level, skinny_level);
11781                         err = BACKREF_MISMATCH;
11782                 }
11783                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11784                         error(
11785                         "extent[%llu %u] is referred by other roots than %llu",
11786                                 key.objectid, nodesize, root->objectid);
11787                         err = BACKREF_MISMATCH;
11788                 }
11789         }
11790
11791         /*
11792          * Iterate the extent/metadata item to find the exact backref
11793          */
11794         item_size = btrfs_item_size_nr(leaf, slot);
11795         ptr = (unsigned long)iref;
11796         end = (unsigned long)ei + item_size;
11797
11798         while (ptr < end) {
11799                 iref = (struct btrfs_extent_inline_ref *)ptr;
11800                 type = btrfs_extent_inline_ref_type(leaf, iref);
11801                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11802
11803                 ret = check_extent_inline_ref(leaf, &key, iref);
11804                 if (ret) {
11805                         err |= ret;
11806                         break;
11807                 }
11808                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11809                         if (offset == root->objectid)
11810                                 found_ref = 1;
11811                         if (!strict && owner == offset)
11812                                 found_ref = 1;
11813                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11814                         /*
11815                          * Backref of tree reloc root points to itself, no need
11816                          * to check backref any more.
11817                          *
11818                          * This may be an error of loop backref, but extent tree
11819                          * checker should have already handled it.
11820                          * Here we only need to avoid infinite iteration.
11821                          */
11822                         if (offset == bytenr) {
11823                                 found_ref = 1;
11824                         } else {
11825                                 /*
11826                                  * Check if the backref points to valid
11827                                  * referencer
11828                                  */
11829                                 found_ref = !check_tree_block_ref( root, NULL,
11830                                                 offset, level + 1, owner,
11831                                                 NULL);
11832                         }
11833                 }
11834
11835                 if (found_ref)
11836                         break;
11837                 ptr += btrfs_extent_inline_ref_size(type);
11838         }
11839
11840         /*
11841          * Inlined extent item doesn't have what we need, check
11842          * TREE_BLOCK_REF_KEY
11843          */
11844         if (!found_ref) {
11845                 btrfs_release_path(&path);
11846                 key.objectid = bytenr;
11847                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11848                 key.offset = root->objectid;
11849
11850                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11851                 if (!ret)
11852                         found_ref = 1;
11853         }
11854         /*
11855          * Finally check SHARED BLOCK REF, any found will be good
11856          * Here we're not doing comprehensive extent backref checking,
11857          * only need to ensure there is some extent referring to this
11858          * tree block.
11859          */
11860         if (!found_ref) {
11861                 btrfs_release_path(&path);
11862                 key.objectid = bytenr;
11863                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11864                 key.offset = (u64)-1;
11865
11866                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11867                 if (ret < 0) {
11868                         err |= BACKREF_MISSING;
11869                         goto out;
11870                 }
11871                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11872                 if (ret) {
11873                         err |= BACKREF_MISSING;
11874                         goto out;
11875                 }
11876                 found_ref = 1;
11877         }
11878         if (!found_ref)
11879                 err |= BACKREF_MISSING;
11880 out:
11881         btrfs_release_path(&path);
11882         if (nrefs && strict &&
11883             level < root_level && nrefs->full_backref[level + 1])
11884                 parent = nrefs->bytenr[level + 1];
11885         if (eb && (err & BACKREF_MISSING))
11886                 error(
11887         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11888                       bytenr, nodesize, owner, level,
11889                       parent ? "parent" : "root",
11890                       parent ? parent : root->objectid);
11891         return err;
11892 }
11893
11894 /*
11895  * If @err contains BACKREF_MISSING then add extent of the
11896  * file_extent_data_item.
11897  *
11898  * Returns error bits after reapir.
11899  */
11900 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11901                                    struct btrfs_root *root,
11902                                    struct btrfs_path *pathp,
11903                                    struct node_refs *nrefs,
11904                                    int err)
11905 {
11906         struct btrfs_file_extent_item *fi;
11907         struct btrfs_key fi_key;
11908         struct btrfs_key key;
11909         struct btrfs_extent_item *ei;
11910         struct btrfs_path path;
11911         struct btrfs_root *extent_root = root->fs_info->extent_root;
11912         struct extent_buffer *eb;
11913         u64 size;
11914         u64 disk_bytenr;
11915         u64 num_bytes;
11916         u64 parent;
11917         u64 offset;
11918         u64 extent_offset;
11919         u64 file_offset;
11920         int generation;
11921         int slot;
11922         int ret = 0;
11923
11924         eb = pathp->nodes[0];
11925         slot = pathp->slots[0];
11926         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11927         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11928
11929         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11930             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11931                 return err;
11932
11933         file_offset = fi_key.offset;
11934         generation = btrfs_file_extent_generation(eb, fi);
11935         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11936         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11937         extent_offset = btrfs_file_extent_offset(eb, fi);
11938         offset = file_offset - extent_offset;
11939
11940         /* now repair only adds backref */
11941         if ((err & BACKREF_MISSING) == 0)
11942                 return err;
11943
11944         /* search extent item */
11945         key.objectid = disk_bytenr;
11946         key.type = BTRFS_EXTENT_ITEM_KEY;
11947         key.offset = num_bytes;
11948
11949         btrfs_init_path(&path);
11950         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11951         if (ret < 0) {
11952                 ret = -EIO;
11953                 goto out;
11954         }
11955
11956         /* insert an extent item */
11957         if (ret > 0) {
11958                 key.objectid = disk_bytenr;
11959                 key.type = BTRFS_EXTENT_ITEM_KEY;
11960                 key.offset = num_bytes;
11961                 size = sizeof(*ei);
11962
11963                 btrfs_release_path(&path);
11964                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11965                                               size);
11966                 if (ret)
11967                         goto out;
11968                 eb = path.nodes[0];
11969                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11970
11971                 btrfs_set_extent_refs(eb, ei, 0);
11972                 btrfs_set_extent_generation(eb, ei, generation);
11973                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11974
11975                 btrfs_mark_buffer_dirty(eb);
11976                 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11977                                                num_bytes, 1, 0);
11978                 btrfs_release_path(&path);
11979         }
11980
11981         if (nrefs->full_backref[0])
11982                 parent = btrfs_header_bytenr(eb);
11983         else
11984                 parent = 0;
11985
11986         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11987                                    root->objectid,
11988                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11989                                    offset);
11990         if (ret) {
11991                 error(
11992                 "failed to increase extent data backref[%llu %llu] root %llu",
11993                       disk_bytenr, num_bytes, root->objectid);
11994                 goto out;
11995         } else {
11996                 printf("Add one extent data backref [%llu %llu]\n",
11997                        disk_bytenr, num_bytes);
11998         }
11999
12000         err &= ~BACKREF_MISSING;
12001 out:
12002         if (ret)
12003                 error("can't repair root %llu extent data item[%llu %llu]",
12004                       root->objectid, disk_bytenr, num_bytes);
12005         return err;
12006 }
12007
12008 /*
12009  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
12010  *
12011  * Return >0 any error found and output error message
12012  * Return 0 for no error found
12013  */
12014 static int check_extent_data_item(struct btrfs_root *root,
12015                                   struct btrfs_path *pathp,
12016                                   struct node_refs *nrefs,  int account_bytes)
12017 {
12018         struct btrfs_file_extent_item *fi;
12019         struct extent_buffer *eb = pathp->nodes[0];
12020         struct btrfs_path path;
12021         struct btrfs_root *extent_root = root->fs_info->extent_root;
12022         struct btrfs_key fi_key;
12023         struct btrfs_key dbref_key;
12024         struct extent_buffer *leaf;
12025         struct btrfs_extent_item *ei;
12026         struct btrfs_extent_inline_ref *iref;
12027         struct btrfs_extent_data_ref *dref;
12028         u64 owner;
12029         u64 disk_bytenr;
12030         u64 disk_num_bytes;
12031         u64 extent_num_bytes;
12032         u64 extent_flags;
12033         u64 offset;
12034         u32 item_size;
12035         unsigned long end;
12036         unsigned long ptr;
12037         int type;
12038         int found_dbackref = 0;
12039         int slot = pathp->slots[0];
12040         int err = 0;
12041         int ret;
12042         int strict;
12043
12044         btrfs_item_key_to_cpu(eb, &fi_key, slot);
12045         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
12046
12047         /* Nothing to check for hole and inline data extents */
12048         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
12049             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
12050                 return 0;
12051
12052         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
12053         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
12054         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
12055         offset = btrfs_file_extent_offset(eb, fi);
12056
12057         /* Check unaligned disk_num_bytes and num_bytes */
12058         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
12059                 error(
12060 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
12061                         fi_key.objectid, fi_key.offset, disk_num_bytes,
12062                         root->fs_info->sectorsize);
12063                 err |= BYTES_UNALIGNED;
12064         } else if (account_bytes) {
12065                 data_bytes_allocated += disk_num_bytes;
12066         }
12067         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
12068                 error(
12069 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
12070                         fi_key.objectid, fi_key.offset, extent_num_bytes,
12071                         root->fs_info->sectorsize);
12072                 err |= BYTES_UNALIGNED;
12073         } else if (account_bytes) {
12074                 data_bytes_referenced += extent_num_bytes;
12075         }
12076         owner = btrfs_header_owner(eb);
12077
12078         /* Check the extent item of the file extent in extent tree */
12079         btrfs_init_path(&path);
12080         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12081         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
12082         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
12083
12084         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
12085         if (ret)
12086                 goto out;
12087
12088         leaf = path.nodes[0];
12089         slot = path.slots[0];
12090         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12091
12092         extent_flags = btrfs_extent_flags(leaf, ei);
12093
12094         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12095                 error(
12096                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12097                     disk_bytenr, disk_num_bytes,
12098                     BTRFS_EXTENT_FLAG_DATA);
12099                 err |= BACKREF_MISMATCH;
12100         }
12101
12102         /* Check data backref inside that extent item */
12103         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
12104         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12105         ptr = (unsigned long)iref;
12106         end = (unsigned long)ei + item_size;
12107         strict = should_check_extent_strictly(root, nrefs, -1);
12108
12109         while (ptr < end) {
12110                 u64 ref_root;
12111                 u64 ref_objectid;
12112                 u64 ref_offset;
12113                 bool match = false;
12114
12115                 iref = (struct btrfs_extent_inline_ref *)ptr;
12116                 type = btrfs_extent_inline_ref_type(leaf, iref);
12117                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12118
12119                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
12120                 if (ret) {
12121                         err |= ret;
12122                         break;
12123                 }
12124                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12125                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
12126                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
12127                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
12128
12129                         if (ref_objectid == fi_key.objectid &&
12130                             ref_offset == fi_key.offset - offset)
12131                                 match = true;
12132                         if (ref_root == root->objectid && match)
12133                                 found_dbackref = 1;
12134                         else if (!strict && owner == ref_root && match)
12135                                 found_dbackref = 1;
12136                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
12137                         found_dbackref = !check_tree_block_ref(root, NULL,
12138                                 btrfs_extent_inline_ref_offset(leaf, iref),
12139                                 0, owner, NULL);
12140                 }
12141
12142                 if (found_dbackref)
12143                         break;
12144                 ptr += btrfs_extent_inline_ref_size(type);
12145         }
12146
12147         if (!found_dbackref) {
12148                 btrfs_release_path(&path);
12149
12150                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12151                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12152                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12153                 dbref_key.offset = hash_extent_data_ref(root->objectid,
12154                                 fi_key.objectid, fi_key.offset - offset);
12155
12156                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12157                                         &dbref_key, &path, 0, 0);
12158                 if (!ret) {
12159                         found_dbackref = 1;
12160                         goto out;
12161                 }
12162
12163                 btrfs_release_path(&path);
12164
12165                 /*
12166                  * Neither inlined nor EXTENT_DATA_REF found, try
12167                  * SHARED_DATA_REF as last chance.
12168                  */
12169                 dbref_key.objectid = disk_bytenr;
12170                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12171                 dbref_key.offset = eb->start;
12172
12173                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12174                                         &dbref_key, &path, 0, 0);
12175                 if (!ret) {
12176                         found_dbackref = 1;
12177                         goto out;
12178                 }
12179         }
12180
12181 out:
12182         if (!found_dbackref)
12183                 err |= BACKREF_MISSING;
12184         btrfs_release_path(&path);
12185         if (err & BACKREF_MISSING) {
12186                 error("data extent[%llu %llu] backref lost",
12187                       disk_bytenr, disk_num_bytes);
12188         }
12189         return err;
12190 }
12191
12192 /*
12193  * Get real tree block level for the case like shared block
12194  * Return >= 0 as tree level
12195  * Return <0 for error
12196  */
12197 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12198 {
12199         struct extent_buffer *eb;
12200         struct btrfs_path path;
12201         struct btrfs_key key;
12202         struct btrfs_extent_item *ei;
12203         u64 flags;
12204         u64 transid;
12205         u8 backref_level;
12206         u8 header_level;
12207         int ret;
12208
12209         /* Search extent tree for extent generation and level */
12210         key.objectid = bytenr;
12211         key.type = BTRFS_METADATA_ITEM_KEY;
12212         key.offset = (u64)-1;
12213
12214         btrfs_init_path(&path);
12215         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12216         if (ret < 0)
12217                 goto release_out;
12218         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12219         if (ret < 0)
12220                 goto release_out;
12221         if (ret > 0) {
12222                 ret = -ENOENT;
12223                 goto release_out;
12224         }
12225
12226         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12227         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12228                             struct btrfs_extent_item);
12229         flags = btrfs_extent_flags(path.nodes[0], ei);
12230         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12231                 ret = -ENOENT;
12232                 goto release_out;
12233         }
12234
12235         /* Get transid for later read_tree_block() check */
12236         transid = btrfs_extent_generation(path.nodes[0], ei);
12237
12238         /* Get backref level as one source */
12239         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12240                 backref_level = key.offset;
12241         } else {
12242                 struct btrfs_tree_block_info *info;
12243
12244                 info = (struct btrfs_tree_block_info *)(ei + 1);
12245                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12246         }
12247         btrfs_release_path(&path);
12248
12249         /* Get level from tree block as an alternative source */
12250         eb = read_tree_block(fs_info, bytenr, transid);
12251         if (!extent_buffer_uptodate(eb)) {
12252                 free_extent_buffer(eb);
12253                 return -EIO;
12254         }
12255         header_level = btrfs_header_level(eb);
12256         free_extent_buffer(eb);
12257
12258         if (header_level != backref_level)
12259                 return -EIO;
12260         return header_level;
12261
12262 release_out:
12263         btrfs_release_path(&path);
12264         return ret;
12265 }
12266
12267 /*
12268  * Check if a tree block backref is valid (points to a valid tree block)
12269  * if level == -1, level will be resolved
12270  * Return >0 for any error found and print error message
12271  */
12272 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12273                                     u64 bytenr, int level)
12274 {
12275         struct btrfs_root *root;
12276         struct btrfs_key key;
12277         struct btrfs_path path;
12278         struct extent_buffer *eb;
12279         struct extent_buffer *node;
12280         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12281         int err = 0;
12282         int ret;
12283
12284         /* Query level for level == -1 special case */
12285         if (level == -1)
12286                 level = query_tree_block_level(fs_info, bytenr);
12287         if (level < 0) {
12288                 err |= REFERENCER_MISSING;
12289                 goto out;
12290         }
12291
12292         key.objectid = root_id;
12293         key.type = BTRFS_ROOT_ITEM_KEY;
12294         key.offset = (u64)-1;
12295
12296         root = btrfs_read_fs_root(fs_info, &key);
12297         if (IS_ERR(root)) {
12298                 err |= REFERENCER_MISSING;
12299                 goto out;
12300         }
12301
12302         /* Read out the tree block to get item/node key */
12303         eb = read_tree_block(fs_info, bytenr, 0);
12304         if (!extent_buffer_uptodate(eb)) {
12305                 err |= REFERENCER_MISSING;
12306                 free_extent_buffer(eb);
12307                 goto out;
12308         }
12309
12310         /* Empty tree, no need to check key */
12311         if (!btrfs_header_nritems(eb) && !level) {
12312                 free_extent_buffer(eb);
12313                 goto out;
12314         }
12315
12316         if (level)
12317                 btrfs_node_key_to_cpu(eb, &key, 0);
12318         else
12319                 btrfs_item_key_to_cpu(eb, &key, 0);
12320
12321         free_extent_buffer(eb);
12322
12323         btrfs_init_path(&path);
12324         path.lowest_level = level;
12325         /* Search with the first key, to ensure we can reach it */
12326         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12327         if (ret < 0) {
12328                 err |= REFERENCER_MISSING;
12329                 goto release_out;
12330         }
12331
12332         node = path.nodes[level];
12333         if (btrfs_header_bytenr(node) != bytenr) {
12334                 error(
12335         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12336                         bytenr, nodesize, bytenr,
12337                         btrfs_header_bytenr(node));
12338                 err |= REFERENCER_MISMATCH;
12339         }
12340         if (btrfs_header_level(node) != level) {
12341                 error(
12342         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12343                         bytenr, nodesize, level,
12344                         btrfs_header_level(node));
12345                 err |= REFERENCER_MISMATCH;
12346         }
12347
12348 release_out:
12349         btrfs_release_path(&path);
12350 out:
12351         if (err & REFERENCER_MISSING) {
12352                 if (level < 0)
12353                         error("extent [%llu %d] lost referencer (owner: %llu)",
12354                                 bytenr, nodesize, root_id);
12355                 else
12356                         error(
12357                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12358                                 bytenr, nodesize, root_id, level);
12359         }
12360
12361         return err;
12362 }
12363
12364 /*
12365  * Check if tree block @eb is tree reloc root.
12366  * Return 0 if it's not or any problem happens
12367  * Return 1 if it's a tree reloc root
12368  */
12369 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12370                                  struct extent_buffer *eb)
12371 {
12372         struct btrfs_root *tree_reloc_root;
12373         struct btrfs_key key;
12374         u64 bytenr = btrfs_header_bytenr(eb);
12375         u64 owner = btrfs_header_owner(eb);
12376         int ret = 0;
12377
12378         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12379         key.offset = owner;
12380         key.type = BTRFS_ROOT_ITEM_KEY;
12381
12382         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12383         if (IS_ERR(tree_reloc_root))
12384                 return 0;
12385
12386         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12387                 ret = 1;
12388         btrfs_free_fs_root(tree_reloc_root);
12389         return ret;
12390 }
12391
12392 /*
12393  * Check referencer for shared block backref
12394  * If level == -1, this function will resolve the level.
12395  */
12396 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12397                                      u64 parent, u64 bytenr, int level)
12398 {
12399         struct extent_buffer *eb;
12400         u32 nr;
12401         int found_parent = 0;
12402         int i;
12403
12404         eb = read_tree_block(fs_info, parent, 0);
12405         if (!extent_buffer_uptodate(eb))
12406                 goto out;
12407
12408         if (level == -1)
12409                 level = query_tree_block_level(fs_info, bytenr);
12410         if (level < 0)
12411                 goto out;
12412
12413         /* It's possible it's a tree reloc root */
12414         if (parent == bytenr) {
12415                 if (is_tree_reloc_root(fs_info, eb))
12416                         found_parent = 1;
12417                 goto out;
12418         }
12419
12420         if (level + 1 != btrfs_header_level(eb))
12421                 goto out;
12422
12423         nr = btrfs_header_nritems(eb);
12424         for (i = 0; i < nr; i++) {
12425                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12426                         found_parent = 1;
12427                         break;
12428                 }
12429         }
12430 out:
12431         free_extent_buffer(eb);
12432         if (!found_parent) {
12433                 error(
12434         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12435                         bytenr, fs_info->nodesize, parent, level);
12436                 return REFERENCER_MISSING;
12437         }
12438         return 0;
12439 }
12440
12441 /*
12442  * Check referencer for normal (inlined) data ref
12443  * If len == 0, it will be resolved by searching in extent tree
12444  */
12445 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12446                                      u64 root_id, u64 objectid, u64 offset,
12447                                      u64 bytenr, u64 len, u32 count)
12448 {
12449         struct btrfs_root *root;
12450         struct btrfs_root *extent_root = fs_info->extent_root;
12451         struct btrfs_key key;
12452         struct btrfs_path path;
12453         struct extent_buffer *leaf;
12454         struct btrfs_file_extent_item *fi;
12455         u32 found_count = 0;
12456         int slot;
12457         int ret = 0;
12458
12459         if (!len) {
12460                 key.objectid = bytenr;
12461                 key.type = BTRFS_EXTENT_ITEM_KEY;
12462                 key.offset = (u64)-1;
12463
12464                 btrfs_init_path(&path);
12465                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12466                 if (ret < 0)
12467                         goto out;
12468                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12469                 if (ret)
12470                         goto out;
12471                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12472                 if (key.objectid != bytenr ||
12473                     key.type != BTRFS_EXTENT_ITEM_KEY)
12474                         goto out;
12475                 len = key.offset;
12476                 btrfs_release_path(&path);
12477         }
12478         key.objectid = root_id;
12479         key.type = BTRFS_ROOT_ITEM_KEY;
12480         key.offset = (u64)-1;
12481         btrfs_init_path(&path);
12482
12483         root = btrfs_read_fs_root(fs_info, &key);
12484         if (IS_ERR(root))
12485                 goto out;
12486
12487         key.objectid = objectid;
12488         key.type = BTRFS_EXTENT_DATA_KEY;
12489         /*
12490          * It can be nasty as data backref offset is
12491          * file offset - file extent offset, which is smaller or
12492          * equal to original backref offset.  The only special case is
12493          * overflow.  So we need to special check and do further search.
12494          */
12495         key.offset = offset & (1ULL << 63) ? 0 : offset;
12496
12497         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12498         if (ret < 0)
12499                 goto out;
12500
12501         /*
12502          * Search afterwards to get correct one
12503          * NOTE: As we must do a comprehensive check on the data backref to
12504          * make sure the dref count also matches, we must iterate all file
12505          * extents for that inode.
12506          */
12507         while (1) {
12508                 leaf = path.nodes[0];
12509                 slot = path.slots[0];
12510
12511                 if (slot >= btrfs_header_nritems(leaf) ||
12512                     btrfs_header_owner(leaf) != root_id)
12513                         goto next;
12514                 btrfs_item_key_to_cpu(leaf, &key, slot);
12515                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12516                         break;
12517                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12518                 /*
12519                  * Except normal disk bytenr and disk num bytes, we still
12520                  * need to do extra check on dbackref offset as
12521                  * dbackref offset = file_offset - file_extent_offset
12522                  *
12523                  * Also, we must check the leaf owner.
12524                  * In case of shared tree blocks (snapshots) we can inherit
12525                  * leaves from source snapshot.
12526                  * In that case, reference from source snapshot should not
12527                  * count.
12528                  */
12529                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12530                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12531                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12532                     offset && btrfs_header_owner(leaf) == root_id)
12533                         found_count++;
12534
12535 next:
12536                 ret = btrfs_next_item(root, &path);
12537                 if (ret)
12538                         break;
12539         }
12540 out:
12541         btrfs_release_path(&path);
12542         if (found_count != count) {
12543                 error(
12544 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12545                         bytenr, len, root_id, objectid, offset, count, found_count);
12546                 return REFERENCER_MISSING;
12547         }
12548         return 0;
12549 }
12550
12551 /*
12552  * Check if the referencer of a shared data backref exists
12553  */
12554 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12555                                      u64 parent, u64 bytenr)
12556 {
12557         struct extent_buffer *eb;
12558         struct btrfs_key key;
12559         struct btrfs_file_extent_item *fi;
12560         u32 nr;
12561         int found_parent = 0;
12562         int i;
12563
12564         eb = read_tree_block(fs_info, parent, 0);
12565         if (!extent_buffer_uptodate(eb))
12566                 goto out;
12567
12568         nr = btrfs_header_nritems(eb);
12569         for (i = 0; i < nr; i++) {
12570                 btrfs_item_key_to_cpu(eb, &key, i);
12571                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12572                         continue;
12573
12574                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12575                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12576                         continue;
12577
12578                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12579                         found_parent = 1;
12580                         break;
12581                 }
12582         }
12583
12584 out:
12585         free_extent_buffer(eb);
12586         if (!found_parent) {
12587                 error("shared extent %llu referencer lost (parent: %llu)",
12588                         bytenr, parent);
12589                 return REFERENCER_MISSING;
12590         }
12591         return 0;
12592 }
12593
12594 /*
12595  * Only delete backref if REFERENCER_MISSING now
12596  *
12597  * Returns <0   the extent was deleted
12598  * Returns >0   the backref was deleted but extent still exists, returned value
12599  *               means error after repair
12600  * Returns  0   nothing happened
12601  */
12602 static int repair_extent_item(struct btrfs_trans_handle *trans,
12603                       struct btrfs_root *root, struct btrfs_path *path,
12604                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12605                       u64 owner, u64 offset, int err)
12606 {
12607         struct btrfs_key old_key;
12608         int freed = 0;
12609         int ret;
12610
12611         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12612
12613         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12614                 /* delete the backref */
12615                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12616                           num_bytes, parent, root_objectid, owner, offset);
12617                 if (!ret) {
12618                         freed = 1;
12619                         err &= ~REFERENCER_MISSING;
12620                         printf("Delete backref in extent [%llu %llu]\n",
12621                                bytenr, num_bytes);
12622                 } else {
12623                         error("fail to delete backref in extent [%llu %llu]",
12624                                bytenr, num_bytes);
12625                 }
12626         }
12627
12628         /* btrfs_free_extent may delete the extent */
12629         btrfs_release_path(path);
12630         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12631
12632         if (ret)
12633                 ret = -ENOENT;
12634         else if (freed)
12635                 ret = err;
12636         return ret;
12637 }
12638
12639 /*
12640  * This function will check a given extent item, including its backref and
12641  * itself (like crossing stripe boundary and type)
12642  *
12643  * Since we don't use extent_record anymore, introduce new error bit
12644  */
12645 static int check_extent_item(struct btrfs_trans_handle *trans,
12646                              struct btrfs_fs_info *fs_info,
12647                              struct btrfs_path *path)
12648 {
12649         struct btrfs_extent_item *ei;
12650         struct btrfs_extent_inline_ref *iref;
12651         struct btrfs_extent_data_ref *dref;
12652         struct extent_buffer *eb = path->nodes[0];
12653         unsigned long end;
12654         unsigned long ptr;
12655         int slot = path->slots[0];
12656         int type;
12657         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12658         u32 item_size = btrfs_item_size_nr(eb, slot);
12659         u64 flags;
12660         u64 offset;
12661         u64 parent;
12662         u64 num_bytes;
12663         u64 root_objectid;
12664         u64 owner;
12665         u64 owner_offset;
12666         int metadata = 0;
12667         int level;
12668         struct btrfs_key key;
12669         int ret;
12670         int err = 0;
12671
12672         btrfs_item_key_to_cpu(eb, &key, slot);
12673         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12674                 bytes_used += key.offset;
12675                 num_bytes = key.offset;
12676         } else {
12677                 bytes_used += nodesize;
12678                 num_bytes = nodesize;
12679         }
12680
12681         if (item_size < sizeof(*ei)) {
12682                 /*
12683                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12684                  * old thing when on disk format is still un-determined.
12685                  * No need to care about it anymore
12686                  */
12687                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12688                 return -ENOTTY;
12689         }
12690
12691         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12692         flags = btrfs_extent_flags(eb, ei);
12693
12694         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12695                 metadata = 1;
12696         if (metadata && check_crossing_stripes(global_info, key.objectid,
12697                                                eb->len)) {
12698                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12699                       key.objectid, key.objectid + nodesize);
12700                 err |= CROSSING_STRIPE_BOUNDARY;
12701         }
12702
12703         ptr = (unsigned long)(ei + 1);
12704
12705         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12706                 /* Old EXTENT_ITEM metadata */
12707                 struct btrfs_tree_block_info *info;
12708
12709                 info = (struct btrfs_tree_block_info *)ptr;
12710                 level = btrfs_tree_block_level(eb, info);
12711                 ptr += sizeof(struct btrfs_tree_block_info);
12712         } else {
12713                 /* New METADATA_ITEM */
12714                 level = key.offset;
12715         }
12716         end = (unsigned long)ei + item_size;
12717
12718 next:
12719         /* Reached extent item end normally */
12720         if (ptr == end)
12721                 goto out;
12722
12723         /* Beyond extent item end, wrong item size */
12724         if (ptr > end) {
12725                 err |= ITEM_SIZE_MISMATCH;
12726                 error("extent item at bytenr %llu slot %d has wrong size",
12727                         eb->start, slot);
12728                 goto out;
12729         }
12730
12731         parent = 0;
12732         root_objectid = 0;
12733         owner = 0;
12734         owner_offset = 0;
12735         /* Now check every backref in this extent item */
12736         iref = (struct btrfs_extent_inline_ref *)ptr;
12737         type = btrfs_extent_inline_ref_type(eb, iref);
12738         offset = btrfs_extent_inline_ref_offset(eb, iref);
12739         switch (type) {
12740         case BTRFS_TREE_BLOCK_REF_KEY:
12741                 root_objectid = offset;
12742                 owner = level;
12743                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12744                                                level);
12745                 err |= ret;
12746                 break;
12747         case BTRFS_SHARED_BLOCK_REF_KEY:
12748                 parent = offset;
12749                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12750                                                  level);
12751                 err |= ret;
12752                 break;
12753         case BTRFS_EXTENT_DATA_REF_KEY:
12754                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12755                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12756                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12757                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12758                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12759                                         owner_offset, key.objectid, key.offset,
12760                                         btrfs_extent_data_ref_count(eb, dref));
12761                 err |= ret;
12762                 break;
12763         case BTRFS_SHARED_DATA_REF_KEY:
12764                 parent = offset;
12765                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12766                 err |= ret;
12767                 break;
12768         default:
12769                 error("extent[%llu %d %llu] has unknown ref type: %d",
12770                         key.objectid, key.type, key.offset, type);
12771                 ret = UNKNOWN_TYPE;
12772                 err |= ret;
12773                 goto out;
12774         }
12775
12776         if (err && repair) {
12777                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12778                          key.objectid, num_bytes, parent, root_objectid,
12779                          owner, owner_offset, ret);
12780                 if (ret < 0)
12781                         goto out;
12782                 if (ret) {
12783                         goto next;
12784                         err = ret;
12785                 }
12786         }
12787
12788         ptr += btrfs_extent_inline_ref_size(type);
12789         goto next;
12790
12791 out:
12792         return err;
12793 }
12794
12795 /*
12796  * Check if a dev extent item is referred correctly by its chunk
12797  */
12798 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12799                                  struct extent_buffer *eb, int slot)
12800 {
12801         struct btrfs_root *chunk_root = fs_info->chunk_root;
12802         struct btrfs_dev_extent *ptr;
12803         struct btrfs_path path;
12804         struct btrfs_key chunk_key;
12805         struct btrfs_key devext_key;
12806         struct btrfs_chunk *chunk;
12807         struct extent_buffer *l;
12808         int num_stripes;
12809         u64 length;
12810         int i;
12811         int found_chunk = 0;
12812         int ret;
12813
12814         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12815         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12816         length = btrfs_dev_extent_length(eb, ptr);
12817
12818         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12819         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12820         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12821
12822         btrfs_init_path(&path);
12823         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12824         if (ret)
12825                 goto out;
12826
12827         l = path.nodes[0];
12828         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12829         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12830                                       chunk_key.offset);
12831         if (ret < 0)
12832                 goto out;
12833
12834         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12835                 goto out;
12836
12837         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12838         for (i = 0; i < num_stripes; i++) {
12839                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12840                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12841
12842                 if (devid == devext_key.objectid &&
12843                     offset == devext_key.offset) {
12844                         found_chunk = 1;
12845                         break;
12846                 }
12847         }
12848 out:
12849         btrfs_release_path(&path);
12850         if (!found_chunk) {
12851                 error(
12852                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12853                         devext_key.objectid, devext_key.offset, length);
12854                 return REFERENCER_MISSING;
12855         }
12856         return 0;
12857 }
12858
12859 /*
12860  * Check if the used space is correct with the dev item
12861  */
12862 static int check_dev_item(struct btrfs_fs_info *fs_info,
12863                           struct extent_buffer *eb, int slot)
12864 {
12865         struct btrfs_root *dev_root = fs_info->dev_root;
12866         struct btrfs_dev_item *dev_item;
12867         struct btrfs_path path;
12868         struct btrfs_key key;
12869         struct btrfs_dev_extent *ptr;
12870         u64 total_bytes;
12871         u64 dev_id;
12872         u64 used;
12873         u64 total = 0;
12874         int ret;
12875
12876         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12877         dev_id = btrfs_device_id(eb, dev_item);
12878         used = btrfs_device_bytes_used(eb, dev_item);
12879         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12880
12881         key.objectid = dev_id;
12882         key.type = BTRFS_DEV_EXTENT_KEY;
12883         key.offset = 0;
12884
12885         btrfs_init_path(&path);
12886         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12887         if (ret < 0) {
12888                 btrfs_item_key_to_cpu(eb, &key, slot);
12889                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12890                         key.objectid, key.type, key.offset);
12891                 btrfs_release_path(&path);
12892                 return REFERENCER_MISSING;
12893         }
12894
12895         /* Iterate dev_extents to calculate the used space of a device */
12896         while (1) {
12897                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12898                         goto next;
12899
12900                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12901                 if (key.objectid > dev_id)
12902                         break;
12903                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12904                         goto next;
12905
12906                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12907                                      struct btrfs_dev_extent);
12908                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12909 next:
12910                 ret = btrfs_next_item(dev_root, &path);
12911                 if (ret)
12912                         break;
12913         }
12914         btrfs_release_path(&path);
12915
12916         if (used != total) {
12917                 btrfs_item_key_to_cpu(eb, &key, slot);
12918                 error(
12919 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12920                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12921                         BTRFS_DEV_EXTENT_KEY, dev_id);
12922                 return ACCOUNTING_MISMATCH;
12923         }
12924         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12925
12926         return 0;
12927 }
12928
12929 /*
12930  * Check a block group item with its referener (chunk) and its used space
12931  * with extent/metadata item
12932  */
12933 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12934                                   struct extent_buffer *eb, int slot)
12935 {
12936         struct btrfs_root *extent_root = fs_info->extent_root;
12937         struct btrfs_root *chunk_root = fs_info->chunk_root;
12938         struct btrfs_block_group_item *bi;
12939         struct btrfs_block_group_item bg_item;
12940         struct btrfs_path path;
12941         struct btrfs_key bg_key;
12942         struct btrfs_key chunk_key;
12943         struct btrfs_key extent_key;
12944         struct btrfs_chunk *chunk;
12945         struct extent_buffer *leaf;
12946         struct btrfs_extent_item *ei;
12947         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12948         u64 flags;
12949         u64 bg_flags;
12950         u64 used;
12951         u64 total = 0;
12952         int ret;
12953         int err = 0;
12954
12955         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12956         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12957         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12958         used = btrfs_block_group_used(&bg_item);
12959         bg_flags = btrfs_block_group_flags(&bg_item);
12960
12961         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12962         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12963         chunk_key.offset = bg_key.objectid;
12964
12965         btrfs_init_path(&path);
12966         /* Search for the referencer chunk */
12967         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12968         if (ret) {
12969                 error(
12970                 "block group[%llu %llu] did not find the related chunk item",
12971                         bg_key.objectid, bg_key.offset);
12972                 err |= REFERENCER_MISSING;
12973         } else {
12974                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12975                                         struct btrfs_chunk);
12976                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12977                                                 bg_key.offset) {
12978                         error(
12979         "block group[%llu %llu] related chunk item length does not match",
12980                                 bg_key.objectid, bg_key.offset);
12981                         err |= REFERENCER_MISMATCH;
12982                 }
12983         }
12984         btrfs_release_path(&path);
12985
12986         /* Search from the block group bytenr */
12987         extent_key.objectid = bg_key.objectid;
12988         extent_key.type = 0;
12989         extent_key.offset = 0;
12990
12991         btrfs_init_path(&path);
12992         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12993         if (ret < 0)
12994                 goto out;
12995
12996         /* Iterate extent tree to account used space */
12997         while (1) {
12998                 leaf = path.nodes[0];
12999
13000                 /* Search slot can point to the last item beyond leaf nritems */
13001                 if (path.slots[0] >= btrfs_header_nritems(leaf))
13002                         goto next;
13003
13004                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
13005                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
13006                         break;
13007
13008                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
13009                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
13010                         goto next;
13011                 if (extent_key.objectid < bg_key.objectid)
13012                         goto next;
13013
13014                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
13015                         total += nodesize;
13016                 else
13017                         total += extent_key.offset;
13018
13019                 ei = btrfs_item_ptr(leaf, path.slots[0],
13020                                     struct btrfs_extent_item);
13021                 flags = btrfs_extent_flags(leaf, ei);
13022                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
13023                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
13024                                 error(
13025                         "bad extent[%llu, %llu) type mismatch with chunk",
13026                                         extent_key.objectid,
13027                                         extent_key.objectid + extent_key.offset);
13028                                 err |= CHUNK_TYPE_MISMATCH;
13029                         }
13030                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
13031                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
13032                                     BTRFS_BLOCK_GROUP_METADATA))) {
13033                                 error(
13034                         "bad extent[%llu, %llu) type mismatch with chunk",
13035                                         extent_key.objectid,
13036                                         extent_key.objectid + nodesize);
13037                                 err |= CHUNK_TYPE_MISMATCH;
13038                         }
13039                 }
13040 next:
13041                 ret = btrfs_next_item(extent_root, &path);
13042                 if (ret)
13043                         break;
13044         }
13045
13046 out:
13047         btrfs_release_path(&path);
13048
13049         if (total != used) {
13050                 error(
13051                 "block group[%llu %llu] used %llu but extent items used %llu",
13052                         bg_key.objectid, bg_key.offset, used, total);
13053                 err |= BG_ACCOUNTING_ERROR;
13054         }
13055         return err;
13056 }
13057
13058 /*
13059  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
13060  * FIXME: We still need to repair error of dev_item.
13061  *
13062  * Returns error after repair.
13063  */
13064 static int repair_chunk_item(struct btrfs_trans_handle *trans,
13065                              struct btrfs_root *chunk_root,
13066                              struct btrfs_path *path, int err)
13067 {
13068         struct btrfs_chunk *chunk;
13069         struct btrfs_key chunk_key;
13070         struct extent_buffer *eb = path->nodes[0];
13071         u64 length;
13072         int slot = path->slots[0];
13073         u64 type;
13074         int ret = 0;
13075
13076         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13077         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
13078                 return err;
13079         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13080         type = btrfs_chunk_type(path->nodes[0], chunk);
13081         length = btrfs_chunk_length(eb, chunk);
13082
13083         if (err & REFERENCER_MISSING) {
13084                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
13085                      type, chunk_key.objectid, chunk_key.offset, length);
13086                 if (ret) {
13087                         error("fail to add block group item[%llu %llu]",
13088                               chunk_key.offset, length);
13089                         goto out;
13090                 } else {
13091                         err &= ~REFERENCER_MISSING;
13092                         printf("Added block group item[%llu %llu]\n",
13093                                chunk_key.offset, length);
13094                 }
13095         }
13096
13097 out:
13098         return err;
13099 }
13100
13101 /*
13102  * Check a chunk item.
13103  * Including checking all referred dev_extents and block group
13104  */
13105 static int check_chunk_item(struct btrfs_fs_info *fs_info,
13106                             struct extent_buffer *eb, int slot)
13107 {
13108         struct btrfs_root *extent_root = fs_info->extent_root;
13109         struct btrfs_root *dev_root = fs_info->dev_root;
13110         struct btrfs_path path;
13111         struct btrfs_key chunk_key;
13112         struct btrfs_key bg_key;
13113         struct btrfs_key devext_key;
13114         struct btrfs_chunk *chunk;
13115         struct extent_buffer *leaf;
13116         struct btrfs_block_group_item *bi;
13117         struct btrfs_block_group_item bg_item;
13118         struct btrfs_dev_extent *ptr;
13119         u64 length;
13120         u64 chunk_end;
13121         u64 stripe_len;
13122         u64 type;
13123         int num_stripes;
13124         u64 offset;
13125         u64 objectid;
13126         int i;
13127         int ret;
13128         int err = 0;
13129
13130         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13131         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13132         length = btrfs_chunk_length(eb, chunk);
13133         chunk_end = chunk_key.offset + length;
13134         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13135                                       chunk_key.offset);
13136         if (ret < 0) {
13137                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13138                         chunk_end);
13139                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13140                 goto out;
13141         }
13142         type = btrfs_chunk_type(eb, chunk);
13143
13144         bg_key.objectid = chunk_key.offset;
13145         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13146         bg_key.offset = length;
13147
13148         btrfs_init_path(&path);
13149         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13150         if (ret) {
13151                 error(
13152                 "chunk[%llu %llu) did not find the related block group item",
13153                         chunk_key.offset, chunk_end);
13154                 err |= REFERENCER_MISSING;
13155         } else{
13156                 leaf = path.nodes[0];
13157                 bi = btrfs_item_ptr(leaf, path.slots[0],
13158                                     struct btrfs_block_group_item);
13159                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13160                                    sizeof(bg_item));
13161                 if (btrfs_block_group_flags(&bg_item) != type) {
13162                         error(
13163 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13164                                 chunk_key.offset, chunk_end, type,
13165                                 btrfs_block_group_flags(&bg_item));
13166                         err |= REFERENCER_MISSING;
13167                 }
13168         }
13169
13170         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13171         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13172         for (i = 0; i < num_stripes; i++) {
13173                 btrfs_release_path(&path);
13174                 btrfs_init_path(&path);
13175                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13176                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13177                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13178
13179                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13180                                         0, 0);
13181                 if (ret)
13182                         goto not_match_dev;
13183
13184                 leaf = path.nodes[0];
13185                 ptr = btrfs_item_ptr(leaf, path.slots[0],
13186                                      struct btrfs_dev_extent);
13187                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13188                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13189                 if (objectid != chunk_key.objectid ||
13190                     offset != chunk_key.offset ||
13191                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13192                         goto not_match_dev;
13193                 continue;
13194 not_match_dev:
13195                 err |= BACKREF_MISSING;
13196                 error(
13197                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13198                         chunk_key.objectid, chunk_end, i);
13199                 continue;
13200         }
13201         btrfs_release_path(&path);
13202 out:
13203         return err;
13204 }
13205
13206 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13207                                    struct btrfs_root *root,
13208                                    struct btrfs_path *path)
13209 {
13210         struct btrfs_key key;
13211         int ret = 0;
13212
13213         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13214         btrfs_release_path(path);
13215         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13216         if (ret) {
13217                 ret = -ENOENT;
13218                 goto out;
13219         }
13220
13221         ret = btrfs_del_item(trans, root, path);
13222         if (ret)
13223                 goto out;
13224
13225         if (path->slots[0] == 0)
13226                 btrfs_prev_leaf(root, path);
13227         else
13228                 path->slots[0]--;
13229 out:
13230         if (ret)
13231                 error("failed to delete root %llu item[%llu, %u, %llu]",
13232                       root->objectid, key.objectid, key.type, key.offset);
13233         else
13234                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13235                        root->objectid, key.objectid, key.type, key.offset);
13236         return ret;
13237 }
13238
13239 /*
13240  * Main entry function to check known items and update related accounting info
13241  */
13242 static int check_leaf_items(struct btrfs_trans_handle *trans,
13243                             struct btrfs_root *root, struct btrfs_path *path,
13244                             struct node_refs *nrefs, int account_bytes)
13245 {
13246         struct btrfs_fs_info *fs_info = root->fs_info;
13247         struct btrfs_key key;
13248         struct extent_buffer *eb;
13249         int slot;
13250         int type;
13251         struct btrfs_extent_data_ref *dref;
13252         int ret = 0;
13253         int err = 0;
13254
13255 again:
13256         eb = path->nodes[0];
13257         slot = path->slots[0];
13258         if (slot >= btrfs_header_nritems(eb)) {
13259                 if (slot == 0) {
13260                         error("empty leaf [%llu %u] root %llu", eb->start,
13261                                 root->fs_info->nodesize, root->objectid);
13262                         err |= EIO;
13263                 }
13264                 goto out;
13265         }
13266
13267         btrfs_item_key_to_cpu(eb, &key, slot);
13268         type = key.type;
13269
13270         switch (type) {
13271         case BTRFS_EXTENT_DATA_KEY:
13272                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13273                 if (repair && ret)
13274                         ret = repair_extent_data_item(trans, root, path, nrefs,
13275                                                       ret);
13276                 err |= ret;
13277                 break;
13278         case BTRFS_BLOCK_GROUP_ITEM_KEY:
13279                 ret = check_block_group_item(fs_info, eb, slot);
13280                 if (repair &&
13281                     ret & REFERENCER_MISSING)
13282                         ret = delete_extent_tree_item(trans, root, path);
13283                 err |= ret;
13284                 break;
13285         case BTRFS_DEV_ITEM_KEY:
13286                 ret = check_dev_item(fs_info, eb, slot);
13287                 err |= ret;
13288                 break;
13289         case BTRFS_CHUNK_ITEM_KEY:
13290                 ret = check_chunk_item(fs_info, eb, slot);
13291                 if (repair && ret)
13292                         ret = repair_chunk_item(trans, root, path, ret);
13293                 err |= ret;
13294                 break;
13295         case BTRFS_DEV_EXTENT_KEY:
13296                 ret = check_dev_extent_item(fs_info, eb, slot);
13297                 err |= ret;
13298                 break;
13299         case BTRFS_EXTENT_ITEM_KEY:
13300         case BTRFS_METADATA_ITEM_KEY:
13301                 ret = check_extent_item(trans, fs_info, path);
13302                 err |= ret;
13303                 break;
13304         case BTRFS_EXTENT_CSUM_KEY:
13305                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
13306                 err |= ret;
13307                 break;
13308         case BTRFS_TREE_BLOCK_REF_KEY:
13309                 ret = check_tree_block_backref(fs_info, key.offset,
13310                                                key.objectid, -1);
13311                 if (repair &&
13312                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13313                         ret = delete_extent_tree_item(trans, root, path);
13314                 err |= ret;
13315                 break;
13316         case BTRFS_EXTENT_DATA_REF_KEY:
13317                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13318                 ret = check_extent_data_backref(fs_info,
13319                                 btrfs_extent_data_ref_root(eb, dref),
13320                                 btrfs_extent_data_ref_objectid(eb, dref),
13321                                 btrfs_extent_data_ref_offset(eb, dref),
13322                                 key.objectid, 0,
13323                                 btrfs_extent_data_ref_count(eb, dref));
13324                 if (repair &&
13325                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13326                         ret = delete_extent_tree_item(trans, root, path);
13327                 err |= ret;
13328                 break;
13329         case BTRFS_SHARED_BLOCK_REF_KEY:
13330                 ret = check_shared_block_backref(fs_info, key.offset,
13331                                                  key.objectid, -1);
13332                 if (repair &&
13333                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13334                         ret = delete_extent_tree_item(trans, root, path);
13335                 err |= ret;
13336                 break;
13337         case BTRFS_SHARED_DATA_REF_KEY:
13338                 ret = check_shared_data_backref(fs_info, key.offset,
13339                                                 key.objectid);
13340                 if (repair &&
13341                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13342                         ret = delete_extent_tree_item(trans, root, path);
13343                 err |= ret;
13344                 break;
13345         default:
13346                 break;
13347         }
13348
13349         ++path->slots[0];
13350         goto again;
13351 out:
13352         return err;
13353 }
13354
13355 /*
13356  * Low memory usage version check_chunks_and_extents.
13357  */
13358 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13359 {
13360         struct btrfs_trans_handle *trans = NULL;
13361         struct btrfs_path path;
13362         struct btrfs_key old_key;
13363         struct btrfs_key key;
13364         struct btrfs_root *root1;
13365         struct btrfs_root *root;
13366         struct btrfs_root *cur_root;
13367         int err = 0;
13368         int ret;
13369
13370         root = fs_info->fs_root;
13371
13372         if (repair) {
13373                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13374                 if (IS_ERR(trans)) {
13375                         error("failed to start transaction before check");
13376                         return PTR_ERR(trans);
13377                 }
13378         }
13379
13380         root1 = root->fs_info->chunk_root;
13381         ret = check_btrfs_root(trans, root1, 0, 1);
13382         err |= ret;
13383
13384         root1 = root->fs_info->tree_root;
13385         ret = check_btrfs_root(trans, root1, 0, 1);
13386         err |= ret;
13387
13388         btrfs_init_path(&path);
13389         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13390         key.offset = 0;
13391         key.type = BTRFS_ROOT_ITEM_KEY;
13392
13393         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13394         if (ret) {
13395                 error("cannot find extent tree in tree_root");
13396                 goto out;
13397         }
13398
13399         while (1) {
13400                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13401                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13402                         goto next;
13403                 old_key = key;
13404                 key.offset = (u64)-1;
13405
13406                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13407                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13408                                         &key);
13409                 else
13410                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13411                 if (IS_ERR(cur_root) || !cur_root) {
13412                         error("failed to read tree: %lld", key.objectid);
13413                         goto next;
13414                 }
13415
13416                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13417                 err |= ret;
13418
13419                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13420                         btrfs_free_fs_root(cur_root);
13421
13422                 btrfs_release_path(&path);
13423                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13424                                         &old_key, &path, 0, 0);
13425                 if (ret)
13426                         goto out;
13427 next:
13428                 ret = btrfs_next_item(root1, &path);
13429                 if (ret)
13430                         goto out;
13431         }
13432 out:
13433
13434         /* if repair, update block accounting */
13435         if (repair) {
13436                 ret = btrfs_fix_block_accounting(trans, root);
13437                 if (ret)
13438                         err |= ret;
13439                 else
13440                         err &= ~BG_ACCOUNTING_ERROR;
13441         }
13442
13443         if (trans)
13444                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13445
13446         btrfs_release_path(&path);
13447
13448         return err;
13449 }
13450
13451 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13452 {
13453         int ret;
13454
13455         if (!ctx.progress_enabled)
13456                 fprintf(stderr, "checking extents\n");
13457         if (check_mode == CHECK_MODE_LOWMEM)
13458                 ret = check_chunks_and_extents_v2(fs_info);
13459         else
13460                 ret = check_chunks_and_extents(fs_info);
13461
13462         /* Also repair device size related problems */
13463         if (repair && !ret) {
13464                 ret = btrfs_fix_device_and_super_size(fs_info);
13465                 if (ret > 0)
13466                         ret = 0;
13467         }
13468         return ret;
13469 }
13470
13471 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13472                            struct btrfs_root *root, int overwrite)
13473 {
13474         struct extent_buffer *c;
13475         struct extent_buffer *old = root->node;
13476         int level;
13477         int ret;
13478         struct btrfs_disk_key disk_key = {0,0,0};
13479
13480         level = 0;
13481
13482         if (overwrite) {
13483                 c = old;
13484                 extent_buffer_get(c);
13485                 goto init;
13486         }
13487         c = btrfs_alloc_free_block(trans, root,
13488                                    root->fs_info->nodesize,
13489                                    root->root_key.objectid,
13490                                    &disk_key, level, 0, 0);
13491         if (IS_ERR(c)) {
13492                 c = old;
13493                 extent_buffer_get(c);
13494                 overwrite = 1;
13495         }
13496 init:
13497         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13498         btrfs_set_header_level(c, level);
13499         btrfs_set_header_bytenr(c, c->start);
13500         btrfs_set_header_generation(c, trans->transid);
13501         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13502         btrfs_set_header_owner(c, root->root_key.objectid);
13503
13504         write_extent_buffer(c, root->fs_info->fsid,
13505                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13506
13507         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13508                             btrfs_header_chunk_tree_uuid(c),
13509                             BTRFS_UUID_SIZE);
13510
13511         btrfs_mark_buffer_dirty(c);
13512         /*
13513          * this case can happen in the following case:
13514          *
13515          * 1.overwrite previous root.
13516          *
13517          * 2.reinit reloc data root, this is because we skip pin
13518          * down reloc data tree before which means we can allocate
13519          * same block bytenr here.
13520          */
13521         if (old->start == c->start) {
13522                 btrfs_set_root_generation(&root->root_item,
13523                                           trans->transid);
13524                 root->root_item.level = btrfs_header_level(root->node);
13525                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13526                                         &root->root_key, &root->root_item);
13527                 if (ret) {
13528                         free_extent_buffer(c);
13529                         return ret;
13530                 }
13531         }
13532         free_extent_buffer(old);
13533         root->node = c;
13534         add_root_to_dirty_list(root);
13535         return 0;
13536 }
13537
13538 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13539                                 struct extent_buffer *eb, int tree_root)
13540 {
13541         struct extent_buffer *tmp;
13542         struct btrfs_root_item *ri;
13543         struct btrfs_key key;
13544         u64 bytenr;
13545         int level = btrfs_header_level(eb);
13546         int nritems;
13547         int ret;
13548         int i;
13549
13550         /*
13551          * If we have pinned this block before, don't pin it again.
13552          * This can not only avoid forever loop with broken filesystem
13553          * but also give us some speedups.
13554          */
13555         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13556                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13557                 return 0;
13558
13559         btrfs_pin_extent(fs_info, eb->start, eb->len);
13560
13561         nritems = btrfs_header_nritems(eb);
13562         for (i = 0; i < nritems; i++) {
13563                 if (level == 0) {
13564                         btrfs_item_key_to_cpu(eb, &key, i);
13565                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13566                                 continue;
13567                         /* Skip the extent root and reloc roots */
13568                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13569                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13570                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13571                                 continue;
13572                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13573                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13574
13575                         /*
13576                          * If at any point we start needing the real root we
13577                          * will have to build a stump root for the root we are
13578                          * in, but for now this doesn't actually use the root so
13579                          * just pass in extent_root.
13580                          */
13581                         tmp = read_tree_block(fs_info, bytenr, 0);
13582                         if (!extent_buffer_uptodate(tmp)) {
13583                                 fprintf(stderr, "Error reading root block\n");
13584                                 return -EIO;
13585                         }
13586                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13587                         free_extent_buffer(tmp);
13588                         if (ret)
13589                                 return ret;
13590                 } else {
13591                         bytenr = btrfs_node_blockptr(eb, i);
13592
13593                         /* If we aren't the tree root don't read the block */
13594                         if (level == 1 && !tree_root) {
13595                                 btrfs_pin_extent(fs_info, bytenr,
13596                                                 fs_info->nodesize);
13597                                 continue;
13598                         }
13599
13600                         tmp = read_tree_block(fs_info, bytenr, 0);
13601                         if (!extent_buffer_uptodate(tmp)) {
13602                                 fprintf(stderr, "Error reading tree block\n");
13603                                 return -EIO;
13604                         }
13605                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13606                         free_extent_buffer(tmp);
13607                         if (ret)
13608                                 return ret;
13609                 }
13610         }
13611
13612         return 0;
13613 }
13614
13615 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13616 {
13617         int ret;
13618
13619         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13620         if (ret)
13621                 return ret;
13622
13623         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13624 }
13625
13626 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13627 {
13628         struct btrfs_block_group_cache *cache;
13629         struct btrfs_path path;
13630         struct extent_buffer *leaf;
13631         struct btrfs_chunk *chunk;
13632         struct btrfs_key key;
13633         int ret;
13634         u64 start;
13635
13636         btrfs_init_path(&path);
13637         key.objectid = 0;
13638         key.type = BTRFS_CHUNK_ITEM_KEY;
13639         key.offset = 0;
13640         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13641         if (ret < 0) {
13642                 btrfs_release_path(&path);
13643                 return ret;
13644         }
13645
13646         /*
13647          * We do this in case the block groups were screwed up and had alloc
13648          * bits that aren't actually set on the chunks.  This happens with
13649          * restored images every time and could happen in real life I guess.
13650          */
13651         fs_info->avail_data_alloc_bits = 0;
13652         fs_info->avail_metadata_alloc_bits = 0;
13653         fs_info->avail_system_alloc_bits = 0;
13654
13655         /* First we need to create the in-memory block groups */
13656         while (1) {
13657                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13658                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13659                         if (ret < 0) {
13660                                 btrfs_release_path(&path);
13661                                 return ret;
13662                         }
13663                         if (ret) {
13664                                 ret = 0;
13665                                 break;
13666                         }
13667                 }
13668                 leaf = path.nodes[0];
13669                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13670                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13671                         path.slots[0]++;
13672                         continue;
13673                 }
13674
13675                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13676                 btrfs_add_block_group(fs_info, 0,
13677                                       btrfs_chunk_type(leaf, chunk),
13678                                       key.objectid, key.offset,
13679                                       btrfs_chunk_length(leaf, chunk));
13680                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13681                                  key.offset + btrfs_chunk_length(leaf, chunk));
13682                 path.slots[0]++;
13683         }
13684         start = 0;
13685         while (1) {
13686                 cache = btrfs_lookup_first_block_group(fs_info, start);
13687                 if (!cache)
13688                         break;
13689                 cache->cached = 1;
13690                 start = cache->key.objectid + cache->key.offset;
13691         }
13692
13693         btrfs_release_path(&path);
13694         return 0;
13695 }
13696
13697 static int reset_balance(struct btrfs_trans_handle *trans,
13698                          struct btrfs_fs_info *fs_info)
13699 {
13700         struct btrfs_root *root = fs_info->tree_root;
13701         struct btrfs_path path;
13702         struct extent_buffer *leaf;
13703         struct btrfs_key key;
13704         int del_slot, del_nr = 0;
13705         int ret;
13706         int found = 0;
13707
13708         btrfs_init_path(&path);
13709         key.objectid = BTRFS_BALANCE_OBJECTID;
13710         key.type = BTRFS_BALANCE_ITEM_KEY;
13711         key.offset = 0;
13712         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13713         if (ret) {
13714                 if (ret > 0)
13715                         ret = 0;
13716                 if (!ret)
13717                         goto reinit_data_reloc;
13718                 else
13719                         goto out;
13720         }
13721
13722         ret = btrfs_del_item(trans, root, &path);
13723         if (ret)
13724                 goto out;
13725         btrfs_release_path(&path);
13726
13727         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13728         key.type = BTRFS_ROOT_ITEM_KEY;
13729         key.offset = 0;
13730         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13731         if (ret < 0)
13732                 goto out;
13733         while (1) {
13734                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13735                         if (!found)
13736                                 break;
13737
13738                         if (del_nr) {
13739                                 ret = btrfs_del_items(trans, root, &path,
13740                                                       del_slot, del_nr);
13741                                 del_nr = 0;
13742                                 if (ret)
13743                                         goto out;
13744                         }
13745                         key.offset++;
13746                         btrfs_release_path(&path);
13747
13748                         found = 0;
13749                         ret = btrfs_search_slot(trans, root, &key, &path,
13750                                                 -1, 1);
13751                         if (ret < 0)
13752                                 goto out;
13753                         continue;
13754                 }
13755                 found = 1;
13756                 leaf = path.nodes[0];
13757                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13758                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13759                         break;
13760                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13761                         path.slots[0]++;
13762                         continue;
13763                 }
13764                 if (!del_nr) {
13765                         del_slot = path.slots[0];
13766                         del_nr = 1;
13767                 } else {
13768                         del_nr++;
13769                 }
13770                 path.slots[0]++;
13771         }
13772
13773         if (del_nr) {
13774                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13775                 if (ret)
13776                         goto out;
13777         }
13778         btrfs_release_path(&path);
13779
13780 reinit_data_reloc:
13781         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13782         key.type = BTRFS_ROOT_ITEM_KEY;
13783         key.offset = (u64)-1;
13784         root = btrfs_read_fs_root(fs_info, &key);
13785         if (IS_ERR(root)) {
13786                 fprintf(stderr, "Error reading data reloc tree\n");
13787                 ret = PTR_ERR(root);
13788                 goto out;
13789         }
13790         record_root_in_trans(trans, root);
13791         ret = btrfs_fsck_reinit_root(trans, root, 0);
13792         if (ret)
13793                 goto out;
13794         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13795 out:
13796         btrfs_release_path(&path);
13797         return ret;
13798 }
13799
13800 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13801                               struct btrfs_fs_info *fs_info)
13802 {
13803         u64 start = 0;
13804         int ret;
13805
13806         /*
13807          * The only reason we don't do this is because right now we're just
13808          * walking the trees we find and pinning down their bytes, we don't look
13809          * at any of the leaves.  In order to do mixed groups we'd have to check
13810          * the leaves of any fs roots and pin down the bytes for any file
13811          * extents we find.  Not hard but why do it if we don't have to?
13812          */
13813         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13814                 fprintf(stderr, "We don't support re-initing the extent tree "
13815                         "for mixed block groups yet, please notify a btrfs "
13816                         "developer you want to do this so they can add this "
13817                         "functionality.\n");
13818                 return -EINVAL;
13819         }
13820
13821         /*
13822          * first we need to walk all of the trees except the extent tree and pin
13823          * down the bytes that are in use so we don't overwrite any existing
13824          * metadata.
13825          */
13826         ret = pin_metadata_blocks(fs_info);
13827         if (ret) {
13828                 fprintf(stderr, "error pinning down used bytes\n");
13829                 return ret;
13830         }
13831
13832         /*
13833          * Need to drop all the block groups since we're going to recreate all
13834          * of them again.
13835          */
13836         btrfs_free_block_groups(fs_info);
13837         ret = reset_block_groups(fs_info);
13838         if (ret) {
13839                 fprintf(stderr, "error resetting the block groups\n");
13840                 return ret;
13841         }
13842
13843         /* Ok we can allocate now, reinit the extent root */
13844         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13845         if (ret) {
13846                 fprintf(stderr, "extent root initialization failed\n");
13847                 /*
13848                  * When the transaction code is updated we should end the
13849                  * transaction, but for now progs only knows about commit so
13850                  * just return an error.
13851                  */
13852                 return ret;
13853         }
13854
13855         /*
13856          * Now we have all the in-memory block groups setup so we can make
13857          * allocations properly, and the metadata we care about is safe since we
13858          * pinned all of it above.
13859          */
13860         while (1) {
13861                 struct btrfs_block_group_cache *cache;
13862
13863                 cache = btrfs_lookup_first_block_group(fs_info, start);
13864                 if (!cache)
13865                         break;
13866                 start = cache->key.objectid + cache->key.offset;
13867                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13868                                         &cache->key, &cache->item,
13869                                         sizeof(cache->item));
13870                 if (ret) {
13871                         fprintf(stderr, "Error adding block group\n");
13872                         return ret;
13873                 }
13874                 btrfs_extent_post_op(trans, fs_info->extent_root);
13875         }
13876
13877         ret = reset_balance(trans, fs_info);
13878         if (ret)
13879                 fprintf(stderr, "error resetting the pending balance\n");
13880
13881         return ret;
13882 }
13883
13884 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13885 {
13886         struct btrfs_path path;
13887         struct btrfs_trans_handle *trans;
13888         struct btrfs_key key;
13889         int ret;
13890
13891         printf("Recowing metadata block %llu\n", eb->start);
13892         key.objectid = btrfs_header_owner(eb);
13893         key.type = BTRFS_ROOT_ITEM_KEY;
13894         key.offset = (u64)-1;
13895
13896         root = btrfs_read_fs_root(root->fs_info, &key);
13897         if (IS_ERR(root)) {
13898                 fprintf(stderr, "Couldn't find owner root %llu\n",
13899                         key.objectid);
13900                 return PTR_ERR(root);
13901         }
13902
13903         trans = btrfs_start_transaction(root, 1);
13904         if (IS_ERR(trans))
13905                 return PTR_ERR(trans);
13906
13907         btrfs_init_path(&path);
13908         path.lowest_level = btrfs_header_level(eb);
13909         if (path.lowest_level)
13910                 btrfs_node_key_to_cpu(eb, &key, 0);
13911         else
13912                 btrfs_item_key_to_cpu(eb, &key, 0);
13913
13914         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13915         btrfs_commit_transaction(trans, root);
13916         btrfs_release_path(&path);
13917         return ret;
13918 }
13919
13920 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13921 {
13922         struct btrfs_path path;
13923         struct btrfs_trans_handle *trans;
13924         struct btrfs_key key;
13925         int ret;
13926
13927         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13928                bad->key.type, bad->key.offset);
13929         key.objectid = bad->root_id;
13930         key.type = BTRFS_ROOT_ITEM_KEY;
13931         key.offset = (u64)-1;
13932
13933         root = btrfs_read_fs_root(root->fs_info, &key);
13934         if (IS_ERR(root)) {
13935                 fprintf(stderr, "Couldn't find owner root %llu\n",
13936                         key.objectid);
13937                 return PTR_ERR(root);
13938         }
13939
13940         trans = btrfs_start_transaction(root, 1);
13941         if (IS_ERR(trans))
13942                 return PTR_ERR(trans);
13943
13944         btrfs_init_path(&path);
13945         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13946         if (ret) {
13947                 if (ret > 0)
13948                         ret = 0;
13949                 goto out;
13950         }
13951         ret = btrfs_del_item(trans, root, &path);
13952 out:
13953         btrfs_commit_transaction(trans, root);
13954         btrfs_release_path(&path);
13955         return ret;
13956 }
13957
13958 static int zero_log_tree(struct btrfs_root *root)
13959 {
13960         struct btrfs_trans_handle *trans;
13961         int ret;
13962
13963         trans = btrfs_start_transaction(root, 1);
13964         if (IS_ERR(trans)) {
13965                 ret = PTR_ERR(trans);
13966                 return ret;
13967         }
13968         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13969         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13970         ret = btrfs_commit_transaction(trans, root);
13971         return ret;
13972 }
13973
13974 static int populate_csum(struct btrfs_trans_handle *trans,
13975                          struct btrfs_root *csum_root, char *buf, u64 start,
13976                          u64 len)
13977 {
13978         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13979         u64 offset = 0;
13980         u64 sectorsize;
13981         int ret = 0;
13982
13983         while (offset < len) {
13984                 sectorsize = fs_info->sectorsize;
13985                 ret = read_extent_data(fs_info, buf, start + offset,
13986                                        &sectorsize, 0);
13987                 if (ret)
13988                         break;
13989                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13990                                             start + offset, buf, sectorsize);
13991                 if (ret)
13992                         break;
13993                 offset += sectorsize;
13994         }
13995         return ret;
13996 }
13997
13998 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13999                                       struct btrfs_root *csum_root,
14000                                       struct btrfs_root *cur_root)
14001 {
14002         struct btrfs_path path;
14003         struct btrfs_key key;
14004         struct extent_buffer *node;
14005         struct btrfs_file_extent_item *fi;
14006         char *buf = NULL;
14007         u64 start = 0;
14008         u64 len = 0;
14009         int slot = 0;
14010         int ret = 0;
14011
14012         buf = malloc(cur_root->fs_info->sectorsize);
14013         if (!buf)
14014                 return -ENOMEM;
14015
14016         btrfs_init_path(&path);
14017         key.objectid = 0;
14018         key.offset = 0;
14019         key.type = 0;
14020         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
14021         if (ret < 0)
14022                 goto out;
14023         /* Iterate all regular file extents and fill its csum */
14024         while (1) {
14025                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
14026
14027                 if (key.type != BTRFS_EXTENT_DATA_KEY)
14028                         goto next;
14029                 node = path.nodes[0];
14030                 slot = path.slots[0];
14031                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
14032                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
14033                         goto next;
14034                 start = btrfs_file_extent_disk_bytenr(node, fi);
14035                 len = btrfs_file_extent_disk_num_bytes(node, fi);
14036
14037                 ret = populate_csum(trans, csum_root, buf, start, len);
14038                 if (ret == -EEXIST)
14039                         ret = 0;
14040                 if (ret < 0)
14041                         goto out;
14042 next:
14043                 /*
14044                  * TODO: if next leaf is corrupted, jump to nearest next valid
14045                  * leaf.
14046                  */
14047                 ret = btrfs_next_item(cur_root, &path);
14048                 if (ret < 0)
14049                         goto out;
14050                 if (ret > 0) {
14051                         ret = 0;
14052                         goto out;
14053                 }
14054         }
14055
14056 out:
14057         btrfs_release_path(&path);
14058         free(buf);
14059         return ret;
14060 }
14061
14062 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
14063                                   struct btrfs_root *csum_root)
14064 {
14065         struct btrfs_fs_info *fs_info = csum_root->fs_info;
14066         struct btrfs_path path;
14067         struct btrfs_root *tree_root = fs_info->tree_root;
14068         struct btrfs_root *cur_root;
14069         struct extent_buffer *node;
14070         struct btrfs_key key;
14071         int slot = 0;
14072         int ret = 0;
14073
14074         btrfs_init_path(&path);
14075         key.objectid = BTRFS_FS_TREE_OBJECTID;
14076         key.offset = 0;
14077         key.type = BTRFS_ROOT_ITEM_KEY;
14078         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
14079         if (ret < 0)
14080                 goto out;
14081         if (ret > 0) {
14082                 ret = -ENOENT;
14083                 goto out;
14084         }
14085
14086         while (1) {
14087                 node = path.nodes[0];
14088                 slot = path.slots[0];
14089                 btrfs_item_key_to_cpu(node, &key, slot);
14090                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
14091                         goto out;
14092                 if (key.type != BTRFS_ROOT_ITEM_KEY)
14093                         goto next;
14094                 if (!is_fstree(key.objectid))
14095                         goto next;
14096                 key.offset = (u64)-1;
14097
14098                 cur_root = btrfs_read_fs_root(fs_info, &key);
14099                 if (IS_ERR(cur_root) || !cur_root) {
14100                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
14101                                 key.objectid);
14102                         goto out;
14103                 }
14104                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
14105                                 cur_root);
14106                 if (ret < 0)
14107                         goto out;
14108 next:
14109                 ret = btrfs_next_item(tree_root, &path);
14110                 if (ret > 0) {
14111                         ret = 0;
14112                         goto out;
14113                 }
14114                 if (ret < 0)
14115                         goto out;
14116         }
14117
14118 out:
14119         btrfs_release_path(&path);
14120         return ret;
14121 }
14122
14123 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14124                                       struct btrfs_root *csum_root)
14125 {
14126         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14127         struct btrfs_path path;
14128         struct btrfs_extent_item *ei;
14129         struct extent_buffer *leaf;
14130         char *buf;
14131         struct btrfs_key key;
14132         int ret;
14133
14134         btrfs_init_path(&path);
14135         key.objectid = 0;
14136         key.type = BTRFS_EXTENT_ITEM_KEY;
14137         key.offset = 0;
14138         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14139         if (ret < 0) {
14140                 btrfs_release_path(&path);
14141                 return ret;
14142         }
14143
14144         buf = malloc(csum_root->fs_info->sectorsize);
14145         if (!buf) {
14146                 btrfs_release_path(&path);
14147                 return -ENOMEM;
14148         }
14149
14150         while (1) {
14151                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14152                         ret = btrfs_next_leaf(extent_root, &path);
14153                         if (ret < 0)
14154                                 break;
14155                         if (ret) {
14156                                 ret = 0;
14157                                 break;
14158                         }
14159                 }
14160                 leaf = path.nodes[0];
14161
14162                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14163                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14164                         path.slots[0]++;
14165                         continue;
14166                 }
14167
14168                 ei = btrfs_item_ptr(leaf, path.slots[0],
14169                                     struct btrfs_extent_item);
14170                 if (!(btrfs_extent_flags(leaf, ei) &
14171                       BTRFS_EXTENT_FLAG_DATA)) {
14172                         path.slots[0]++;
14173                         continue;
14174                 }
14175
14176                 ret = populate_csum(trans, csum_root, buf, key.objectid,
14177                                     key.offset);
14178                 if (ret)
14179                         break;
14180                 path.slots[0]++;
14181         }
14182
14183         btrfs_release_path(&path);
14184         free(buf);
14185         return ret;
14186 }
14187
/*
 * Recompute all data checksums and insert them into the csum tree.
 *
 * After an extent tree init the extent info has been wiped, so the extent
 * tree cannot be used as the source.  In that case the caller sets
 * @search_fs_tree and the fs/subvolume trees are walked instead; otherwise
 * the extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
14204
14205 static void free_roots_info_cache(void)
14206 {
14207         if (!roots_info_cache)
14208                 return;
14209
14210         while (!cache_tree_empty(roots_info_cache)) {
14211                 struct cache_extent *entry;
14212                 struct root_item_info *rii;
14213
14214                 entry = first_cache_extent(roots_info_cache);
14215                 if (!entry)
14216                         break;
14217                 remove_cache_extent(roots_info_cache, entry);
14218                 rii = container_of(entry, struct root_item_info, cache_extent);
14219                 free(rii);
14220         }
14221
14222         free(roots_info_cache);
14223         roots_info_cache = NULL;
14224 }
14225
14226 static int build_roots_info_cache(struct btrfs_fs_info *info)
14227 {
14228         int ret = 0;
14229         struct btrfs_key key;
14230         struct extent_buffer *leaf;
14231         struct btrfs_path path;
14232
14233         if (!roots_info_cache) {
14234                 roots_info_cache = malloc(sizeof(*roots_info_cache));
14235                 if (!roots_info_cache)
14236                         return -ENOMEM;
14237                 cache_tree_init(roots_info_cache);
14238         }
14239
14240         btrfs_init_path(&path);
14241         key.objectid = 0;
14242         key.type = BTRFS_EXTENT_ITEM_KEY;
14243         key.offset = 0;
14244         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14245         if (ret < 0)
14246                 goto out;
14247         leaf = path.nodes[0];
14248
14249         while (1) {
14250                 struct btrfs_key found_key;
14251                 struct btrfs_extent_item *ei;
14252                 struct btrfs_extent_inline_ref *iref;
14253                 int slot = path.slots[0];
14254                 int type;
14255                 u64 flags;
14256                 u64 root_id;
14257                 u8 level;
14258                 struct cache_extent *entry;
14259                 struct root_item_info *rii;
14260
14261                 if (slot >= btrfs_header_nritems(leaf)) {
14262                         ret = btrfs_next_leaf(info->extent_root, &path);
14263                         if (ret < 0) {
14264                                 break;
14265                         } else if (ret) {
14266                                 ret = 0;
14267                                 break;
14268                         }
14269                         leaf = path.nodes[0];
14270                         slot = path.slots[0];
14271                 }
14272
14273                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14274
14275                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14276                     found_key.type != BTRFS_METADATA_ITEM_KEY)
14277                         goto next;
14278
14279                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14280                 flags = btrfs_extent_flags(leaf, ei);
14281
14282                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14283                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
14284                         goto next;
14285
14286                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14287                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14288                         level = found_key.offset;
14289                 } else {
14290                         struct btrfs_tree_block_info *binfo;
14291
14292                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
14293                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14294                         level = btrfs_tree_block_level(leaf, binfo);
14295                 }
14296
14297                 /*
14298                  * For a root extent, it must be of the following type and the
14299                  * first (and only one) iref in the item.
14300                  */
14301                 type = btrfs_extent_inline_ref_type(leaf, iref);
14302                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14303                         goto next;
14304
14305                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14306                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14307                 if (!entry) {
14308                         rii = malloc(sizeof(struct root_item_info));
14309                         if (!rii) {
14310                                 ret = -ENOMEM;
14311                                 goto out;
14312                         }
14313                         rii->cache_extent.start = root_id;
14314                         rii->cache_extent.size = 1;
14315                         rii->level = (u8)-1;
14316                         entry = &rii->cache_extent;
14317                         ret = insert_cache_extent(roots_info_cache, entry);
14318                         ASSERT(ret == 0);
14319                 } else {
14320                         rii = container_of(entry, struct root_item_info,
14321                                            cache_extent);
14322                 }
14323
14324                 ASSERT(rii->cache_extent.start == root_id);
14325                 ASSERT(rii->cache_extent.size == 1);
14326
14327                 if (level > rii->level || rii->level == (u8)-1) {
14328                         rii->level = level;
14329                         rii->bytenr = found_key.objectid;
14330                         rii->gen = btrfs_extent_generation(leaf, ei);
14331                         rii->node_count = 1;
14332                 } else if (level == rii->level) {
14333                         rii->node_count++;
14334                 }
14335 next:
14336                 path.slots[0]++;
14337         }
14338
14339 out:
14340         btrfs_release_path(&path);
14341
14342         return ret;
14343 }
14344
14345 static int maybe_repair_root_item(struct btrfs_path *path,
14346                                   const struct btrfs_key *root_key,
14347                                   const int read_only_mode)
14348 {
14349         const u64 root_id = root_key->objectid;
14350         struct cache_extent *entry;
14351         struct root_item_info *rii;
14352         struct btrfs_root_item ri;
14353         unsigned long offset;
14354
14355         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14356         if (!entry) {
14357                 fprintf(stderr,
14358                         "Error: could not find extent items for root %llu\n",
14359                         root_key->objectid);
14360                 return -ENOENT;
14361         }
14362
14363         rii = container_of(entry, struct root_item_info, cache_extent);
14364         ASSERT(rii->cache_extent.start == root_id);
14365         ASSERT(rii->cache_extent.size == 1);
14366
14367         if (rii->node_count != 1) {
14368                 fprintf(stderr,
14369                         "Error: could not find btree root extent for root %llu\n",
14370                         root_id);
14371                 return -ENOENT;
14372         }
14373
14374         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14375         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14376
14377         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14378             btrfs_root_level(&ri) != rii->level ||
14379             btrfs_root_generation(&ri) != rii->gen) {
14380
14381                 /*
14382                  * If we're in repair mode but our caller told us to not update
14383                  * the root item, i.e. just check if it needs to be updated, don't
14384                  * print this message, since the caller will call us again shortly
14385                  * for the same root item without read only mode (the caller will
14386                  * open a transaction first).
14387                  */
14388                 if (!(read_only_mode && repair))
14389                         fprintf(stderr,
14390                                 "%sroot item for root %llu,"
14391                                 " current bytenr %llu, current gen %llu, current level %u,"
14392                                 " new bytenr %llu, new gen %llu, new level %u\n",
14393                                 (read_only_mode ? "" : "fixing "),
14394                                 root_id,
14395                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14396                                 btrfs_root_level(&ri),
14397                                 rii->bytenr, rii->gen, rii->level);
14398
14399                 if (btrfs_root_generation(&ri) > rii->gen) {
14400                         fprintf(stderr,
14401                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14402                                 root_id, btrfs_root_generation(&ri), rii->gen);
14403                         return -EINVAL;
14404                 }
14405
14406                 if (!read_only_mode) {
14407                         btrfs_set_root_bytenr(&ri, rii->bytenr);
14408                         btrfs_set_root_level(&ri, rii->level);
14409                         btrfs_set_root_generation(&ri, rii->gen);
14410                         write_extent_buffer(path->nodes[0], &ri,
14411                                             offset, sizeof(ri));
14412                 }
14413
14414                 return 1;
14415         }
14416
14417         return 0;
14418 }
14419
14420 /*
14421  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14422  * caused read-only snapshots to be corrupted if they were created at a moment
14423  * when the source subvolume/snapshot had orphan items. The issue was that the
14424  * on-disk root items became incorrect, referring to the pre orphan cleanup root
14425  * node instead of the post orphan cleanup root node.
14426  * So this function, and its callees, just detects and fixes those cases. Even
14427  * though the regression was for read-only snapshots, this function applies to
14428  * any snapshot/subvolume root.
14429  * This must be run before any other repair code - not doing it so, makes other
14430  * repair code delete or modify backrefs in the extent tree for example, which
14431  * will result in an inconsistent fs after repairing the root items.
14432  */
14433 static int repair_root_items(struct btrfs_fs_info *info)
14434 {
14435         struct btrfs_path path;
14436         struct btrfs_key key;
14437         struct extent_buffer *leaf;
14438         struct btrfs_trans_handle *trans = NULL;
14439         int ret = 0;
14440         int bad_roots = 0;
14441         int need_trans = 0;
14442
14443         btrfs_init_path(&path);
14444
14445         ret = build_roots_info_cache(info);
14446         if (ret)
14447                 goto out;
14448
14449         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14450         key.type = BTRFS_ROOT_ITEM_KEY;
14451         key.offset = 0;
14452
14453 again:
14454         /*
14455          * Avoid opening and committing transactions if a leaf doesn't have
14456          * any root items that need to be fixed, so that we avoid rotating
14457          * backup roots unnecessarily.
14458          */
14459         if (need_trans) {
14460                 trans = btrfs_start_transaction(info->tree_root, 1);
14461                 if (IS_ERR(trans)) {
14462                         ret = PTR_ERR(trans);
14463                         goto out;
14464                 }
14465         }
14466
14467         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14468                                 0, trans ? 1 : 0);
14469         if (ret < 0)
14470                 goto out;
14471         leaf = path.nodes[0];
14472
14473         while (1) {
14474                 struct btrfs_key found_key;
14475
14476                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14477                         int no_more_keys = find_next_key(&path, &key);
14478
14479                         btrfs_release_path(&path);
14480                         if (trans) {
14481                                 ret = btrfs_commit_transaction(trans,
14482                                                                info->tree_root);
14483                                 trans = NULL;
14484                                 if (ret < 0)
14485                                         goto out;
14486                         }
14487                         need_trans = 0;
14488                         if (no_more_keys)
14489                                 break;
14490                         goto again;
14491                 }
14492
14493                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14494
14495                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14496                         goto next;
14497                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14498                         goto next;
14499
14500                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14501                 if (ret < 0)
14502                         goto out;
14503                 if (ret) {
14504                         if (!trans && repair) {
14505                                 need_trans = 1;
14506                                 key = found_key;
14507                                 btrfs_release_path(&path);
14508                                 goto again;
14509                         }
14510                         bad_roots++;
14511                 }
14512 next:
14513                 path.slots[0]++;
14514         }
14515         ret = 0;
14516 out:
14517         free_roots_info_cache();
14518         btrfs_release_path(&path);
14519         if (trans)
14520                 btrfs_commit_transaction(trans, info->tree_root);
14521         if (ret < 0)
14522                 return ret;
14523
14524         return bad_roots;
14525 }
14526
14527 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14528 {
14529         struct btrfs_trans_handle *trans;
14530         struct btrfs_block_group_cache *bg_cache;
14531         u64 current = 0;
14532         int ret = 0;
14533
14534         /* Clear all free space cache inodes and its extent data */
14535         while (1) {
14536                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14537                 if (!bg_cache)
14538                         break;
14539                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14540                 if (ret < 0)
14541                         return ret;
14542                 current = bg_cache->key.objectid + bg_cache->key.offset;
14543         }
14544
14545         /* Don't forget to set cache_generation to -1 */
14546         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14547         if (IS_ERR(trans)) {
14548                 error("failed to update super block cache generation");
14549                 return PTR_ERR(trans);
14550         }
14551         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14552         btrfs_commit_transaction(trans, fs_info->tree_root);
14553
14554         return ret;
14555 }
14556
14557 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14558                 int clear_version)
14559 {
14560         int ret = 0;
14561
14562         if (clear_version == 1) {
14563                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14564                         error(
14565                 "free space cache v2 detected, use --clear-space-cache v2");
14566                         ret = 1;
14567                         goto close_out;
14568                 }
14569                 printf("Clearing free space cache\n");
14570                 ret = clear_free_space_cache(fs_info);
14571                 if (ret) {
14572                         error("failed to clear free space cache");
14573                         ret = 1;
14574                 } else {
14575                         printf("Free space cache cleared\n");
14576                 }
14577         } else if (clear_version == 2) {
14578                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14579                         printf("no free space cache v2 to clear\n");
14580                         ret = 0;
14581                         goto close_out;
14582                 }
14583                 printf("Clear free space cache v2\n");
14584                 ret = btrfs_clear_free_space_tree(fs_info);
14585                 if (ret) {
14586                         error("failed to clear free space cache v2: %d", ret);
14587                         ret = 1;
14588                 } else {
14589                         printf("free space cache v2 cleared\n");
14590                 }
14591         }
14592 close_out:
14593         return ret;
14594 }
14595
/*
 * NULL-terminated table of help strings for "btrfs check"; cmd_check() hands
 * it to usage(). The entries before the empty string are the synopsis and
 * descriptive text, the entries after it describe the options.
 * (Presumably usage() relies on this ordering - confirm against its
 * implementation before rearranging entries.)
 *
 * The spacing inside the option strings aligns the description column and is
 * part of the printed output, so keep it intact when editing.
 *
 * NOTE(review): -E is documented here as taking <subvolid>, but the short
 * option string passed to getopt_long() in cmd_check() lists 'E' without a
 * trailing ':' - verify that the short form really receives its argument.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
14627
14628 int cmd_check(int argc, char **argv)
14629 {
14630         struct cache_tree root_cache;
14631         struct btrfs_root *root;
14632         struct btrfs_fs_info *info;
14633         u64 bytenr = 0;
14634         u64 subvolid = 0;
14635         u64 tree_root_bytenr = 0;
14636         u64 chunk_root_bytenr = 0;
14637         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14638         int ret = 0;
14639         int err = 0;
14640         u64 num;
14641         int init_csum_tree = 0;
14642         int readonly = 0;
14643         int clear_space_cache = 0;
14644         int qgroup_report = 0;
14645         int qgroups_repaired = 0;
14646         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14647         int force = 0;
14648
14649         while(1) {
14650                 int c;
14651                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14652                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14653                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14654                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14655                         GETOPT_VAL_FORCE };
14656                 static const struct option long_options[] = {
14657                         { "super", required_argument, NULL, 's' },
14658                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14659                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14660                         { "init-csum-tree", no_argument, NULL,
14661                                 GETOPT_VAL_INIT_CSUM },
14662                         { "init-extent-tree", no_argument, NULL,
14663                                 GETOPT_VAL_INIT_EXTENT },
14664                         { "check-data-csum", no_argument, NULL,
14665                                 GETOPT_VAL_CHECK_CSUM },
14666                         { "backup", no_argument, NULL, 'b' },
14667                         { "subvol-extents", required_argument, NULL, 'E' },
14668                         { "qgroup-report", no_argument, NULL, 'Q' },
14669                         { "tree-root", required_argument, NULL, 'r' },
14670                         { "chunk-root", required_argument, NULL,
14671                                 GETOPT_VAL_CHUNK_TREE },
14672                         { "progress", no_argument, NULL, 'p' },
14673                         { "mode", required_argument, NULL,
14674                                 GETOPT_VAL_MODE },
14675                         { "clear-space-cache", required_argument, NULL,
14676                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14677                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14678                         { NULL, 0, NULL, 0}
14679                 };
14680
14681                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14682                 if (c < 0)
14683                         break;
14684                 switch(c) {
14685                         case 'a': /* ignored */ break;
14686                         case 'b':
14687                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14688                                 break;
14689                         case 's':
14690                                 num = arg_strtou64(optarg);
14691                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14692                                         error(
14693                                         "super mirror should be less than %d",
14694                                                 BTRFS_SUPER_MIRROR_MAX);
14695                                         exit(1);
14696                                 }
14697                                 bytenr = btrfs_sb_offset(((int)num));
14698                                 printf("using SB copy %llu, bytenr %llu\n", num,
14699                                        (unsigned long long)bytenr);
14700                                 break;
14701                         case 'Q':
14702                                 qgroup_report = 1;
14703                                 break;
14704                         case 'E':
14705                                 subvolid = arg_strtou64(optarg);
14706                                 break;
14707                         case 'r':
14708                                 tree_root_bytenr = arg_strtou64(optarg);
14709                                 break;
14710                         case GETOPT_VAL_CHUNK_TREE:
14711                                 chunk_root_bytenr = arg_strtou64(optarg);
14712                                 break;
14713                         case 'p':
14714                                 ctx.progress_enabled = true;
14715                                 break;
14716                         case '?':
14717                         case 'h':
14718                                 usage(cmd_check_usage);
14719                         case GETOPT_VAL_REPAIR:
14720                                 printf("enabling repair mode\n");
14721                                 repair = 1;
14722                                 ctree_flags |= OPEN_CTREE_WRITES;
14723                                 break;
14724                         case GETOPT_VAL_READONLY:
14725                                 readonly = 1;
14726                                 break;
14727                         case GETOPT_VAL_INIT_CSUM:
14728                                 printf("Creating a new CRC tree\n");
14729                                 init_csum_tree = 1;
14730                                 repair = 1;
14731                                 ctree_flags |= OPEN_CTREE_WRITES;
14732                                 break;
14733                         case GETOPT_VAL_INIT_EXTENT:
14734                                 init_extent_tree = 1;
14735                                 ctree_flags |= (OPEN_CTREE_WRITES |
14736                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14737                                 repair = 1;
14738                                 break;
14739                         case GETOPT_VAL_CHECK_CSUM:
14740                                 check_data_csum = 1;
14741                                 break;
14742                         case GETOPT_VAL_MODE:
14743                                 check_mode = parse_check_mode(optarg);
14744                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14745                                         error("unknown mode: %s", optarg);
14746                                         exit(1);
14747                                 }
14748                                 break;
14749                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14750                                 if (strcmp(optarg, "v1") == 0) {
14751                                         clear_space_cache = 1;
14752                                 } else if (strcmp(optarg, "v2") == 0) {
14753                                         clear_space_cache = 2;
14754                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14755                                 } else {
14756                                         error(
14757                 "invalid argument to --clear-space-cache, must be v1 or v2");
14758                                         exit(1);
14759                                 }
14760                                 ctree_flags |= OPEN_CTREE_WRITES;
14761                                 break;
14762                         case GETOPT_VAL_FORCE:
14763                                 force = 1;
14764                                 break;
14765                 }
14766         }
14767
14768         if (check_argc_exact(argc - optind, 1))
14769                 usage(cmd_check_usage);
14770
14771         if (ctx.progress_enabled) {
14772                 ctx.tp = TASK_NOTHING;
14773                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14774         }
14775
14776         /* This check is the only reason for --readonly to exist */
14777         if (readonly && repair) {
14778                 error("repair options are not compatible with --readonly");
14779                 exit(1);
14780         }
14781
14782         /*
14783          * experimental and dangerous
14784          */
14785         if (repair && check_mode == CHECK_MODE_LOWMEM)
14786                 warning("low-memory mode repair support is only partial");
14787
14788         radix_tree_init();
14789         cache_tree_init(&root_cache);
14790
14791         ret = check_mounted(argv[optind]);
14792         if (!force) {
14793                 if (ret < 0) {
14794                         error("could not check mount status: %s",
14795                                         strerror(-ret));
14796                         err |= !!ret;
14797                         goto err_out;
14798                 } else if (ret) {
14799                         error(
14800 "%s is currently mounted, use --force if you really intend to check the filesystem",
14801                                 argv[optind]);
14802                         ret = -EBUSY;
14803                         err |= !!ret;
14804                         goto err_out;
14805                 }
14806         } else {
14807                 if (repair) {
14808                         error("repair and --force is not yet supported");
14809                         ret = 1;
14810                         err |= !!ret;
14811                         goto err_out;
14812                 }
14813                 if (ret < 0) {
14814                         warning(
14815 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14816                                 argv[optind]);
14817                 } else if (ret) {
14818                         warning(
14819                         "filesystem mounted, continuing because of --force");
14820                 }
14821                 /* A block device is mounted in exclusive mode by kernel */
14822                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14823         }
14824
14825         /* only allow partial opening under repair mode */
14826         if (repair)
14827                 ctree_flags |= OPEN_CTREE_PARTIAL;
14828
14829         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14830                                   chunk_root_bytenr, ctree_flags);
14831         if (!info) {
14832                 error("cannot open file system");
14833                 ret = -EIO;
14834                 err |= !!ret;
14835                 goto err_out;
14836         }
14837
14838         global_info = info;
14839         root = info->fs_root;
14840         uuid_unparse(info->super_copy->fsid, uuidbuf);
14841
14842         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14843
14844         /*
14845          * Check the bare minimum before starting anything else that could rely
14846          * on it, namely the tree roots, any local consistency checks
14847          */
14848         if (!extent_buffer_uptodate(info->tree_root->node) ||
14849             !extent_buffer_uptodate(info->dev_root->node) ||
14850             !extent_buffer_uptodate(info->chunk_root->node)) {
14851                 error("critical roots corrupted, unable to check the filesystem");
14852                 err |= !!ret;
14853                 ret = -EIO;
14854                 goto close_out;
14855         }
14856
14857         if (clear_space_cache) {
14858                 ret = do_clear_free_space_cache(info, clear_space_cache);
14859                 err |= !!ret;
14860                 goto close_out;
14861         }
14862
14863         /*
14864          * repair mode will force us to commit transaction which
14865          * will make us fail to load log tree when mounting.
14866          */
14867         if (repair && btrfs_super_log_root(info->super_copy)) {
14868                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14869                 if (!ret) {
14870                         ret = 1;
14871                         err |= !!ret;
14872                         goto close_out;
14873                 }
14874                 ret = zero_log_tree(root);
14875                 err |= !!ret;
14876                 if (ret) {
14877                         error("failed to zero log tree: %d", ret);
14878                         goto close_out;
14879                 }
14880         }
14881
14882         if (qgroup_report) {
14883                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14884                        uuidbuf);
14885                 ret = qgroup_verify_all(info);
14886                 err |= !!ret;
14887                 if (ret == 0)
14888                         report_qgroups(1);
14889                 goto close_out;
14890         }
14891         if (subvolid) {
14892                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14893                        subvolid, argv[optind], uuidbuf);
14894                 ret = print_extent_state(info, subvolid);
14895                 err |= !!ret;
14896                 goto close_out;
14897         }
14898
14899         if (init_extent_tree || init_csum_tree) {
14900                 struct btrfs_trans_handle *trans;
14901
14902                 trans = btrfs_start_transaction(info->extent_root, 0);
14903                 if (IS_ERR(trans)) {
14904                         error("error starting transaction");
14905                         ret = PTR_ERR(trans);
14906                         err |= !!ret;
14907                         goto close_out;
14908                 }
14909
14910                 if (init_extent_tree) {
14911                         printf("Creating a new extent tree\n");
14912                         ret = reinit_extent_tree(trans, info);
14913                         err |= !!ret;
14914                         if (ret)
14915                                 goto close_out;
14916                 }
14917
14918                 if (init_csum_tree) {
14919                         printf("Reinitialize checksum tree\n");
14920                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14921                         if (ret) {
14922                                 error("checksum tree initialization failed: %d",
14923                                                 ret);
14924                                 ret = -EIO;
14925                                 err |= !!ret;
14926                                 goto close_out;
14927                         }
14928
14929                         ret = fill_csum_tree(trans, info->csum_root,
14930                                              init_extent_tree);
14931                         err |= !!ret;
14932                         if (ret) {
14933                                 error("checksum tree refilling failed: %d", ret);
14934                                 return -EIO;
14935                         }
14936                 }
14937                 /*
14938                  * Ok now we commit and run the normal fsck, which will add
14939                  * extent entries for all of the items it finds.
14940                  */
14941                 ret = btrfs_commit_transaction(trans, info->extent_root);
14942                 err |= !!ret;
14943                 if (ret)
14944                         goto close_out;
14945         }
14946         if (!extent_buffer_uptodate(info->extent_root->node)) {
14947                 error("critical: extent_root, unable to check the filesystem");
14948                 ret = -EIO;
14949                 err |= !!ret;
14950                 goto close_out;
14951         }
14952         if (!extent_buffer_uptodate(info->csum_root->node)) {
14953                 error("critical: csum_root, unable to check the filesystem");
14954                 ret = -EIO;
14955                 err |= !!ret;
14956                 goto close_out;
14957         }
14958
14959         if (!init_extent_tree) {
14960                 ret = repair_root_items(info);
14961                 if (ret < 0) {
14962                         err = !!ret;
14963                         error("failed to repair root items: %s", strerror(-ret));
14964                         goto close_out;
14965                 }
14966                 if (repair) {
14967                         fprintf(stderr, "Fixed %d roots.\n", ret);
14968                         ret = 0;
14969                 } else if (ret > 0) {
14970                         fprintf(stderr,
14971                                 "Found %d roots with an outdated root item.\n",
14972                                 ret);
14973                         fprintf(stderr,
14974         "Please run a filesystem check with the option --repair to fix them.\n");
14975                         ret = 1;
14976                         err |= ret;
14977                         goto close_out;
14978                 }
14979         }
14980
14981         ret = do_check_chunks_and_extents(info);
14982         err |= !!ret;
14983         if (ret)
14984                 error(
14985                 "errors found in extent allocation tree or chunk allocation");
14986
14987         /* Only re-check super size after we checked and repaired the fs */
14988         err |= !is_super_size_valid(info);
14989
14990         if (!ctx.progress_enabled) {
14991                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14992                         fprintf(stderr, "checking free space tree\n");
14993                 else
14994                         fprintf(stderr, "checking free space cache\n");
14995         }
14996         ret = check_space_cache(root);
14997         err |= !!ret;
14998         if (ret) {
14999                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
15000                         error("errors found in free space tree");
15001                 else
15002                         error("errors found in free space cache");
15003                 goto out;
15004         }
15005
15006         /*
15007          * We used to have to have these hole extents in between our real
15008          * extents so if we don't have this flag set we need to make sure there
15009          * are no gaps in the file extents for inodes, otherwise we can just
15010          * ignore it when this happens.
15011          */
15012         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
15013         ret = do_check_fs_roots(info, &root_cache);
15014         err |= !!ret;
15015         if (ret) {
15016                 error("errors found in fs roots");
15017                 goto out;
15018         }
15019
15020         fprintf(stderr, "checking csums\n");
15021         ret = check_csums(root);
15022         err |= !!ret;
15023         if (ret) {
15024                 error("errors found in csum tree");
15025                 goto out;
15026         }
15027
15028         fprintf(stderr, "checking root refs\n");
15029         /* For low memory mode, check_fs_roots_v2 handles root refs */
15030         if (check_mode != CHECK_MODE_LOWMEM) {
15031                 ret = check_root_refs(root, &root_cache);
15032                 err |= !!ret;
15033                 if (ret) {
15034                         error("errors found in root refs");
15035                         goto out;
15036                 }
15037         }
15038
15039         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
15040                 struct extent_buffer *eb;
15041
15042                 eb = list_first_entry(&root->fs_info->recow_ebs,
15043                                       struct extent_buffer, recow);
15044                 list_del_init(&eb->recow);
15045                 ret = recow_extent_buffer(root, eb);
15046                 err |= !!ret;
15047                 if (ret) {
15048                         error("fails to fix transid errors");
15049                         break;
15050                 }
15051         }
15052
15053         while (!list_empty(&delete_items)) {
15054                 struct bad_item *bad;
15055
15056                 bad = list_first_entry(&delete_items, struct bad_item, list);
15057                 list_del_init(&bad->list);
15058                 if (repair) {
15059                         ret = delete_bad_item(root, bad);
15060                         err |= !!ret;
15061                 }
15062                 free(bad);
15063         }
15064
15065         if (info->quota_enabled) {
15066                 fprintf(stderr, "checking quota groups\n");
15067                 ret = qgroup_verify_all(info);
15068                 err |= !!ret;
15069                 if (ret) {
15070                         error("failed to check quota groups");
15071                         goto out;
15072                 }
15073                 report_qgroups(0);
15074                 ret = repair_qgroups(info, &qgroups_repaired);
15075                 err |= !!ret;
15076                 if (err) {
15077                         error("failed to repair quota groups");
15078                         goto out;
15079                 }
15080                 ret = 0;
15081         }
15082
15083         if (!list_empty(&root->fs_info->recow_ebs)) {
15084                 error("transid errors in file system");
15085                 ret = 1;
15086                 err |= !!ret;
15087         }
15088 out:
15089         printf("found %llu bytes used, ",
15090                (unsigned long long)bytes_used);
15091         if (err)
15092                 printf("error(s) found\n");
15093         else
15094                 printf("no error found\n");
15095         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
15096         printf("total tree bytes: %llu\n",
15097                (unsigned long long)total_btree_bytes);
15098         printf("total fs tree bytes: %llu\n",
15099                (unsigned long long)total_fs_tree_bytes);
15100         printf("total extent tree bytes: %llu\n",
15101                (unsigned long long)total_extent_tree_bytes);
15102         printf("btree space waste bytes: %llu\n",
15103                (unsigned long long)btree_space_waste);
15104         printf("file data blocks allocated: %llu\n referenced %llu\n",
15105                 (unsigned long long)data_bytes_allocated,
15106                 (unsigned long long)data_bytes_referenced);
15107
15108         free_qgroup_counts();
15109         free_root_recs_tree(&root_cache);
15110 close_out:
15111         close_ctree(root);
15112 err_out:
15113         if (ctx.progress_enabled)
15114                 task_deinit(ctx.info);
15115
15116         return err;
15117 }