btrfs-progs: convert: add missing types header
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phases of the check for which a progress indicator can be shown.
 *
 * The values are used as indexes into the message table of
 * print_status_check(), which has no entry for TASK_NOTHING.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING,	/* must remain the last element */
};
53
54 struct task_ctx {
55         int progress_enabled;
56         enum task_position tp;
57
58         struct task_info *info;
59 };
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Implementation used to run the check.
 *
 * CHECK_MODE_UNKNOWN is what parse_check_mode() returns for an unrecognized
 * name; CHECK_MODE_DEFAULT is an alias for the original mode.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL,
};

/* Currently selected mode; starts out as the default one. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
87 struct extent_backref {
88         struct rb_node node;
89         unsigned int is_data:1;
90         unsigned int found_extent_tree:1;
91         unsigned int full_backref:1;
92         unsigned int found_ref:1;
93         unsigned int broken:1;
94 };
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
101 struct data_backref {
102         struct extent_backref node;
103         union {
104                 u64 parent;
105                 u64 root;
106         };
107         u64 owner;
108         u64 offset;
109         u64 disk_bytenr;
110         u64 bytes;
111         u64 ram_bytes;
112         u32 num_refs;
113         u32 found_ref;
114 };
115
/* Error bits for fs tree items; bit 0 is not assigned. */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but does not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM without reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* tree traversal is complete */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but does not match */
#define DIR_INDEX_MISSING	(1 << 18)	/* DIR_INDEX not found */
#define DIR_INDEX_MISMATCH	(1 << 19)	/* DIR_INDEX found but does not match */
#define DIR_COUNT_AGAIN		(1 << 20)	/* DIR isize should be recalculated */
#define BG_ACCOUNTING_ERROR	(1 << 21)	/* block group accounting error */
137
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
139 {
140         return container_of(back, struct data_backref, node);
141 }
142
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
144 {
145         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147         struct data_backref *back1 = to_data_backref(ext1);
148         struct data_backref *back2 = to_data_backref(ext2);
149
150         WARN_ON(!ext1->is_data);
151         WARN_ON(!ext2->is_data);
152
153         /* parent and root are a union, so this covers both */
154         if (back1->parent > back2->parent)
155                 return 1;
156         if (back1->parent < back2->parent)
157                 return -1;
158
159         /* This is a full backref and the parents match. */
160         if (back1->node.full_backref)
161                 return 0;
162
163         if (back1->owner > back2->owner)
164                 return 1;
165         if (back1->owner < back2->owner)
166                 return -1;
167
168         if (back1->offset > back2->offset)
169                 return 1;
170         if (back1->offset < back2->offset)
171                 return -1;
172
173         if (back1->found_ref && back2->found_ref) {
174                 if (back1->disk_bytenr > back2->disk_bytenr)
175                         return 1;
176                 if (back1->disk_bytenr < back2->disk_bytenr)
177                         return -1;
178
179                 if (back1->bytes > back2->bytes)
180                         return 1;
181                 if (back1->bytes < back2->bytes)
182                         return -1;
183         }
184
185         return 0;
186 }
187
188 /*
189  * Much like data_backref, just removed the undetermined members
190  * and change it to use list_head.
191  * During extent scan, it is stored in root->orphan_data_extent.
192  * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
193  */
194 struct orphan_data_extent {
195         struct list_head list;
196         u64 root;
197         u64 objectid;
198         u64 offset;
199         u64 disk_bytenr;
200         u64 disk_len;
201 };
202
203 struct tree_backref {
204         struct extent_backref node;
205         union {
206                 u64 parent;
207                 u64 root;
208         };
209 };
210
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
212 {
213         return container_of(back, struct tree_backref, node);
214 }
215
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
217 {
218         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220         struct tree_backref *back1 = to_tree_backref(ext1);
221         struct tree_backref *back2 = to_tree_backref(ext2);
222
223         WARN_ON(ext1->is_data);
224         WARN_ON(ext2->is_data);
225
226         /* parent and root are a union, so this covers both */
227         if (back1->parent > back2->parent)
228                 return 1;
229         if (back1->parent < back2->parent)
230                 return -1;
231
232         return 0;
233 }
234
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
239
240         if (ext1->is_data > ext2->is_data)
241                 return 1;
242
243         if (ext1->is_data < ext2->is_data)
244                 return -1;
245
246         if (ext1->full_backref > ext2->full_backref)
247                 return 1;
248         if (ext1->full_backref < ext2->full_backref)
249                 return -1;
250
251         if (ext1->is_data)
252                 return compare_data_backref(node1, node2);
253         else
254                 return compare_tree_backref(node1, node2);
255 }
256
/*
 * Value marking extent_record::flag_block_full_backref as not yet determined;
 * that bitfield is two bits wide so that it can hold this value.
 */
enum {
	FLAG_UNSET = 2
};
259
260 struct extent_record {
261         struct list_head backrefs;
262         struct list_head dups;
263         struct rb_root backref_tree;
264         struct list_head list;
265         struct cache_extent cache;
266         struct btrfs_disk_key parent_key;
267         u64 start;
268         u64 max_size;
269         u64 nr;
270         u64 refs;
271         u64 extent_item_refs;
272         u64 generation;
273         u64 parent_generation;
274         u64 info_objectid;
275         u32 num_duplicates;
276         u8 info_level;
277         unsigned int flag_block_full_backref:2;
278         unsigned int found_rec:1;
279         unsigned int content_checked:1;
280         unsigned int owner_ref_checked:1;
281         unsigned int is_root:1;
282         unsigned int metadata:1;
283         unsigned int bad_full_backref:1;
284         unsigned int crossing_stripes:1;
285         unsigned int wrong_chunk_type:1;
286 };
287
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
289 {
290         return container_of(entry, struct extent_record, list);
291 }
292
293 struct inode_backref {
294         struct list_head list;
295         unsigned int found_dir_item:1;
296         unsigned int found_dir_index:1;
297         unsigned int found_inode_ref:1;
298         u8 filetype;
299         u8 ref_type;
300         int errors;
301         u64 dir;
302         u64 index;
303         u16 namelen;
304         char name[0];
305 };
306
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
308 {
309         return list_entry(entry, struct inode_backref, list);
310 }
311
312 struct root_item_record {
313         struct list_head list;
314         u64 objectid;
315         u64 bytenr;
316         u64 last_snapshot;
317         u8 level;
318         u8 drop_level;
319         struct btrfs_key drop_key;
320 };
321
/* Error bits for backrefs; print_ref_error() decodes them. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)	/* 0x0001 */
#define REF_ERR_NO_DIR_INDEX		(1 << 1)	/* 0x0002 */
#define REF_ERR_NO_INODE_REF		(1 << 2)	/* 0x0004 */
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)	/* 0x0008 */
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)	/* 0x0010 */
#define REF_ERR_DUP_INODE_REF		(1 << 5)	/* 0x0020 */
#define REF_ERR_INDEX_UNMATCH		(1 << 6)	/* 0x0040 */
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)	/* 0x0080 */
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x0100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)	/* 0x0200 */
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)	/* 0x0400 */
#define REF_ERR_DUP_ROOT_REF		(1 << 11)	/* 0x0800 */
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)	/* 0x1000 */
335
336 struct file_extent_hole {
337         struct rb_node node;
338         u64 start;
339         u64 len;
340 };
341
342 struct inode_record {
343         struct list_head backrefs;
344         unsigned int checked:1;
345         unsigned int merging:1;
346         unsigned int found_inode_item:1;
347         unsigned int found_dir_item:1;
348         unsigned int found_file_extent:1;
349         unsigned int found_csum_item:1;
350         unsigned int some_csum_missing:1;
351         unsigned int nodatasum:1;
352         int errors;
353
354         u64 ino;
355         u32 nlink;
356         u32 imode;
357         u64 isize;
358         u64 nbytes;
359
360         u32 found_link;
361         u64 found_size;
362         u64 extent_start;
363         u64 extent_end;
364         struct rb_root holes;
365         struct list_head orphan_extents;
366
367         u32 refs;
368 };
369
/* Error bits for inode_record::errors; print_inode_error() decodes them. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)	/* 0x0001 */
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)	/* 0x0002 */
#define I_ERR_DUP_INODE_ITEM		(1 << 2)	/* 0x0004 */
#define I_ERR_DUP_DIR_INDEX		(1 << 3)	/* 0x0008 */
#define I_ERR_ODD_DIR_ITEM		(1 << 4)	/* 0x0010 */
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)	/* 0x0020 */
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)	/* 0x0040 */
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)	/* 0x0080 */
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x0100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)	/* 0x0200 */
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x0400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)	/* 0x0800 */
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)	/* 0x1000 */
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)	/* 0x2000 */
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)	/* 0x4000 */
385
386 struct root_backref {
387         struct list_head list;
388         unsigned int found_dir_item:1;
389         unsigned int found_dir_index:1;
390         unsigned int found_back_ref:1;
391         unsigned int found_forward_ref:1;
392         unsigned int reachable:1;
393         int errors;
394         u64 ref_root;
395         u64 dir;
396         u64 index;
397         u16 namelen;
398         char name[0];
399 };
400
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
402 {
403         return list_entry(entry, struct root_backref, list);
404 }
405
406 struct root_record {
407         struct list_head backrefs;
408         struct cache_extent cache;
409         unsigned int found_root_item:1;
410         u64 objectid;
411         u32 found_ref;
412 };
413
414 struct ptr_node {
415         struct cache_extent cache;
416         void *data;
417 };
418
419 struct shared_node {
420         struct cache_extent cache;
421         struct cache_tree root_cache;
422         struct cache_tree inode_cache;
423         struct inode_record *current;
424         u32 refs;
425 };
426
427 struct block_info {
428         u64 start;
429         u32 size;
430 };
431
432 struct walk_control {
433         struct cache_tree shared;
434         struct shared_node *nodes[BTRFS_MAX_LEVEL];
435         int active_node;
436         int root_level;
437 };
438
439 struct bad_item {
440         struct btrfs_key key;
441         u64 root_id;
442         struct list_head list;
443 };
444
445 struct extent_entry {
446         u64 bytenr;
447         u64 bytes;
448         int count;
449         int broken;
450         struct list_head list;
451 };
452
453 struct root_item_info {
454         /* level of the root */
455         u8 level;
456         /* number of nodes at this level, must be 1 for a root */
457         int node_count;
458         u64 bytenr;
459         u64 gen;
460         struct cache_extent cache_extent;
461 };
462
/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about the individual bits; they are only used
 * internally for error classification.
 *
 * Every flag owns a distinct bit. CROSSING_STRIPE_BOUNDARY used to share
 * (1 << 4) with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable in a mask; it now uses the first free bit instead, so
 * the values of all other flags are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
479
480 static void *print_status_check(void *p)
481 {
482         struct task_ctx *priv = p;
483         const char work_indicator[] = { '.', 'o', 'O', 'o' };
484         uint32_t count = 0;
485         static char *task_position_string[] = {
486                 "checking extents",
487                 "checking free space cache",
488                 "checking fs roots",
489         };
490
491         task_period_start(priv->info, 1000 /* 1s */);
492
493         if (priv->tp == TASK_NOTHING)
494                 return NULL;
495
496         while (1) {
497                 printf("%s [%c]\r", task_position_string[priv->tp],
498                                 work_indicator[count % 4]);
499                 count++;
500                 fflush(stdout);
501                 task_period_wait(priv->info);
502         }
503         return NULL;
504 }
505
/*
 * Progress indicator callback that ends the status line: writes a newline
 * to stdout and flushes it.
 *
 * @p: unused
 *
 * Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
513
514 static enum btrfs_check_mode parse_check_mode(const char *str)
515 {
516         if (strcmp(str, "lowmem") == 0)
517                 return CHECK_MODE_LOWMEM;
518         if (strcmp(str, "orig") == 0)
519                 return CHECK_MODE_ORIGINAL;
520         if (strcmp(str, "original") == 0)
521                 return CHECK_MODE_ORIGINAL;
522
523         return CHECK_MODE_UNKNOWN;
524 }
525
526 /* Compatible function to allow reuse of old codes */
527 static u64 first_extent_gap(struct rb_root *holes)
528 {
529         struct file_extent_hole *hole;
530
531         if (RB_EMPTY_ROOT(holes))
532                 return (u64)-1;
533
534         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
535         return hole->start;
536 }
537
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
539 {
540         struct file_extent_hole *hole1;
541         struct file_extent_hole *hole2;
542
543         hole1 = rb_entry(node1, struct file_extent_hole, node);
544         hole2 = rb_entry(node2, struct file_extent_hole, node);
545
546         if (hole1->start > hole2->start)
547                 return -1;
548         if (hole1->start < hole2->start)
549                 return 1;
550         /* Now hole1->start == hole2->start */
551         if (hole1->len >= hole2->len)
552                 /*
553                  * Hole 1 will be merge center
554                  * Same hole will be merged later
555                  */
556                 return -1;
557         /* Hole 2 will be merge center */
558         return 1;
559 }
560
561 /*
562  * Add a hole to the record
563  *
564  * This will do hole merge for copy_file_extent_holes(),
565  * which will ensure there won't be continuous holes.
566  */
567 static int add_file_extent_hole(struct rb_root *holes,
568                                 u64 start, u64 len)
569 {
570         struct file_extent_hole *hole;
571         struct file_extent_hole *prev = NULL;
572         struct file_extent_hole *next = NULL;
573
574         hole = malloc(sizeof(*hole));
575         if (!hole)
576                 return -ENOMEM;
577         hole->start = start;
578         hole->len = len;
579         /* Since compare will not return 0, no -EEXIST will happen */
580         rb_insert(holes, &hole->node, compare_hole);
581
582         /* simple merge with previous hole */
583         if (rb_prev(&hole->node))
584                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
585                                 node);
586         if (prev && prev->start + prev->len >= hole->start) {
587                 hole->len = hole->start + hole->len - prev->start;
588                 hole->start = prev->start;
589                 rb_erase(&prev->node, holes);
590                 free(prev);
591                 prev = NULL;
592         }
593
594         /* iterate merge with next holes */
595         while (1) {
596                 if (!rb_next(&hole->node))
597                         break;
598                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
599                                         node);
600                 if (hole->start + hole->len >= next->start) {
601                         if (hole->start + hole->len <= next->start + next->len)
602                                 hole->len = next->start + next->len -
603                                             hole->start;
604                         rb_erase(&next->node, holes);
605                         free(next);
606                         next = NULL;
607                 } else
608                         break;
609         }
610         return 0;
611 }
612
613 static int compare_hole_range(struct rb_node *node, void *data)
614 {
615         struct file_extent_hole *hole;
616         u64 start;
617
618         hole = (struct file_extent_hole *)data;
619         start = hole->start;
620
621         hole = rb_entry(node, struct file_extent_hole, node);
622         if (start < hole->start)
623                 return -1;
624         if (start >= hole->start && start < hole->start + hole->len)
625                 return 0;
626         return 1;
627 }
628
629 /*
630  * Delete a hole in the record
631  *
632  * This will do the hole split and is much restrict than add.
633  */
634 static int del_file_extent_hole(struct rb_root *holes,
635                                 u64 start, u64 len)
636 {
637         struct file_extent_hole *hole;
638         struct file_extent_hole tmp;
639         u64 prev_start = 0;
640         u64 prev_len = 0;
641         u64 next_start = 0;
642         u64 next_len = 0;
643         struct rb_node *node;
644         int have_prev = 0;
645         int have_next = 0;
646         int ret = 0;
647
648         tmp.start = start;
649         tmp.len = len;
650         node = rb_search(holes, &tmp, compare_hole_range, NULL);
651         if (!node)
652                 return -EEXIST;
653         hole = rb_entry(node, struct file_extent_hole, node);
654         if (start + len > hole->start + hole->len)
655                 return -EEXIST;
656
657         /*
658          * Now there will be no overlap, delete the hole and re-add the
659          * split(s) if they exists.
660          */
661         if (start > hole->start) {
662                 prev_start = hole->start;
663                 prev_len = start - hole->start;
664                 have_prev = 1;
665         }
666         if (hole->start + hole->len > start + len) {
667                 next_start = start + len;
668                 next_len = hole->start + hole->len - start - len;
669                 have_next = 1;
670         }
671         rb_erase(node, holes);
672         free(hole);
673         if (have_prev) {
674                 ret = add_file_extent_hole(holes, prev_start, prev_len);
675                 if (ret < 0)
676                         return ret;
677         }
678         if (have_next) {
679                 ret = add_file_extent_hole(holes, next_start, next_len);
680                 if (ret < 0)
681                         return ret;
682         }
683         return 0;
684 }
685
686 static int copy_file_extent_holes(struct rb_root *dst,
687                                   struct rb_root *src)
688 {
689         struct file_extent_hole *hole;
690         struct rb_node *node;
691         int ret = 0;
692
693         node = rb_first(src);
694         while (node) {
695                 hole = rb_entry(node, struct file_extent_hole, node);
696                 ret = add_file_extent_hole(dst, hole->start, hole->len);
697                 if (ret)
698                         break;
699                 node = rb_next(node);
700         }
701         return ret;
702 }
703
704 static void free_file_extent_holes(struct rb_root *holes)
705 {
706         struct rb_node *node;
707         struct file_extent_hole *hole;
708
709         node = rb_first(holes);
710         while (node) {
711                 hole = rb_entry(node, struct file_extent_hole, node);
712                 rb_erase(node, holes);
713                 free(hole);
714                 node = rb_first(holes);
715         }
716 }
717
718 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
719
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721                                  struct btrfs_root *root)
722 {
723         if (root->last_trans != trans->transid) {
724                 root->track_dirty = 1;
725                 root->last_trans = trans->transid;
726                 root->commit_root = root->node;
727                 extent_buffer_get(root->node);
728         }
729 }
730
731 static u8 imode_to_type(u32 imode)
732 {
733 #define S_SHIFT 12
734         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
736                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
737                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
738                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
739                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
740                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
741                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
742         };
743
744         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
745 #undef S_SHIFT
746 }
747
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
749 {
750         struct device_record *rec1;
751         struct device_record *rec2;
752
753         rec1 = rb_entry(node1, struct device_record, node);
754         rec2 = rb_entry(node2, struct device_record, node);
755         if (rec1->devid > rec2->devid)
756                 return -1;
757         else if (rec1->devid < rec2->devid)
758                 return 1;
759         else
760                 return 0;
761 }
762
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
764 {
765         struct inode_record *rec;
766         struct inode_backref *backref;
767         struct inode_backref *orig;
768         struct inode_backref *tmp;
769         struct orphan_data_extent *src_orphan;
770         struct orphan_data_extent *dst_orphan;
771         struct rb_node *rb;
772         size_t size;
773         int ret;
774
775         rec = malloc(sizeof(*rec));
776         if (!rec)
777                 return ERR_PTR(-ENOMEM);
778         memcpy(rec, orig_rec, sizeof(*rec));
779         rec->refs = 1;
780         INIT_LIST_HEAD(&rec->backrefs);
781         INIT_LIST_HEAD(&rec->orphan_extents);
782         rec->holes = RB_ROOT;
783
784         list_for_each_entry(orig, &orig_rec->backrefs, list) {
785                 size = sizeof(*orig) + orig->namelen + 1;
786                 backref = malloc(size);
787                 if (!backref) {
788                         ret = -ENOMEM;
789                         goto cleanup;
790                 }
791                 memcpy(backref, orig, size);
792                 list_add_tail(&backref->list, &rec->backrefs);
793         }
794         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795                 dst_orphan = malloc(sizeof(*dst_orphan));
796                 if (!dst_orphan) {
797                         ret = -ENOMEM;
798                         goto cleanup;
799                 }
800                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
802         }
803         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
804         if (ret < 0)
805                 goto cleanup_rb;
806
807         return rec;
808
809 cleanup_rb:
810         rb = rb_first(&rec->holes);
811         while (rb) {
812                 struct file_extent_hole *hole;
813
814                 hole = rb_entry(rb, struct file_extent_hole, node);
815                 rb = rb_next(rb);
816                 free(hole);
817         }
818
819 cleanup:
820         if (!list_empty(&rec->backrefs))
821                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822                         list_del(&orig->list);
823                         free(orig);
824                 }
825
826         if (!list_empty(&rec->orphan_extents))
827                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828                         list_del(&orig->list);
829                         free(orig);
830                 }
831
832         free(rec);
833
834         return ERR_PTR(ret);
835 }
836
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
838                                       u64 objectid)
839 {
840         struct orphan_data_extent *orphan;
841
842         if (list_empty(orphan_extents))
843                 return;
844         printf("The following data extent is lost in tree %llu:\n",
845                objectid);
846         list_for_each_entry(orphan, orphan_extents, list) {
847                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
849                        orphan->disk_len);
850         }
851 }
852
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
854 {
855         u64 root_objectid = root->root_key.objectid;
856         int errors = rec->errors;
857
858         if (!errors)
859                 return;
860         /* reloc root errors, we print its corresponding fs root objectid*/
861         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862                 root_objectid = root->root_key.offset;
863                 fprintf(stderr, "reloc");
864         }
865         fprintf(stderr, "root %llu inode %llu errors %x",
866                 (unsigned long long) root_objectid,
867                 (unsigned long long) rec->ino, rec->errors);
868
869         if (errors & I_ERR_NO_INODE_ITEM)
870                 fprintf(stderr, ", no inode item");
871         if (errors & I_ERR_NO_ORPHAN_ITEM)
872                 fprintf(stderr, ", no orphan item");
873         if (errors & I_ERR_DUP_INODE_ITEM)
874                 fprintf(stderr, ", dup inode item");
875         if (errors & I_ERR_DUP_DIR_INDEX)
876                 fprintf(stderr, ", dup dir index");
877         if (errors & I_ERR_ODD_DIR_ITEM)
878                 fprintf(stderr, ", odd dir item");
879         if (errors & I_ERR_ODD_FILE_EXTENT)
880                 fprintf(stderr, ", odd file extent");
881         if (errors & I_ERR_BAD_FILE_EXTENT)
882                 fprintf(stderr, ", bad file extent");
883         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884                 fprintf(stderr, ", file extent overlap");
885         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886                 fprintf(stderr, ", file extent discount");
887         if (errors & I_ERR_DIR_ISIZE_WRONG)
888                 fprintf(stderr, ", dir isize wrong");
889         if (errors & I_ERR_FILE_NBYTES_WRONG)
890                 fprintf(stderr, ", nbytes wrong");
891         if (errors & I_ERR_ODD_CSUM_ITEM)
892                 fprintf(stderr, ", odd csum item");
893         if (errors & I_ERR_SOME_CSUM_MISSING)
894                 fprintf(stderr, ", some csum missing");
895         if (errors & I_ERR_LINK_COUNT_WRONG)
896                 fprintf(stderr, ", link count wrong");
897         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898                 fprintf(stderr, ", orphan file extent");
899         fprintf(stderr, "\n");
900         /* Print the orphan extents if needed */
901         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
903
904         /* Print the holes if needed */
905         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906                 struct file_extent_hole *hole;
907                 struct rb_node *node;
908                 int found = 0;
909
910                 node = rb_first(&rec->holes);
911                 fprintf(stderr, "Found file extent holes:\n");
912                 while (node) {
913                         found = 1;
914                         hole = rb_entry(node, struct file_extent_hole, node);
915                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
916                                 hole->start, hole->len);
917                         node = rb_next(node);
918                 }
919                 if (!found)
920                         fprintf(stderr, "\tstart: 0, len: %llu\n",
921                                 round_up(rec->isize,
922                                          root->fs_info->sectorsize));
923         }
924 }
925
926 static void print_ref_error(int errors)
927 {
928         if (errors & REF_ERR_NO_DIR_ITEM)
929                 fprintf(stderr, ", no dir item");
930         if (errors & REF_ERR_NO_DIR_INDEX)
931                 fprintf(stderr, ", no dir index");
932         if (errors & REF_ERR_NO_INODE_REF)
933                 fprintf(stderr, ", no inode ref");
934         if (errors & REF_ERR_DUP_DIR_ITEM)
935                 fprintf(stderr, ", dup dir item");
936         if (errors & REF_ERR_DUP_DIR_INDEX)
937                 fprintf(stderr, ", dup dir index");
938         if (errors & REF_ERR_DUP_INODE_REF)
939                 fprintf(stderr, ", dup inode ref");
940         if (errors & REF_ERR_INDEX_UNMATCH)
941                 fprintf(stderr, ", index mismatch");
942         if (errors & REF_ERR_FILETYPE_UNMATCH)
943                 fprintf(stderr, ", filetype mismatch");
944         if (errors & REF_ERR_NAME_TOO_LONG)
945                 fprintf(stderr, ", name too long");
946         if (errors & REF_ERR_NO_ROOT_REF)
947                 fprintf(stderr, ", no root ref");
948         if (errors & REF_ERR_NO_ROOT_BACKREF)
949                 fprintf(stderr, ", no root backref");
950         if (errors & REF_ERR_DUP_ROOT_REF)
951                 fprintf(stderr, ", dup root ref");
952         if (errors & REF_ERR_DUP_ROOT_BACKREF)
953                 fprintf(stderr, ", dup root backref");
954         fprintf(stderr, "\n");
955 }
956
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
958                                           u64 ino, int mod)
959 {
960         struct ptr_node *node;
961         struct cache_extent *cache;
962         struct inode_record *rec = NULL;
963         int ret;
964
965         cache = lookup_cache_extent(inode_cache, ino, 1);
966         if (cache) {
967                 node = container_of(cache, struct ptr_node, cache);
968                 rec = node->data;
969                 if (mod && rec->refs > 1) {
970                         node->data = clone_inode_rec(rec);
971                         if (IS_ERR(node->data))
972                                 return node->data;
973                         rec->refs--;
974                         rec = node->data;
975                 }
976         } else if (mod) {
977                 rec = calloc(1, sizeof(*rec));
978                 if (!rec)
979                         return ERR_PTR(-ENOMEM);
980                 rec->ino = ino;
981                 rec->extent_start = (u64)-1;
982                 rec->refs = 1;
983                 INIT_LIST_HEAD(&rec->backrefs);
984                 INIT_LIST_HEAD(&rec->orphan_extents);
985                 rec->holes = RB_ROOT;
986
987                 node = malloc(sizeof(*node));
988                 if (!node) {
989                         free(rec);
990                         return ERR_PTR(-ENOMEM);
991                 }
992                 node->cache.start = ino;
993                 node->cache.size = 1;
994                 node->data = rec;
995
996                 if (ino == BTRFS_FREE_INO_OBJECTID)
997                         rec->found_link = 1;
998
999                 ret = insert_cache_extent(inode_cache, &node->cache);
1000                 if (ret)
1001                         return ERR_PTR(-EEXIST);
1002         }
1003         return rec;
1004 }
1005
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1007 {
1008         struct orphan_data_extent *orphan;
1009
1010         while (!list_empty(orphan_extents)) {
1011                 orphan = list_entry(orphan_extents->next,
1012                                     struct orphan_data_extent, list);
1013                 list_del(&orphan->list);
1014                 free(orphan);
1015         }
1016 }
1017
1018 static void free_inode_rec(struct inode_record *rec)
1019 {
1020         struct inode_backref *backref;
1021
1022         if (--rec->refs > 0)
1023                 return;
1024
1025         while (!list_empty(&rec->backrefs)) {
1026                 backref = to_inode_backref(rec->backrefs.next);
1027                 list_del(&backref->list);
1028                 free(backref);
1029         }
1030         free_orphan_data_extents(&rec->orphan_extents);
1031         free_file_extent_holes(&rec->holes);
1032         free(rec);
1033 }
1034
1035 static int can_free_inode_rec(struct inode_record *rec)
1036 {
1037         if (!rec->errors && rec->checked && rec->found_inode_item &&
1038             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1039                 return 1;
1040         return 0;
1041 }
1042
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044                                  struct inode_record *rec)
1045 {
1046         struct cache_extent *cache;
1047         struct inode_backref *tmp, *backref;
1048         struct ptr_node *node;
1049         u8 filetype;
1050
1051         if (!rec->found_inode_item)
1052                 return;
1053
1054         filetype = imode_to_type(rec->imode);
1055         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056                 if (backref->found_dir_item && backref->found_dir_index) {
1057                         if (backref->filetype != filetype)
1058                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059                         if (!backref->errors && backref->found_inode_ref &&
1060                             rec->nlink == rec->found_link) {
1061                                 list_del(&backref->list);
1062                                 free(backref);
1063                         }
1064                 }
1065         }
1066
1067         if (!rec->checked || rec->merging)
1068                 return;
1069
1070         if (S_ISDIR(rec->imode)) {
1071                 if (rec->found_size != rec->isize)
1072                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073                 if (rec->found_file_extent)
1074                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076                 if (rec->found_dir_item)
1077                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1078                 if (rec->found_size != rec->nbytes)
1079                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1080                 if (rec->nlink > 0 && !no_holes &&
1081                     (rec->extent_end < rec->isize ||
1082                      first_extent_gap(&rec->holes) < rec->isize))
1083                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1084         }
1085
1086         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087                 if (rec->found_csum_item && rec->nodatasum)
1088                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089                 if (rec->some_csum_missing && !rec->nodatasum)
1090                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1091         }
1092
1093         BUG_ON(rec->refs != 1);
1094         if (can_free_inode_rec(rec)) {
1095                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096                 node = container_of(cache, struct ptr_node, cache);
1097                 BUG_ON(node->data != rec);
1098                 remove_cache_extent(inode_cache, &node->cache);
1099                 free(node);
1100                 free_inode_rec(rec);
1101         }
1102 }
1103
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1105 {
1106         struct btrfs_path path;
1107         struct btrfs_key key;
1108         int ret;
1109
1110         key.objectid = BTRFS_ORPHAN_OBJECTID;
1111         key.type = BTRFS_ORPHAN_ITEM_KEY;
1112         key.offset = ino;
1113
1114         btrfs_init_path(&path);
1115         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116         btrfs_release_path(&path);
1117         if (ret > 0)
1118                 ret = -ENOENT;
1119         return ret;
1120 }
1121
1122 static int process_inode_item(struct extent_buffer *eb,
1123                               int slot, struct btrfs_key *key,
1124                               struct shared_node *active_node)
1125 {
1126         struct inode_record *rec;
1127         struct btrfs_inode_item *item;
1128
1129         rec = active_node->current;
1130         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131         if (rec->found_inode_item) {
1132                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1133                 return 1;
1134         }
1135         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136         rec->nlink = btrfs_inode_nlink(eb, item);
1137         rec->isize = btrfs_inode_size(eb, item);
1138         rec->nbytes = btrfs_inode_nbytes(eb, item);
1139         rec->imode = btrfs_inode_mode(eb, item);
1140         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1141                 rec->nodatasum = 1;
1142         rec->found_inode_item = 1;
1143         if (rec->nlink == 0)
1144                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145         maybe_free_inode_rec(&active_node->inode_cache, rec);
1146         return 0;
1147 }
1148
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1150                                                 const char *name,
1151                                                 int namelen, u64 dir)
1152 {
1153         struct inode_backref *backref;
1154
1155         list_for_each_entry(backref, &rec->backrefs, list) {
1156                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1157                         break;
1158                 if (backref->dir != dir || backref->namelen != namelen)
1159                         continue;
1160                 if (memcmp(name, backref->name, namelen))
1161                         continue;
1162                 return backref;
1163         }
1164
1165         backref = malloc(sizeof(*backref) + namelen + 1);
1166         if (!backref)
1167                 return NULL;
1168         memset(backref, 0, sizeof(*backref));
1169         backref->dir = dir;
1170         backref->namelen = namelen;
1171         memcpy(backref->name, name, namelen);
1172         backref->name[namelen] = '\0';
1173         list_add_tail(&backref->list, &rec->backrefs);
1174         return backref;
1175 }
1176
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178                              u64 ino, u64 dir, u64 index,
1179                              const char *name, int namelen,
1180                              u8 filetype, u8 itemtype, int errors)
1181 {
1182         struct inode_record *rec;
1183         struct inode_backref *backref;
1184
1185         rec = get_inode_rec(inode_cache, ino, 1);
1186         BUG_ON(IS_ERR(rec));
1187         backref = get_inode_backref(rec, name, namelen, dir);
1188         BUG_ON(!backref);
1189         if (errors)
1190                 backref->errors |= errors;
1191         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192                 if (backref->found_dir_index)
1193                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194                 if (backref->found_inode_ref && backref->index != index)
1195                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1196                 if (backref->found_dir_item && backref->filetype != filetype)
1197                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1198
1199                 backref->index = index;
1200                 backref->filetype = filetype;
1201                 backref->found_dir_index = 1;
1202         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1203                 rec->found_link++;
1204                 if (backref->found_dir_item)
1205                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206                 if (backref->found_dir_index && backref->filetype != filetype)
1207                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1208
1209                 backref->filetype = filetype;
1210                 backref->found_dir_item = 1;
1211         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213                 if (backref->found_inode_ref)
1214                         backref->errors |= REF_ERR_DUP_INODE_REF;
1215                 if (backref->found_dir_index && backref->index != index)
1216                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1217                 else
1218                         backref->index = index;
1219
1220                 backref->ref_type = itemtype;
1221                 backref->found_inode_ref = 1;
1222         } else {
1223                 BUG_ON(1);
1224         }
1225
1226         maybe_free_inode_rec(inode_cache, rec);
1227         return 0;
1228 }
1229
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231                             struct cache_tree *dst_cache)
1232 {
1233         struct inode_backref *backref;
1234         u32 dir_count = 0;
1235         int ret = 0;
1236
1237         dst->merging = 1;
1238         list_for_each_entry(backref, &src->backrefs, list) {
1239                 if (backref->found_dir_index) {
1240                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1241                                         backref->index, backref->name,
1242                                         backref->namelen, backref->filetype,
1243                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1244                 }
1245                 if (backref->found_dir_item) {
1246                         dir_count++;
1247                         add_inode_backref(dst_cache, dst->ino,
1248                                         backref->dir, 0, backref->name,
1249                                         backref->namelen, backref->filetype,
1250                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1251                 }
1252                 if (backref->found_inode_ref) {
1253                         add_inode_backref(dst_cache, dst->ino,
1254                                         backref->dir, backref->index,
1255                                         backref->name, backref->namelen, 0,
1256                                         backref->ref_type, backref->errors);
1257                 }
1258         }
1259
1260         if (src->found_dir_item)
1261                 dst->found_dir_item = 1;
1262         if (src->found_file_extent)
1263                 dst->found_file_extent = 1;
1264         if (src->found_csum_item)
1265                 dst->found_csum_item = 1;
1266         if (src->some_csum_missing)
1267                 dst->some_csum_missing = 1;
1268         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1270                 if (ret < 0)
1271                         return ret;
1272         }
1273
1274         BUG_ON(src->found_link < dir_count);
1275         dst->found_link += src->found_link - dir_count;
1276         dst->found_size += src->found_size;
1277         if (src->extent_start != (u64)-1) {
1278                 if (dst->extent_start == (u64)-1) {
1279                         dst->extent_start = src->extent_start;
1280                         dst->extent_end = src->extent_end;
1281                 } else {
1282                         if (dst->extent_end > src->extent_start)
1283                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284                         else if (dst->extent_end < src->extent_start) {
1285                                 ret = add_file_extent_hole(&dst->holes,
1286                                         dst->extent_end,
1287                                         src->extent_start - dst->extent_end);
1288                         }
1289                         if (dst->extent_end < src->extent_end)
1290                                 dst->extent_end = src->extent_end;
1291                 }
1292         }
1293
1294         dst->errors |= src->errors;
1295         if (src->found_inode_item) {
1296                 if (!dst->found_inode_item) {
1297                         dst->nlink = src->nlink;
1298                         dst->isize = src->isize;
1299                         dst->nbytes = src->nbytes;
1300                         dst->imode = src->imode;
1301                         dst->nodatasum = src->nodatasum;
1302                         dst->found_inode_item = 1;
1303                 } else {
1304                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1305                 }
1306         }
1307         dst->merging = 0;
1308
1309         return 0;
1310 }
1311
1312 static int splice_shared_node(struct shared_node *src_node,
1313                               struct shared_node *dst_node)
1314 {
1315         struct cache_extent *cache;
1316         struct ptr_node *node, *ins;
1317         struct cache_tree *src, *dst;
1318         struct inode_record *rec, *conflict;
1319         u64 current_ino = 0;
1320         int splice = 0;
1321         int ret;
1322
1323         if (--src_node->refs == 0)
1324                 splice = 1;
1325         if (src_node->current)
1326                 current_ino = src_node->current->ino;
1327
1328         src = &src_node->root_cache;
1329         dst = &dst_node->root_cache;
1330 again:
1331         cache = search_cache_extent(src, 0);
1332         while (cache) {
1333                 node = container_of(cache, struct ptr_node, cache);
1334                 rec = node->data;
1335                 cache = next_cache_extent(cache);
1336
1337                 if (splice) {
1338                         remove_cache_extent(src, &node->cache);
1339                         ins = node;
1340                 } else {
1341                         ins = malloc(sizeof(*ins));
1342                         BUG_ON(!ins);
1343                         ins->cache.start = node->cache.start;
1344                         ins->cache.size = node->cache.size;
1345                         ins->data = rec;
1346                         rec->refs++;
1347                 }
1348                 ret = insert_cache_extent(dst, &ins->cache);
1349                 if (ret == -EEXIST) {
1350                         conflict = get_inode_rec(dst, rec->ino, 1);
1351                         BUG_ON(IS_ERR(conflict));
1352                         merge_inode_recs(rec, conflict, dst);
1353                         if (rec->checked) {
1354                                 conflict->checked = 1;
1355                                 if (dst_node->current == conflict)
1356                                         dst_node->current = NULL;
1357                         }
1358                         maybe_free_inode_rec(dst, conflict);
1359                         free_inode_rec(rec);
1360                         free(ins);
1361                 } else {
1362                         BUG_ON(ret);
1363                 }
1364         }
1365
1366         if (src == &src_node->root_cache) {
1367                 src = &src_node->inode_cache;
1368                 dst = &dst_node->inode_cache;
1369                 goto again;
1370         }
1371
1372         if (current_ino > 0 && (!dst_node->current ||
1373             current_ino > dst_node->current->ino)) {
1374                 if (dst_node->current) {
1375                         dst_node->current->checked = 1;
1376                         maybe_free_inode_rec(dst, dst_node->current);
1377                 }
1378                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379                 BUG_ON(IS_ERR(dst_node->current));
1380         }
1381         return 0;
1382 }
1383
1384 static void free_inode_ptr(struct cache_extent *cache)
1385 {
1386         struct ptr_node *node;
1387         struct inode_record *rec;
1388
1389         node = container_of(cache, struct ptr_node, cache);
1390         rec = node->data;
1391         free_inode_rec(rec);
1392         free(node);
1393 }
1394
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1396
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1398                                             u64 bytenr)
1399 {
1400         struct cache_extent *cache;
1401         struct shared_node *node;
1402
1403         cache = lookup_cache_extent(shared, bytenr, 1);
1404         if (cache) {
1405                 node = container_of(cache, struct shared_node, cache);
1406                 return node;
1407         }
1408         return NULL;
1409 }
1410
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1412 {
1413         int ret;
1414         struct shared_node *node;
1415
1416         node = calloc(1, sizeof(*node));
1417         if (!node)
1418                 return -ENOMEM;
1419         node->cache.start = bytenr;
1420         node->cache.size = 1;
1421         cache_tree_init(&node->root_cache);
1422         cache_tree_init(&node->inode_cache);
1423         node->refs = refs;
1424
1425         ret = insert_cache_extent(shared, &node->cache);
1426
1427         return ret;
1428 }
1429
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431                              struct walk_control *wc, int level)
1432 {
1433         struct shared_node *node;
1434         struct shared_node *dest;
1435         int ret;
1436
1437         if (level == wc->active_node)
1438                 return 0;
1439
1440         BUG_ON(wc->active_node <= level);
1441         node = find_shared_node(&wc->shared, bytenr);
1442         if (!node) {
1443                 ret = add_shared_node(&wc->shared, bytenr, refs);
1444                 BUG_ON(ret);
1445                 node = find_shared_node(&wc->shared, bytenr);
1446                 wc->nodes[level] = node;
1447                 wc->active_node = level;
1448                 return 0;
1449         }
1450
1451         if (wc->root_level == wc->active_node &&
1452             btrfs_root_refs(&root->root_item) == 0) {
1453                 if (--node->refs == 0) {
1454                         free_inode_recs_tree(&node->root_cache);
1455                         free_inode_recs_tree(&node->inode_cache);
1456                         remove_cache_extent(&wc->shared, &node->cache);
1457                         free(node);
1458                 }
1459                 return 1;
1460         }
1461
1462         dest = wc->nodes[wc->active_node];
1463         splice_shared_node(node, dest);
1464         if (node->refs == 0) {
1465                 remove_cache_extent(&wc->shared, &node->cache);
1466                 free(node);
1467         }
1468         return 1;
1469 }
1470
1471 static int leave_shared_node(struct btrfs_root *root,
1472                              struct walk_control *wc, int level)
1473 {
1474         struct shared_node *node;
1475         struct shared_node *dest;
1476         int i;
1477
1478         if (level == wc->root_level)
1479                 return 0;
1480
1481         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1482                 if (wc->nodes[i])
1483                         break;
1484         }
1485         BUG_ON(i >= BTRFS_MAX_LEVEL);
1486
1487         node = wc->nodes[wc->active_node];
1488         wc->nodes[wc->active_node] = NULL;
1489         wc->active_node = i;
1490
1491         dest = wc->nodes[wc->active_node];
1492         if (wc->active_node < wc->root_level ||
1493             btrfs_root_refs(&root->root_item) > 0) {
1494                 BUG_ON(node->refs <= 1);
1495                 splice_shared_node(node, dest);
1496         } else {
1497                 BUG_ON(node->refs < 2);
1498                 node->refs--;
1499         }
1500         return 0;
1501 }
1502
1503 /*
1504  * Returns:
1505  * < 0 - on error
1506  * 1   - if the root with id child_root_id is a child of root parent_root_id
1507  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1508  *       has other root(s) as parent(s)
1509  * 2   - if the root child_root_id doesn't have any parent roots
1510  */
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1512                          u64 child_root_id)
1513 {
1514         struct btrfs_path path;
1515         struct btrfs_key key;
1516         struct extent_buffer *leaf;
1517         int has_parent = 0;
1518         int ret;
1519
1520         btrfs_init_path(&path);
1521
1522         key.objectid = parent_root_id;
1523         key.type = BTRFS_ROOT_REF_KEY;
1524         key.offset = child_root_id;
1525         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1526                                 0, 0);
1527         if (ret < 0)
1528                 return ret;
1529         btrfs_release_path(&path);
1530         if (!ret)
1531                 return 1;
1532
1533         key.objectid = child_root_id;
1534         key.type = BTRFS_ROOT_BACKREF_KEY;
1535         key.offset = 0;
1536         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1537                                 0, 0);
1538         if (ret < 0)
1539                 goto out;
1540
1541         while (1) {
1542                 leaf = path.nodes[0];
1543                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1545                         if (ret)
1546                                 break;
1547                         leaf = path.nodes[0];
1548                 }
1549
1550                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1551                 if (key.objectid != child_root_id ||
1552                     key.type != BTRFS_ROOT_BACKREF_KEY)
1553                         break;
1554
1555                 has_parent = 1;
1556
1557                 if (key.offset == parent_root_id) {
1558                         btrfs_release_path(&path);
1559                         return 1;
1560                 }
1561
1562                 path.slots[0]++;
1563         }
1564 out:
1565         btrfs_release_path(&path);
1566         if (ret < 0)
1567                 return ret;
1568         return has_parent ? 0 : 2;
1569 }
1570
1571 static int process_dir_item(struct extent_buffer *eb,
1572                             int slot, struct btrfs_key *key,
1573                             struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u32 data_len;
1580         int error;
1581         int nritems = 0;
1582         u8 filetype;
1583         struct btrfs_dir_item *di;
1584         struct inode_record *rec;
1585         struct cache_tree *root_cache;
1586         struct cache_tree *inode_cache;
1587         struct btrfs_key location;
1588         char namebuf[BTRFS_NAME_LEN];
1589
1590         root_cache = &active_node->root_cache;
1591         inode_cache = &active_node->inode_cache;
1592         rec = active_node->current;
1593         rec->found_dir_item = 1;
1594
1595         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596         total = btrfs_item_size_nr(eb, slot);
1597         while (cur < total) {
1598                 nritems++;
1599                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600                 name_len = btrfs_dir_name_len(eb, di);
1601                 data_len = btrfs_dir_data_len(eb, di);
1602                 filetype = btrfs_dir_type(eb, di);
1603
1604                 rec->found_size += name_len;
1605                 if (cur + sizeof(*di) + name_len > total ||
1606                     name_len > BTRFS_NAME_LEN) {
1607                         error = REF_ERR_NAME_TOO_LONG;
1608
1609                         if (cur + sizeof(*di) > total)
1610                                 break;
1611                         len = min_t(u32, total - cur - sizeof(*di),
1612                                     BTRFS_NAME_LEN);
1613                 } else {
1614                         len = name_len;
1615                         error = 0;
1616                 }
1617
1618                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1619
1620                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621                     key->offset != btrfs_name_hash(namebuf, len)) {
1622                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1623                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624                         key->objectid, key->offset, namebuf, len, filetype,
1625                         key->offset, btrfs_name_hash(namebuf, len));
1626                 }
1627
1628                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629                         add_inode_backref(inode_cache, location.objectid,
1630                                           key->objectid, key->offset, namebuf,
1631                                           len, filetype, key->type, error);
1632                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633                         add_inode_backref(root_cache, location.objectid,
1634                                           key->objectid, key->offset,
1635                                           namebuf, len, filetype,
1636                                           key->type, error);
1637                 } else {
1638                         fprintf(stderr, "invalid location in dir item %u\n",
1639                                 location.type);
1640                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1641                                           key->objectid, key->offset, namebuf,
1642                                           len, filetype, key->type, error);
1643                 }
1644
1645                 len = sizeof(*di) + name_len + data_len;
1646                 di = (struct btrfs_dir_item *)((char *)di + len);
1647                 cur += len;
1648         }
1649         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1650                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1651
1652         return 0;
1653 }
1654
1655 static int process_inode_ref(struct extent_buffer *eb,
1656                              int slot, struct btrfs_key *key,
1657                              struct shared_node *active_node)
1658 {
1659         u32 total;
1660         u32 cur = 0;
1661         u32 len;
1662         u32 name_len;
1663         u64 index;
1664         int error;
1665         struct cache_tree *inode_cache;
1666         struct btrfs_inode_ref *ref;
1667         char namebuf[BTRFS_NAME_LEN];
1668
1669         inode_cache = &active_node->inode_cache;
1670
1671         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1672         total = btrfs_item_size_nr(eb, slot);
1673         while (cur < total) {
1674                 name_len = btrfs_inode_ref_name_len(eb, ref);
1675                 index = btrfs_inode_ref_index(eb, ref);
1676
1677                 /* inode_ref + namelen should not cross item boundary */
1678                 if (cur + sizeof(*ref) + name_len > total ||
1679                     name_len > BTRFS_NAME_LEN) {
1680                         if (total < cur + sizeof(*ref))
1681                                 break;
1682
1683                         /* Still try to read out the remaining part */
1684                         len = min_t(u32, total - cur - sizeof(*ref),
1685                                     BTRFS_NAME_LEN);
1686                         error = REF_ERR_NAME_TOO_LONG;
1687                 } else {
1688                         len = name_len;
1689                         error = 0;
1690                 }
1691
1692                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1693                 add_inode_backref(inode_cache, key->objectid, key->offset,
1694                                   index, namebuf, len, 0, key->type, error);
1695
1696                 len = sizeof(*ref) + name_len;
1697                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1698                 cur += len;
1699         }
1700         return 0;
1701 }
1702
1703 static int process_inode_extref(struct extent_buffer *eb,
1704                                 int slot, struct btrfs_key *key,
1705                                 struct shared_node *active_node)
1706 {
1707         u32 total;
1708         u32 cur = 0;
1709         u32 len;
1710         u32 name_len;
1711         u64 index;
1712         u64 parent;
1713         int error;
1714         struct cache_tree *inode_cache;
1715         struct btrfs_inode_extref *extref;
1716         char namebuf[BTRFS_NAME_LEN];
1717
1718         inode_cache = &active_node->inode_cache;
1719
1720         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1721         total = btrfs_item_size_nr(eb, slot);
1722         while (cur < total) {
1723                 name_len = btrfs_inode_extref_name_len(eb, extref);
1724                 index = btrfs_inode_extref_index(eb, extref);
1725                 parent = btrfs_inode_extref_parent(eb, extref);
1726                 if (name_len <= BTRFS_NAME_LEN) {
1727                         len = name_len;
1728                         error = 0;
1729                 } else {
1730                         len = BTRFS_NAME_LEN;
1731                         error = REF_ERR_NAME_TOO_LONG;
1732                 }
1733                 read_extent_buffer(eb, namebuf,
1734                                    (unsigned long)(extref + 1), len);
1735                 add_inode_backref(inode_cache, key->objectid, parent,
1736                                   index, namebuf, len, 0, key->type, error);
1737
1738                 len = sizeof(*extref) + name_len;
1739                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1740                 cur += len;
1741         }
1742         return 0;
1743
1744 }
1745
1746 static int count_csum_range(struct btrfs_root *root, u64 start,
1747                             u64 len, u64 *found)
1748 {
1749         struct btrfs_key key;
1750         struct btrfs_path path;
1751         struct extent_buffer *leaf;
1752         int ret;
1753         size_t size;
1754         *found = 0;
1755         u64 csum_end;
1756         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1757
1758         btrfs_init_path(&path);
1759
1760         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1761         key.offset = start;
1762         key.type = BTRFS_EXTENT_CSUM_KEY;
1763
1764         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1765                                 &key, &path, 0, 0);
1766         if (ret < 0)
1767                 goto out;
1768         if (ret > 0 && path.slots[0] > 0) {
1769                 leaf = path.nodes[0];
1770                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1771                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1772                     key.type == BTRFS_EXTENT_CSUM_KEY)
1773                         path.slots[0]--;
1774         }
1775
1776         while (len > 0) {
1777                 leaf = path.nodes[0];
1778                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1779                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1780                         if (ret > 0)
1781                                 break;
1782                         else if (ret < 0)
1783                                 goto out;
1784                         leaf = path.nodes[0];
1785                 }
1786
1787                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1788                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1789                     key.type != BTRFS_EXTENT_CSUM_KEY)
1790                         break;
1791
1792                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1793                 if (key.offset >= start + len)
1794                         break;
1795
1796                 if (key.offset > start)
1797                         start = key.offset;
1798
1799                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1800                 csum_end = key.offset + (size / csum_size) *
1801                            root->fs_info->sectorsize;
1802                 if (csum_end > start) {
1803                         size = min(csum_end - start, len);
1804                         len -= size;
1805                         start += size;
1806                         *found += size;
1807                 }
1808
1809                 path.slots[0]++;
1810         }
1811 out:
1812         btrfs_release_path(&path);
1813         if (ret < 0)
1814                 return ret;
1815         return 0;
1816 }
1817
1818 static int process_file_extent(struct btrfs_root *root,
1819                                 struct extent_buffer *eb,
1820                                 int slot, struct btrfs_key *key,
1821                                 struct shared_node *active_node)
1822 {
1823         struct inode_record *rec;
1824         struct btrfs_file_extent_item *fi;
1825         u64 num_bytes = 0;
1826         u64 disk_bytenr = 0;
1827         u64 extent_offset = 0;
1828         u64 mask = root->fs_info->sectorsize - 1;
1829         int extent_type;
1830         int ret;
1831
1832         rec = active_node->current;
1833         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1834         rec->found_file_extent = 1;
1835
1836         if (rec->extent_start == (u64)-1) {
1837                 rec->extent_start = key->offset;
1838                 rec->extent_end = key->offset;
1839         }
1840
1841         if (rec->extent_end > key->offset)
1842                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1843         else if (rec->extent_end < key->offset) {
1844                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1845                                            key->offset - rec->extent_end);
1846                 if (ret < 0)
1847                         return ret;
1848         }
1849
1850         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1851         extent_type = btrfs_file_extent_type(eb, fi);
1852
1853         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1854                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1855                 if (num_bytes == 0)
1856                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1857                 rec->found_size += num_bytes;
1858                 num_bytes = (num_bytes + mask) & ~mask;
1859         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1860                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1861                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1862                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1863                 extent_offset = btrfs_file_extent_offset(eb, fi);
1864                 if (num_bytes == 0 || (num_bytes & mask))
1865                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1866                 if (num_bytes + extent_offset >
1867                     btrfs_file_extent_ram_bytes(eb, fi))
1868                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1869                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1870                     (btrfs_file_extent_compression(eb, fi) ||
1871                      btrfs_file_extent_encryption(eb, fi) ||
1872                      btrfs_file_extent_other_encoding(eb, fi)))
1873                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1874                 if (disk_bytenr > 0)
1875                         rec->found_size += num_bytes;
1876         } else {
1877                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1878         }
1879         rec->extent_end = key->offset + num_bytes;
1880
1881         /*
1882          * The data reloc tree will copy full extents into its inode and then
1883          * copy the corresponding csums.  Because the extent it copied could be
1884          * a preallocated extent that hasn't been written to yet there may be no
1885          * csums to copy, ergo we won't have csums for our file extent.  This is
1886          * ok so just don't bother checking csums if the inode belongs to the
1887          * data reloc tree.
1888          */
1889         if (disk_bytenr > 0 &&
1890             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1891                 u64 found;
1892                 if (btrfs_file_extent_compression(eb, fi))
1893                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1894                 else
1895                         disk_bytenr += extent_offset;
1896
1897                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1898                 if (ret < 0)
1899                         return ret;
1900                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1901                         if (found > 0)
1902                                 rec->found_csum_item = 1;
1903                         if (found < num_bytes)
1904                                 rec->some_csum_missing = 1;
1905                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1906                         if (found > 0)
1907                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1908                 }
1909         }
1910         return 0;
1911 }
1912
1913 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1914                             struct walk_control *wc)
1915 {
1916         struct btrfs_key key;
1917         u32 nritems;
1918         int i;
1919         int ret = 0;
1920         struct cache_tree *inode_cache;
1921         struct shared_node *active_node;
1922
1923         if (wc->root_level == wc->active_node &&
1924             btrfs_root_refs(&root->root_item) == 0)
1925                 return 0;
1926
1927         active_node = wc->nodes[wc->active_node];
1928         inode_cache = &active_node->inode_cache;
1929         nritems = btrfs_header_nritems(eb);
1930         for (i = 0; i < nritems; i++) {
1931                 btrfs_item_key_to_cpu(eb, &key, i);
1932
1933                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1934                         continue;
1935                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1936                         continue;
1937
1938                 if (active_node->current == NULL ||
1939                     active_node->current->ino < key.objectid) {
1940                         if (active_node->current) {
1941                                 active_node->current->checked = 1;
1942                                 maybe_free_inode_rec(inode_cache,
1943                                                      active_node->current);
1944                         }
1945                         active_node->current = get_inode_rec(inode_cache,
1946                                                              key.objectid, 1);
1947                         BUG_ON(IS_ERR(active_node->current));
1948                 }
1949                 switch (key.type) {
1950                 case BTRFS_DIR_ITEM_KEY:
1951                 case BTRFS_DIR_INDEX_KEY:
1952                         ret = process_dir_item(eb, i, &key, active_node);
1953                         break;
1954                 case BTRFS_INODE_REF_KEY:
1955                         ret = process_inode_ref(eb, i, &key, active_node);
1956                         break;
1957                 case BTRFS_INODE_EXTREF_KEY:
1958                         ret = process_inode_extref(eb, i, &key, active_node);
1959                         break;
1960                 case BTRFS_INODE_ITEM_KEY:
1961                         ret = process_inode_item(eb, i, &key, active_node);
1962                         break;
1963                 case BTRFS_EXTENT_DATA_KEY:
1964                         ret = process_file_extent(root, eb, i, &key,
1965                                                   active_node);
1966                         break;
1967                 default:
1968                         break;
1969                 };
1970         }
1971         return ret;
1972 }
1973
1974 struct node_refs {
1975         u64 bytenr[BTRFS_MAX_LEVEL];
1976         u64 refs[BTRFS_MAX_LEVEL];
1977         int need_check[BTRFS_MAX_LEVEL];
1978         /* field for checking all trees */
1979         int checked[BTRFS_MAX_LEVEL];
1980         /* the corresponding extent should be marked as full backref or not */
1981         int full_backref[BTRFS_MAX_LEVEL];
1982 };
1983
1984 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1985                              struct extent_buffer *eb, struct node_refs *nrefs,
1986                              u64 level, int check_all);
1987 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1988                             unsigned int ext_ref);
1989
1990 /*
1991  * Returns >0  Found error, not fatal, should continue
1992  * Returns <0  Fatal error, must exit the whole check
1993  * Returns 0   No errors found
1994  */
1995 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1996                                struct node_refs *nrefs, int *level, int ext_ref)
1997 {
1998         struct extent_buffer *cur = path->nodes[0];
1999         struct btrfs_key key;
2000         u64 cur_bytenr;
2001         u32 nritems;
2002         u64 first_ino = 0;
2003         int root_level = btrfs_header_level(root->node);
2004         int i;
2005         int ret = 0; /* Final return value */
2006         int err = 0; /* Positive error bitmap */
2007
2008         cur_bytenr = cur->start;
2009
2010         /* skip to first inode item or the first inode number change */
2011         nritems = btrfs_header_nritems(cur);
2012         for (i = 0; i < nritems; i++) {
2013                 btrfs_item_key_to_cpu(cur, &key, i);
2014                 if (i == 0)
2015                         first_ino = key.objectid;
2016                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2017                     (first_ino && first_ino != key.objectid))
2018                         break;
2019         }
2020         if (i == nritems) {
2021                 path->slots[0] = nritems;
2022                 return 0;
2023         }
2024         path->slots[0] = i;
2025
2026 again:
2027         err |= check_inode_item(root, path, ext_ref);
2028
2029         /* modify cur since check_inode_item may change path */
2030         cur = path->nodes[0];
2031
2032         if (err & LAST_ITEM)
2033                 goto out;
2034
2035         /* still have inode items in thie leaf */
2036         if (cur->start == cur_bytenr)
2037                 goto again;
2038
2039         /*
2040          * we have switched to another leaf, above nodes may
2041          * have changed, here walk down the path, if a node
2042          * or leaf is shared, check whether we can skip this
2043          * node or leaf.
2044          */
2045         for (i = root_level; i >= 0; i--) {
2046                 if (path->nodes[i]->start == nrefs->bytenr[i])
2047                         continue;
2048
2049                 ret = update_nodes_refs(root, path->nodes[i]->start,
2050                                 path->nodes[i], nrefs, i, 0);
2051                 if (ret)
2052                         goto out;
2053
2054                 if (!nrefs->need_check[i]) {
2055                         *level += 1;
2056                         break;
2057                 }
2058         }
2059
2060         for (i = 0; i < *level; i++) {
2061                 free_extent_buffer(path->nodes[i]);
2062                 path->nodes[i] = NULL;
2063         }
2064 out:
2065         err &= ~LAST_ITEM;
2066         if (err && !ret)
2067                 ret = err;
2068         return ret;
2069 }
2070
2071 static void reada_walk_down(struct btrfs_root *root,
2072                             struct extent_buffer *node, int slot)
2073 {
2074         struct btrfs_fs_info *fs_info = root->fs_info;
2075         u64 bytenr;
2076         u64 ptr_gen;
2077         u32 nritems;
2078         int i;
2079         int level;
2080
2081         level = btrfs_header_level(node);
2082         if (level != 1)
2083                 return;
2084
2085         nritems = btrfs_header_nritems(node);
2086         for (i = slot; i < nritems; i++) {
2087                 bytenr = btrfs_node_blockptr(node, i);
2088                 ptr_gen = btrfs_node_ptr_generation(node, i);
2089                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2090         }
2091 }
2092
2093 /*
2094  * Check the child node/leaf by the following condition:
2095  * 1. the first item key of the node/leaf should be the same with the one
2096  *    in parent.
2097  * 2. block in parent node should match the child node/leaf.
2098  * 3. generation of parent node and child's header should be consistent.
2099  *
2100  * Or the child node/leaf pointed by the key in parent is not valid.
2101  *
2102  * We hope to check leaf owner too, but since subvol may share leaves,
2103  * which makes leaf owner check not so strong, key check should be
2104  * sufficient enough for that case.
2105  */
2106 static int check_child_node(struct extent_buffer *parent, int slot,
2107                             struct extent_buffer *child)
2108 {
2109         struct btrfs_key parent_key;
2110         struct btrfs_key child_key;
2111         int ret = 0;
2112
2113         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2114         if (btrfs_header_level(child) == 0)
2115                 btrfs_item_key_to_cpu(child, &child_key, 0);
2116         else
2117                 btrfs_node_key_to_cpu(child, &child_key, 0);
2118
2119         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2120                 ret = -EINVAL;
2121                 fprintf(stderr,
2122                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2123                         parent_key.objectid, parent_key.type, parent_key.offset,
2124                         child_key.objectid, child_key.type, child_key.offset);
2125         }
2126         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2127                 ret = -EINVAL;
2128                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2129                         btrfs_node_blockptr(parent, slot),
2130                         btrfs_header_bytenr(child));
2131         }
2132         if (btrfs_node_ptr_generation(parent, slot) !=
2133             btrfs_header_generation(child)) {
2134                 ret = -EINVAL;
2135                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2136                         btrfs_header_generation(child),
2137                         btrfs_node_ptr_generation(parent, slot));
2138         }
2139         return ret;
2140 }
2141
2142 /*
2143  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2144  * in every fs or file tree check. Here we find its all root ids, and only check
2145  * it in the fs or file tree which has the smallest root id.
2146  */
2147 static int need_check(struct btrfs_root *root, struct ulist *roots)
2148 {
2149         struct rb_node *node;
2150         struct ulist_node *u;
2151
2152         if (roots->nnodes == 1)
2153                 return 1;
2154
2155         node = rb_first(&roots->root);
2156         u = rb_entry(node, struct ulist_node, rb_node);
2157         /*
2158          * current root id is not smallest, we skip it and let it be checked
2159          * in the fs or file tree who hash the smallest root id.
2160          */
2161         if (root->objectid != u->val)
2162                 return 0;
2163
2164         return 1;
2165 }
2166
2167 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2168                                u64 *flags_ret)
2169 {
2170         struct btrfs_root *extent_root = root->fs_info->extent_root;
2171         struct btrfs_root_item *ri = &root->root_item;
2172         struct btrfs_extent_inline_ref *iref;
2173         struct btrfs_extent_item *ei;
2174         struct btrfs_key key;
2175         struct btrfs_path *path = NULL;
2176         unsigned long ptr;
2177         unsigned long end;
2178         u64 flags;
2179         u64 owner = 0;
2180         u64 offset;
2181         int slot;
2182         int type;
2183         int ret = 0;
2184
2185         /*
2186          * Except file/reloc tree, we can not have FULL BACKREF MODE
2187          */
2188         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
2189                 goto normal;
2190
2191         /* root node */
2192         if (eb->start == btrfs_root_bytenr(ri))
2193                 goto normal;
2194
2195         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2196                 goto full_backref;
2197
2198         owner = btrfs_header_owner(eb);
2199         if (owner == root->objectid)
2200                 goto normal;
2201
2202         path = btrfs_alloc_path();
2203         if (!path)
2204                 return -ENOMEM;
2205
2206         key.objectid = btrfs_header_bytenr(eb);
2207         key.type = (u8)-1;
2208         key.offset = (u64)-1;
2209
2210         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2211         if (ret <= 0) {
2212                 ret = -EIO;
2213                 goto out;
2214         }
2215
2216         if (ret > 0) {
2217                 ret = btrfs_previous_extent_item(extent_root, path,
2218                                                  key.objectid);
2219                 if (ret)
2220                         goto full_backref;
2221
2222         }
2223         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2224
2225         eb = path->nodes[0];
2226         slot = path->slots[0];
2227         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2228
2229         flags = btrfs_extent_flags(eb, ei);
2230         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2231                 goto full_backref;
2232
2233         ptr = (unsigned long)(ei + 1);
2234         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
2235
2236         if (key.type == BTRFS_EXTENT_ITEM_KEY)
2237                 ptr += sizeof(struct btrfs_tree_block_info);
2238
2239 next:
2240         /* Reached extent item ends normally */
2241         if (ptr == end)
2242                 goto full_backref;
2243
2244         /* Beyond extent item end, wrong item size */
2245         if (ptr > end) {
2246                 error("extent item at bytenr %llu slot %d has wrong size",
2247                         eb->start, slot);
2248                 goto full_backref;
2249         }
2250
2251         iref = (struct btrfs_extent_inline_ref *)ptr;
2252         offset = btrfs_extent_inline_ref_offset(eb, iref);
2253         type = btrfs_extent_inline_ref_type(eb, iref);
2254
2255         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2256                 goto normal;
2257         ptr += btrfs_extent_inline_ref_size(type);
2258         goto next;
2259
2260 normal:
2261         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2262         goto out;
2263
2264 full_backref:
2265         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2266 out:
2267         btrfs_free_path(path);
2268         return ret;
2269 }
2270
2271 /*
2272  * for a tree node or leaf, we record its reference count, so later if we still
2273  * process this node or leaf, don't need to compute its reference count again.
2274  *
2275  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
2276  */
2277 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2278                              struct extent_buffer *eb, struct node_refs *nrefs,
2279                              u64 level, int check_all)
2280 {
2281         struct ulist *roots;
2282         u64 refs = 0;
2283         u64 flags = 0;
2284         int root_level = btrfs_header_level(root->node);
2285         int check;
2286         int ret;
2287
2288         if (nrefs->bytenr[level] == bytenr)
2289                 return 0;
2290
2291         if (bytenr != (u64)-1) {
2292                 /* the return value of this function seems a mistake */
2293                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2294                                        level, 1, &refs, &flags);
2295                 /* temporary fix */
2296                 if (ret < 0 && !check_all)
2297                         return ret;
2298
2299                 nrefs->bytenr[level] = bytenr;
2300                 nrefs->refs[level] = refs;
2301                 nrefs->full_backref[level] = 0;
2302                 nrefs->checked[level] = 0;
2303
2304                 if (refs > 1) {
2305                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2306                                                    0, &roots);
2307                         if (ret)
2308                                 return -EIO;
2309
2310                         check = need_check(root, roots);
2311                         ulist_free(roots);
2312                         nrefs->need_check[level] = check;
2313                 } else {
2314                         if (!check_all) {
2315                                 nrefs->need_check[level] = 1;
2316                         } else {
2317                                 if (level == root_level) {
2318                                         nrefs->need_check[level] = 1;
2319                                 } else {
2320                                         /*
2321                                          * The node refs may have not been
2322                                          * updated if upper needs checking (the
2323                                          * lowest root_objectid) the node can
2324                                          * be checked.
2325                                          */
2326                                         nrefs->need_check[level] =
2327                                                 nrefs->need_check[level + 1];
2328                                 }
2329                         }
2330                 }
2331         }
2332
2333         if (check_all && eb) {
2334                 calc_extent_flag_v2(root, eb, &flags);
2335                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2336                         nrefs->full_backref[level] = 1;
2337         }
2338
2339         return 0;
2340 }
2341
2342 /*
2343  * @level           if @level == -1 means extent data item
2344  *                  else normal treeblocl.
2345  */
2346 static int should_check_extent_strictly(struct btrfs_root *root,
2347                                         struct node_refs *nrefs, int level)
2348 {
2349         int root_level = btrfs_header_level(root->node);
2350
2351         if (level > root_level || level < -1)
2352                 return 1;
2353         if (level == root_level)
2354                 return 1;
2355         /*
2356          * if the upper node is marked full backref, it should contain shared
2357          * backref of the parent (except owner == root->objectid).
2358          */
2359         while (++level <= root_level)
2360                 if (nrefs->refs[level] > 1)
2361                         return 0;
2362
2363         return 1;
2364 }
2365
2366 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2367                           struct walk_control *wc, int *level,
2368                           struct node_refs *nrefs)
2369 {
2370         enum btrfs_tree_block_status status;
2371         u64 bytenr;
2372         u64 ptr_gen;
2373         struct btrfs_fs_info *fs_info = root->fs_info;
2374         struct extent_buffer *next;
2375         struct extent_buffer *cur;
2376         int ret, err = 0;
2377         u64 refs;
2378
2379         WARN_ON(*level < 0);
2380         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2381
2382         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2383                 refs = nrefs->refs[*level];
2384                 ret = 0;
2385         } else {
2386                 ret = btrfs_lookup_extent_info(NULL, root,
2387                                        path->nodes[*level]->start,
2388                                        *level, 1, &refs, NULL);
2389                 if (ret < 0) {
2390                         err = ret;
2391                         goto out;
2392                 }
2393                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2394                 nrefs->refs[*level] = refs;
2395         }
2396
2397         if (refs > 1) {
2398                 ret = enter_shared_node(root, path->nodes[*level]->start,
2399                                         refs, wc, *level);
2400                 if (ret > 0) {
2401                         err = ret;
2402                         goto out;
2403                 }
2404         }
2405
2406         while (*level >= 0) {
2407                 WARN_ON(*level < 0);
2408                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2409                 cur = path->nodes[*level];
2410
2411                 if (btrfs_header_level(cur) != *level)
2412                         WARN_ON(1);
2413
2414                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2415                         break;
2416                 if (*level == 0) {
2417                         ret = process_one_leaf(root, cur, wc);
2418                         if (ret < 0)
2419                                 err = ret;
2420                         break;
2421                 }
2422                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2423                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2424
2425                 if (bytenr == nrefs->bytenr[*level - 1]) {
2426                         refs = nrefs->refs[*level - 1];
2427                 } else {
2428                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2429                                         *level - 1, 1, &refs, NULL);
2430                         if (ret < 0) {
2431                                 refs = 0;
2432                         } else {
2433                                 nrefs->bytenr[*level - 1] = bytenr;
2434                                 nrefs->refs[*level - 1] = refs;
2435                         }
2436                 }
2437
2438                 if (refs > 1) {
2439                         ret = enter_shared_node(root, bytenr, refs,
2440                                                 wc, *level - 1);
2441                         if (ret > 0) {
2442                                 path->slots[*level]++;
2443                                 continue;
2444                         }
2445                 }
2446
2447                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2448                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2449                         free_extent_buffer(next);
2450                         reada_walk_down(root, cur, path->slots[*level]);
2451                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2452                         if (!extent_buffer_uptodate(next)) {
2453                                 struct btrfs_key node_key;
2454
2455                                 btrfs_node_key_to_cpu(path->nodes[*level],
2456                                                       &node_key,
2457                                                       path->slots[*level]);
2458                                 btrfs_add_corrupt_extent_record(root->fs_info,
2459                                                 &node_key,
2460                                                 path->nodes[*level]->start,
2461                                                 root->fs_info->nodesize,
2462                                                 *level);
2463                                 err = -EIO;
2464                                 goto out;
2465                         }
2466                 }
2467
2468                 ret = check_child_node(cur, path->slots[*level], next);
2469                 if (ret) {
2470                         free_extent_buffer(next);
2471                         err = ret;
2472                         goto out;
2473                 }
2474
2475                 if (btrfs_is_leaf(next))
2476                         status = btrfs_check_leaf(root, NULL, next);
2477                 else
2478                         status = btrfs_check_node(root, NULL, next);
2479                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2480                         free_extent_buffer(next);
2481                         err = -EIO;
2482                         goto out;
2483                 }
2484
2485                 *level = *level - 1;
2486                 free_extent_buffer(path->nodes[*level]);
2487                 path->nodes[*level] = next;
2488                 path->slots[*level] = 0;
2489         }
2490 out:
2491         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2492         return err;
2493 }
2494
2495 static int fs_root_objectid(u64 objectid);
2496
2497 /*
2498  * Update global fs information.
2499  */
2500 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2501                          int level)
2502 {
2503         u32 free_nrs;
2504         struct extent_buffer *eb = path->nodes[level];
2505
2506         total_btree_bytes += eb->len;
2507         if (fs_root_objectid(root->objectid))
2508                 total_fs_tree_bytes += eb->len;
2509         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2510                 total_extent_tree_bytes += eb->len;
2511
2512         if (level == 0) {
2513                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2514         } else {
2515                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2516                             btrfs_header_nritems(eb));
2517                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2518         }
2519 }
2520
2521 /*
2522  * This function only handles BACKREF_MISSING,
2523  * If corresponding extent item exists, increase the ref, else insert an extent
2524  * item and backref.
2525  *
2526  * Returns error bits after repair.
2527  */
2528 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2529                                  struct btrfs_root *root,
2530                                  struct extent_buffer *node,
2531                                  struct node_refs *nrefs, int level, int err)
2532 {
2533         struct btrfs_fs_info *fs_info = root->fs_info;
2534         struct btrfs_root *extent_root = fs_info->extent_root;
2535         struct btrfs_path path;
2536         struct btrfs_extent_item *ei;
2537         struct btrfs_tree_block_info *bi;
2538         struct btrfs_key key;
2539         struct extent_buffer *eb;
2540         u32 size = sizeof(*ei);
2541         u32 node_size = root->fs_info->nodesize;
2542         int insert_extent = 0;
2543         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2544         int root_level = btrfs_header_level(root->node);
2545         int generation;
2546         int ret;
2547         u64 owner;
2548         u64 bytenr;
2549         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2550         u64 parent = 0;
2551
2552         if ((err & BACKREF_MISSING) == 0)
2553                 return err;
2554
2555         WARN_ON(level > BTRFS_MAX_LEVEL);
2556         WARN_ON(level < 0);
2557
2558         btrfs_init_path(&path);
2559         bytenr = btrfs_header_bytenr(node);
2560         owner = btrfs_header_owner(node);
2561         generation = btrfs_header_generation(node);
2562
2563         key.objectid = bytenr;
2564         key.type = (u8)-1;
2565         key.offset = (u64)-1;
2566
2567         /* Search for the extent item */
2568         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2569         if (ret <= 0) {
2570                 ret = -EIO;
2571                 goto out;
2572         }
2573
2574         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2575         if (ret)
2576                 insert_extent = 1;
2577
2578         /* calculate if the extent item flag is full backref or not */
2579         if (nrefs->full_backref[level] != 0)
2580                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2581
2582         /* insert an extent item */
2583         if (insert_extent) {
2584                 struct btrfs_disk_key copy_key;
2585
2586                 generation = btrfs_header_generation(node);
2587
2588                 if (level < root_level && nrefs->full_backref[level + 1] &&
2589                     owner != root->objectid) {
2590                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2591                 }
2592
2593                 key.objectid = bytenr;
2594                 if (!skinny_metadata) {
2595                         key.type = BTRFS_EXTENT_ITEM_KEY;
2596                         key.offset = node_size;
2597                         size += sizeof(*bi);
2598                 } else {
2599                         key.type = BTRFS_METADATA_ITEM_KEY;
2600                         key.offset = level;
2601                 }
2602
2603                 btrfs_release_path(&path);
2604                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2605                                               size);
2606                 if (ret)
2607                         goto out;
2608
2609                 eb = path.nodes[0];
2610                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2611
2612                 btrfs_set_extent_refs(eb, ei, 0);
2613                 btrfs_set_extent_generation(eb, ei, generation);
2614                 btrfs_set_extent_flags(eb, ei, flags);
2615
2616                 if (!skinny_metadata) {
2617                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2618                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2619                                              sizeof(*bi));
2620                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2621                         btrfs_set_disk_key_type(&copy_key, 0);
2622                         btrfs_set_disk_key_offset(&copy_key, 0);
2623
2624                         btrfs_set_tree_block_level(eb, bi, level);
2625                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2626                 }
2627                 btrfs_mark_buffer_dirty(eb);
2628                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2629                 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
2630                                          1, 0);
2631
2632                 nrefs->refs[level] = 0;
2633                 nrefs->full_backref[level] =
2634                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2635                 btrfs_release_path(&path);
2636         }
2637
2638         if (level < root_level && nrefs->full_backref[level + 1] &&
2639             owner != root->objectid)
2640                 parent = nrefs->bytenr[level + 1];
2641
2642         /* increase the ref */
2643         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2644                         parent, root->objectid, level, 0);
2645
2646         nrefs->refs[level]++;
2647 out:
2648         btrfs_release_path(&path);
2649         if (ret) {
2650                 error(
2651         "failed to repair tree block ref start %llu root %llu due to %s",
2652                       bytenr, root->objectid, strerror(-ret));
2653         } else {
2654                 printf("Added one tree block ref start %llu %s %llu\n",
2655                        bytenr, parent ? "parent" : "root",
2656                        parent ? parent : root->objectid);
2657                 err &= ~BACKREF_MISSING;
2658         }
2659
2660         return err;
2661 }
2662
2663 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2664                             unsigned int ext_ref);
2665 static int check_tree_block_ref(struct btrfs_root *root,
2666                                 struct extent_buffer *eb, u64 bytenr,
2667                                 int level, u64 owner, struct node_refs *nrefs);
2668 static int check_leaf_items(struct btrfs_trans_handle *trans,
2669                             struct btrfs_root *root, struct btrfs_path *path,
2670                             struct node_refs *nrefs, int account_bytes);
2671
2672 /*
2673  * @trans      just for lowmem repair mode
2674  * @check all  if not 0 then check all tree block backrefs and items
2675  *             0 then just check relationship of items in fs tree(s)
2676  *
2677  * Returns >0  Found error, should continue
2678  * Returns <0  Fatal error, must exit the whole check
2679  * Returns 0   No errors found
2680  */
2681 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2682                              struct btrfs_root *root, struct btrfs_path *path,
2683                              int *level, struct node_refs *nrefs, int ext_ref,
2684                              int check_all)
2685
2686 {
2687         enum btrfs_tree_block_status status;
2688         u64 bytenr;
2689         u64 ptr_gen;
2690         struct btrfs_fs_info *fs_info = root->fs_info;
2691         struct extent_buffer *next;
2692         struct extent_buffer *cur;
2693         int ret;
2694         int err = 0;
2695         int check;
2696         int account_file_data = 0;
2697
2698         WARN_ON(*level < 0);
2699         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2700
2701         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2702                                 path->nodes[*level], nrefs, *level, check_all);
2703         if (ret < 0)
2704                 return ret;
2705
2706         while (*level >= 0) {
2707                 WARN_ON(*level < 0);
2708                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2709                 cur = path->nodes[*level];
2710                 bytenr = btrfs_header_bytenr(cur);
2711                 check = nrefs->need_check[*level];
2712
2713                 if (btrfs_header_level(cur) != *level)
2714                         WARN_ON(1);
2715                /*
2716                 * Update bytes accounting and check tree block ref
2717                 * NOTE: Doing accounting and check before checking nritems
2718                 * is necessary because of empty node/leaf.
2719                 */
2720                 if ((check_all && !nrefs->checked[*level]) ||
2721                     (!check_all && nrefs->need_check[*level])) {
2722                         ret = check_tree_block_ref(root, cur,
2723                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2724                            btrfs_header_owner(cur), nrefs);
2725
2726                         if (repair && ret)
2727                                 ret = repair_tree_block_ref(trans, root,
2728                                     path->nodes[*level], nrefs, *level, ret);
2729                         err |= ret;
2730
2731                         if (check_all && nrefs->need_check[*level] &&
2732                                 nrefs->refs[*level]) {
2733                                 account_bytes(root, path, *level);
2734                                 account_file_data = 1;
2735                         }
2736                         nrefs->checked[*level] = 1;
2737                 }
2738
2739                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2740                         break;
2741
2742                 /* Don't forgot to check leaf/node validation */
2743                 if (*level == 0) {
2744                         /* skip duplicate check */
2745                         if (check || !check_all) {
2746                                 ret = btrfs_check_leaf(root, NULL, cur);
2747                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2748                                         err |= -EIO;
2749                                         break;
2750                                 }
2751                         }
2752
2753                         ret = 0;
2754                         if (!check_all)
2755                                 ret = process_one_leaf_v2(root, path, nrefs,
2756                                                           level, ext_ref);
2757                         else
2758                                 ret = check_leaf_items(trans, root, path,
2759                                                nrefs, account_file_data);
2760                         err |= ret;
2761                         break;
2762                 } else {
2763                         if (check || !check_all) {
2764                                 ret = btrfs_check_node(root, NULL, cur);
2765                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2766                                         err |= -EIO;
2767                                         break;
2768                                 }
2769                         }
2770                 }
2771
2772                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2773                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2774
2775                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2776                                         check_all);
2777                 if (ret < 0)
2778                         break;
2779                 /*
2780                  * check all trees in check_chunks_and_extent_v2
2781                  * check shared node once in check_fs_roots
2782                  */
2783                 if (!check_all && !nrefs->need_check[*level - 1]) {
2784                         path->slots[*level]++;
2785                         continue;
2786                 }
2787
2788                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2789                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2790                         free_extent_buffer(next);
2791                         reada_walk_down(root, cur, path->slots[*level]);
2792                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2793                         if (!extent_buffer_uptodate(next)) {
2794                                 struct btrfs_key node_key;
2795
2796                                 btrfs_node_key_to_cpu(path->nodes[*level],
2797                                                       &node_key,
2798                                                       path->slots[*level]);
2799                                 btrfs_add_corrupt_extent_record(fs_info,
2800                                         &node_key, path->nodes[*level]->start,
2801                                         fs_info->nodesize, *level);
2802                                 err |= -EIO;
2803                                 break;
2804                         }
2805                 }
2806
2807                 ret = check_child_node(cur, path->slots[*level], next);
2808                 err |= ret;
2809                 if (ret < 0) 
2810                         break;
2811
2812                 if (btrfs_is_leaf(next))
2813                         status = btrfs_check_leaf(root, NULL, next);
2814                 else
2815                         status = btrfs_check_node(root, NULL, next);
2816                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2817                         free_extent_buffer(next);
2818                         err |= -EIO;
2819                         break;
2820                 }
2821
2822                 *level = *level - 1;
2823                 free_extent_buffer(path->nodes[*level]);
2824                 path->nodes[*level] = next;
2825                 path->slots[*level] = 0;
2826                 account_file_data = 0;
2827
2828                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2829         }
2830         return err;
2831 }
2832
2833 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2834                         struct walk_control *wc, int *level)
2835 {
2836         int i;
2837         struct extent_buffer *leaf;
2838
2839         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2840                 leaf = path->nodes[i];
2841                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2842                         path->slots[i]++;
2843                         *level = i;
2844                         return 0;
2845                 } else {
2846                         free_extent_buffer(path->nodes[*level]);
2847                         path->nodes[*level] = NULL;
2848                         BUG_ON(*level > wc->active_node);
2849                         if (*level == wc->active_node)
2850                                 leave_shared_node(root, wc, *level);
2851                         *level = i + 1;
2852                 }
2853         }
2854         return 1;
2855 }
2856
2857 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2858                            int *level)
2859 {
2860         int i;
2861         struct extent_buffer *leaf;
2862
2863         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2864                 leaf = path->nodes[i];
2865                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2866                         path->slots[i]++;
2867                         *level = i;
2868                         return 0;
2869                 } else {
2870                         free_extent_buffer(path->nodes[*level]);
2871                         path->nodes[*level] = NULL;
2872                         *level = i + 1;
2873                 }
2874         }
2875         return 1;
2876 }
2877
2878 static int check_root_dir(struct inode_record *rec)
2879 {
2880         struct inode_backref *backref;
2881         int ret = -1;
2882
2883         if (!rec->found_inode_item || rec->errors)
2884                 goto out;
2885         if (rec->nlink != 1 || rec->found_link != 0)
2886                 goto out;
2887         if (list_empty(&rec->backrefs))
2888                 goto out;
2889         backref = to_inode_backref(rec->backrefs.next);
2890         if (!backref->found_inode_ref)
2891                 goto out;
2892         if (backref->index != 0 || backref->namelen != 2 ||
2893             memcmp(backref->name, "..", 2))
2894                 goto out;
2895         if (backref->found_dir_index || backref->found_dir_item)
2896                 goto out;
2897         ret = 0;
2898 out:
2899         return ret;
2900 }
2901
2902 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2903                               struct btrfs_root *root, struct btrfs_path *path,
2904                               struct inode_record *rec)
2905 {
2906         struct btrfs_inode_item *ei;
2907         struct btrfs_key key;
2908         int ret;
2909
2910         key.objectid = rec->ino;
2911         key.type = BTRFS_INODE_ITEM_KEY;
2912         key.offset = (u64)-1;
2913
2914         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2915         if (ret < 0)
2916                 goto out;
2917         if (ret) {
2918                 if (!path->slots[0]) {
2919                         ret = -ENOENT;
2920                         goto out;
2921                 }
2922                 path->slots[0]--;
2923                 ret = 0;
2924         }
2925         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2926         if (key.objectid != rec->ino) {
2927                 ret = -ENOENT;
2928                 goto out;
2929         }
2930
2931         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2932                             struct btrfs_inode_item);
2933         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2934         btrfs_mark_buffer_dirty(path->nodes[0]);
2935         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2936         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2937                root->root_key.objectid);
2938 out:
2939         btrfs_release_path(path);
2940         return ret;
2941 }
2942
2943 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2944                                     struct btrfs_root *root,
2945                                     struct btrfs_path *path,
2946                                     struct inode_record *rec)
2947 {
2948         int ret;
2949
2950         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2951         btrfs_release_path(path);
2952         if (!ret)
2953                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2954         return ret;
2955 }
2956
2957 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2958                                struct btrfs_root *root,
2959                                struct btrfs_path *path,
2960                                struct inode_record *rec)
2961 {
2962         struct btrfs_inode_item *ei;
2963         struct btrfs_key key;
2964         int ret = 0;
2965
2966         key.objectid = rec->ino;
2967         key.type = BTRFS_INODE_ITEM_KEY;
2968         key.offset = 0;
2969
2970         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2971         if (ret) {
2972                 if (ret > 0)
2973                         ret = -ENOENT;
2974                 goto out;
2975         }
2976
2977         /* Since ret == 0, no need to check anything */
2978         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2979                             struct btrfs_inode_item);
2980         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2981         btrfs_mark_buffer_dirty(path->nodes[0]);
2982         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2983         printf("reset nbytes for ino %llu root %llu\n",
2984                rec->ino, root->root_key.objectid);
2985 out:
2986         btrfs_release_path(path);
2987         return ret;
2988 }
2989
2990 static int add_missing_dir_index(struct btrfs_root *root,
2991                                  struct cache_tree *inode_cache,
2992                                  struct inode_record *rec,
2993                                  struct inode_backref *backref)
2994 {
2995         struct btrfs_path path;
2996         struct btrfs_trans_handle *trans;
2997         struct btrfs_dir_item *dir_item;
2998         struct extent_buffer *leaf;
2999         struct btrfs_key key;
3000         struct btrfs_disk_key disk_key;
3001         struct inode_record *dir_rec;
3002         unsigned long name_ptr;
3003         u32 data_size = sizeof(*dir_item) + backref->namelen;
3004         int ret;
3005
3006         trans = btrfs_start_transaction(root, 1);
3007         if (IS_ERR(trans))
3008                 return PTR_ERR(trans);
3009
3010         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3011                 (unsigned long long)rec->ino);
3012
3013         btrfs_init_path(&path);
3014         key.objectid = backref->dir;
3015         key.type = BTRFS_DIR_INDEX_KEY;
3016         key.offset = backref->index;
3017         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3018         BUG_ON(ret);
3019
3020         leaf = path.nodes[0];
3021         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
3022
3023         disk_key.objectid = cpu_to_le64(rec->ino);
3024         disk_key.type = BTRFS_INODE_ITEM_KEY;
3025         disk_key.offset = 0;
3026
3027         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3028         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3029         btrfs_set_dir_data_len(leaf, dir_item, 0);
3030         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
3031         name_ptr = (unsigned long)(dir_item + 1);
3032         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3033         btrfs_mark_buffer_dirty(leaf);
3034         btrfs_release_path(&path);
3035         btrfs_commit_transaction(trans, root);
3036
3037         backref->found_dir_index = 1;
3038         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3039         BUG_ON(IS_ERR(dir_rec));
3040         if (!dir_rec)
3041                 return 0;
3042         dir_rec->found_size += backref->namelen;
3043         if (dir_rec->found_size == dir_rec->isize &&
3044             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3045                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3046         if (dir_rec->found_size != dir_rec->isize)
3047                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
3048
3049         return 0;
3050 }
3051
3052 static int delete_dir_index(struct btrfs_root *root,
3053                             struct inode_backref *backref)
3054 {
3055         struct btrfs_trans_handle *trans;
3056         struct btrfs_dir_item *di;
3057         struct btrfs_path path;
3058         int ret = 0;
3059
3060         trans = btrfs_start_transaction(root, 1);
3061         if (IS_ERR(trans))
3062                 return PTR_ERR(trans);
3063
3064         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3065                 (unsigned long long)backref->dir,
3066                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3067                 (unsigned long long)root->objectid);
3068
3069         btrfs_init_path(&path);
3070         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3071                                     backref->name, backref->namelen,
3072                                     backref->index, -1);
3073         if (IS_ERR(di)) {
3074                 ret = PTR_ERR(di);
3075                 btrfs_release_path(&path);
3076                 btrfs_commit_transaction(trans, root);
3077                 if (ret == -ENOENT)
3078                         return 0;
3079                 return ret;
3080         }
3081
3082         if (!di)
3083                 ret = btrfs_del_item(trans, root, &path);
3084         else
3085                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3086         BUG_ON(ret);
3087         btrfs_release_path(&path);
3088         btrfs_commit_transaction(trans, root);
3089         return ret;
3090 }
3091
3092 static int __create_inode_item(struct btrfs_trans_handle *trans,
3093                                struct btrfs_root *root, u64 ino, u64 size,
3094                                u64 nbytes, u64 nlink, u32 mode)
3095 {
3096         struct btrfs_inode_item ii;
3097         time_t now = time(NULL);
3098         int ret;
3099
3100         btrfs_set_stack_inode_size(&ii, size);
3101         btrfs_set_stack_inode_nbytes(&ii, nbytes);
3102         btrfs_set_stack_inode_nlink(&ii, nlink);
3103         btrfs_set_stack_inode_mode(&ii, mode);
3104         btrfs_set_stack_inode_generation(&ii, trans->transid);
3105         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3106         btrfs_set_stack_timespec_sec(&ii.ctime, now);
3107         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3108         btrfs_set_stack_timespec_sec(&ii.mtime, now);
3109         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3110         btrfs_set_stack_timespec_sec(&ii.otime, 0);
3111         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3112
3113         ret = btrfs_insert_inode(trans, root, ino, &ii);
3114         ASSERT(!ret);
3115
3116         warning("root %llu inode %llu recreating inode item, this may "
3117                 "be incomplete, please check permissions and content after "
3118                 "the fsck completes.\n", (unsigned long long)root->objectid,
3119                 (unsigned long long)ino);
3120
3121         return 0;
3122 }
3123
3124 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3125                                     struct btrfs_root *root, u64 ino,
3126                                     u8 filetype)
3127 {
3128         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3129
3130         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
3131 }
3132
3133 static int create_inode_item(struct btrfs_root *root,
3134                              struct inode_record *rec, int root_dir)
3135 {
3136         struct btrfs_trans_handle *trans;
3137         u64 nlink = 0;
3138         u32 mode = 0;
3139         u64 size = 0;
3140         int ret;
3141
3142         trans = btrfs_start_transaction(root, 1);
3143         if (IS_ERR(trans)) {
3144                 ret = PTR_ERR(trans);
3145                 return ret;
3146         }
3147
3148         nlink = root_dir ? 1 : rec->found_link;
3149         if (rec->found_dir_item) {
3150                 if (rec->found_file_extent)
3151                         fprintf(stderr, "root %llu inode %llu has both a dir "
3152                                 "item and extents, unsure if it is a dir or a "
3153                                 "regular file so setting it as a directory\n",
3154                                 (unsigned long long)root->objectid,
3155                                 (unsigned long long)rec->ino);
3156                 mode = S_IFDIR | 0755;
3157                 size = rec->found_size;
3158         } else if (!rec->found_dir_item) {
3159                 size = rec->extent_end;
3160                 mode =  S_IFREG | 0755;
3161         }
3162
3163         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3164                                   nlink, mode);
3165         btrfs_commit_transaction(trans, root);
3166         return 0;
3167 }
3168
3169 static int repair_inode_backrefs(struct btrfs_root *root,
3170                                  struct inode_record *rec,
3171                                  struct cache_tree *inode_cache,
3172                                  int delete)
3173 {
3174         struct inode_backref *tmp, *backref;
3175         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3176         int ret = 0;
3177         int repaired = 0;
3178
3179         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3180                 if (!delete && rec->ino == root_dirid) {
3181                         if (!rec->found_inode_item) {
3182                                 ret = create_inode_item(root, rec, 1);
3183                                 if (ret)
3184                                         break;
3185                                 repaired++;
3186                         }
3187                 }
3188
3189                 /* Index 0 for root dir's are special, don't mess with it */
3190                 if (rec->ino == root_dirid && backref->index == 0)
3191                         continue;
3192
3193                 if (delete &&
3194                     ((backref->found_dir_index && !backref->found_inode_ref) ||
3195                      (backref->found_dir_index && backref->found_inode_ref &&
3196                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3197                         ret = delete_dir_index(root, backref);
3198                         if (ret)
3199                                 break;
3200                         repaired++;
3201                         list_del(&backref->list);
3202                         free(backref);
3203                         continue;
3204                 }
3205
3206                 if (!delete && !backref->found_dir_index &&
3207                     backref->found_dir_item && backref->found_inode_ref) {
3208                         ret = add_missing_dir_index(root, inode_cache, rec,
3209                                                     backref);
3210                         if (ret)
3211                                 break;
3212                         repaired++;
3213                         if (backref->found_dir_item &&
3214                             backref->found_dir_index) {
3215                                 if (!backref->errors &&
3216                                     backref->found_inode_ref) {
3217                                         list_del(&backref->list);
3218                                         free(backref);
3219                                         continue;
3220                                 }
3221                         }
3222                 }
3223
3224                 if (!delete && (!backref->found_dir_index &&
3225                                 !backref->found_dir_item &&
3226                                 backref->found_inode_ref)) {
3227                         struct btrfs_trans_handle *trans;
3228                         struct btrfs_key location;
3229
3230                         ret = check_dir_conflict(root, backref->name,
3231                                                  backref->namelen,
3232                                                  backref->dir,
3233                                                  backref->index);
3234                         if (ret) {
3235                                 /*
3236                                  * let nlink fixing routine to handle it,
3237                                  * which can do it better.
3238                                  */
3239                                 ret = 0;
3240                                 break;
3241                         }
3242                         location.objectid = rec->ino;
3243                         location.type = BTRFS_INODE_ITEM_KEY;
3244                         location.offset = 0;
3245
3246                         trans = btrfs_start_transaction(root, 1);
3247                         if (IS_ERR(trans)) {
3248                                 ret = PTR_ERR(trans);
3249                                 break;
3250                         }
3251                         fprintf(stderr, "adding missing dir index/item pair "
3252                                 "for inode %llu\n",
3253                                 (unsigned long long)rec->ino);
3254                         ret = btrfs_insert_dir_item(trans, root, backref->name,
3255                                                     backref->namelen,
3256                                                     backref->dir, &location,
3257                                                     imode_to_type(rec->imode),
3258                                                     backref->index);
3259                         BUG_ON(ret);
3260                         btrfs_commit_transaction(trans, root);
3261                         repaired++;
3262                 }
3263
3264                 if (!delete && (backref->found_inode_ref &&
3265                                 backref->found_dir_index &&
3266                                 backref->found_dir_item &&
3267                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3268                                 !rec->found_inode_item)) {
3269                         ret = create_inode_item(root, rec, 0);
3270                         if (ret)
3271                                 break;
3272                         repaired++;
3273                 }
3274
3275         }
3276         return ret ? ret : repaired;
3277 }
3278
3279 /*
3280  * To determine the file type for nlink/inode_item repair
3281  *
3282  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3283  * Return -ENOENT if file type is not found.
3284  */
3285 static int find_file_type(struct inode_record *rec, u8 *type)
3286 {
3287         struct inode_backref *backref;
3288
3289         /* For inode item recovered case */
3290         if (rec->found_inode_item) {
3291                 *type = imode_to_type(rec->imode);
3292                 return 0;
3293         }
3294
3295         list_for_each_entry(backref, &rec->backrefs, list) {
3296                 if (backref->found_dir_index || backref->found_dir_item) {
3297                         *type = backref->filetype;
3298                         return 0;
3299                 }
3300         }
3301         return -ENOENT;
3302 }
3303
3304 /*
3305  * To determine the file name for nlink repair
3306  *
3307  * Return 0 if file name is found, set name and namelen.
3308  * Return -ENOENT if file name is not found.
3309  */
3310 static int find_file_name(struct inode_record *rec,
3311                           char *name, int *namelen)
3312 {
3313         struct inode_backref *backref;
3314
3315         list_for_each_entry(backref, &rec->backrefs, list) {
3316                 if (backref->found_dir_index || backref->found_dir_item ||
3317                     backref->found_inode_ref) {
3318                         memcpy(name, backref->name, backref->namelen);
3319                         *namelen = backref->namelen;
3320                         return 0;
3321                 }
3322         }
3323         return -ENOENT;
3324 }
3325
3326 /* Reset the nlink of the inode to the correct one */
3327 static int reset_nlink(struct btrfs_trans_handle *trans,
3328                        struct btrfs_root *root,
3329                        struct btrfs_path *path,
3330                        struct inode_record *rec)
3331 {
3332         struct inode_backref *backref;
3333         struct inode_backref *tmp;
3334         struct btrfs_key key;
3335         struct btrfs_inode_item *inode_item;
3336         int ret = 0;
3337
3338         /* We don't believe this either, reset it and iterate backref */
3339         rec->found_link = 0;
3340
3341         /* Remove all backref including the valid ones */
3342         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3343                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3344                                    backref->index, backref->name,
3345                                    backref->namelen, 0);
3346                 if (ret < 0)
3347                         goto out;
3348
3349                 /* remove invalid backref, so it won't be added back */
3350                 if (!(backref->found_dir_index &&
3351                       backref->found_dir_item &&
3352                       backref->found_inode_ref)) {
3353                         list_del(&backref->list);
3354                         free(backref);
3355                 } else {
3356                         rec->found_link++;
3357                 }
3358         }
3359
3360         /* Set nlink to 0 */
3361         key.objectid = rec->ino;
3362         key.type = BTRFS_INODE_ITEM_KEY;
3363         key.offset = 0;
3364         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3365         if (ret < 0)
3366                 goto out;
3367         if (ret > 0) {
3368                 ret = -ENOENT;
3369                 goto out;
3370         }
3371         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3372                                     struct btrfs_inode_item);
3373         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3374         btrfs_mark_buffer_dirty(path->nodes[0]);
3375         btrfs_release_path(path);
3376
3377         /*
3378          * Add back valid inode_ref/dir_item/dir_index,
3379          * add_link() will handle the nlink inc, so new nlink must be correct
3380          */
3381         list_for_each_entry(backref, &rec->backrefs, list) {
3382                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3383                                      backref->name, backref->namelen,
3384                                      backref->filetype, &backref->index, 1, 0);
3385                 if (ret < 0)
3386                         goto out;
3387         }
3388 out:
3389         btrfs_release_path(path);
3390         return ret;
3391 }
3392
3393 static int get_highest_inode(struct btrfs_trans_handle *trans,
3394                                 struct btrfs_root *root,
3395                                 struct btrfs_path *path,
3396                                 u64 *highest_ino)
3397 {
3398         struct btrfs_key key, found_key;
3399         int ret;
3400
3401         btrfs_init_path(path);
3402         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3403         key.offset = -1;
3404         key.type = BTRFS_INODE_ITEM_KEY;
3405         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3406         if (ret == 1) {
3407                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3408                                 path->slots[0] - 1);
3409                 *highest_ino = found_key.objectid;
3410                 ret = 0;
3411         }
3412         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3413                 ret = -EOVERFLOW;
3414         btrfs_release_path(path);
3415         return ret;
3416 }
3417
3418 /*
3419  * Link inode to dir 'lost+found'. Increase @ref_count.
3420  *
3421  * Returns 0 means success.
3422  * Returns <0 means failure.
3423  */
3424 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3425                                    struct btrfs_root *root,
3426                                    struct btrfs_path *path,
3427                                    u64 ino, char *namebuf, u32 name_len,
3428                                    u8 filetype, u64 *ref_count)
3429 {
3430         char *dir_name = "lost+found";
3431         u64 lost_found_ino;
3432         int ret;
3433         u32 mode = 0700;
3434
3435         btrfs_release_path(path);
3436         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3437         if (ret < 0)
3438                 goto out;
3439         lost_found_ino++;
3440
3441         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3442                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3443                           mode);
3444         if (ret < 0) {
3445                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3446                 goto out;
3447         }
3448         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3449                              namebuf, name_len, filetype, NULL, 1, 0);
3450         /*
3451          * Add ".INO" suffix several times to handle case where
3452          * "FILENAME.INO" is already taken by another file.
3453          */
3454         while (ret == -EEXIST) {
3455                 /*
3456                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3457                  */
3458                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3459                         ret = -EFBIG;
3460                         goto out;
3461                 }
3462                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3463                          ".%llu", ino);
3464                 name_len += count_digits(ino) + 1;
3465                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3466                                      name_len, filetype, NULL, 1, 0);
3467         }
3468         if (ret < 0) {
3469                 error("failed to link the inode %llu to %s dir: %s",
3470                       ino, dir_name, strerror(-ret));
3471                 goto out;
3472         }
3473
3474         ++*ref_count;
3475         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3476                name_len, namebuf, dir_name);
3477 out:
3478         btrfs_release_path(path);
3479         if (ret)
3480                 error("failed to move file '%.*s' to '%s' dir", name_len,
3481                                 namebuf, dir_name);
3482         return ret;
3483 }
3484
3485 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3486                                struct btrfs_root *root,
3487                                struct btrfs_path *path,
3488                                struct inode_record *rec)
3489 {
3490         char namebuf[BTRFS_NAME_LEN] = {0};
3491         u8 type = 0;
3492         int namelen = 0;
3493         int name_recovered = 0;
3494         int type_recovered = 0;
3495         int ret = 0;
3496
3497         /*
3498          * Get file name and type first before these invalid inode ref
3499          * are deleted by remove_all_invalid_backref()
3500          */
3501         name_recovered = !find_file_name(rec, namebuf, &namelen);
3502         type_recovered = !find_file_type(rec, &type);
3503
3504         if (!name_recovered) {
3505                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3506                        rec->ino, rec->ino);
3507                 namelen = count_digits(rec->ino);
3508                 sprintf(namebuf, "%llu", rec->ino);
3509                 name_recovered = 1;
3510         }
3511         if (!type_recovered) {
3512                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3513                        rec->ino);
3514                 type = BTRFS_FT_REG_FILE;
3515                 type_recovered = 1;
3516         }
3517
3518         ret = reset_nlink(trans, root, path, rec);
3519         if (ret < 0) {
3520                 fprintf(stderr,
3521                         "Failed to reset nlink for inode %llu: %s\n",
3522                         rec->ino, strerror(-ret));
3523                 goto out;
3524         }
3525
3526         if (rec->found_link == 0) {
3527                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3528                                               namebuf, namelen, type,
3529                                               (u64 *)&rec->found_link);
3530                 if (ret)
3531                         goto out;
3532         }
3533         printf("Fixed the nlink of inode %llu\n", rec->ino);
3534 out:
3535         /*
3536          * Clear the flag anyway, or we will loop forever for the same inode
3537          * as it will not be removed from the bad inode list and the dead loop
3538          * happens.
3539          */
3540         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3541         btrfs_release_path(path);
3542         return ret;
3543 }
3544
3545 /*
3546  * Check if there is any normal(reg or prealloc) file extent for given
3547  * ino.
3548  * This is used to determine the file type when neither its dir_index/item or
3549  * inode_item exists.
3550  *
3551  * This will *NOT* report error, if any error happens, just consider it does
3552  * not have any normal file extent.
3553  */
3554 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3555 {
3556         struct btrfs_path path;
3557         struct btrfs_key key;
3558         struct btrfs_key found_key;
3559         struct btrfs_file_extent_item *fi;
3560         u8 type;
3561         int ret = 0;
3562
3563         btrfs_init_path(&path);
3564         key.objectid = ino;
3565         key.type = BTRFS_EXTENT_DATA_KEY;
3566         key.offset = 0;
3567
3568         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3569         if (ret < 0) {
3570                 ret = 0;
3571                 goto out;
3572         }
3573         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3574                 ret = btrfs_next_leaf(root, &path);
3575                 if (ret) {
3576                         ret = 0;
3577                         goto out;
3578                 }
3579         }
3580         while (1) {
3581                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3582                                       path.slots[0]);
3583                 if (found_key.objectid != ino ||
3584                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3585                         break;
3586                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3587                                     struct btrfs_file_extent_item);
3588                 type = btrfs_file_extent_type(path.nodes[0], fi);
3589                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3590                         ret = 1;
3591                         goto out;
3592                 }
3593         }
3594 out:
3595         btrfs_release_path(&path);
3596         return ret;
3597 }
3598
3599 static u32 btrfs_type_to_imode(u8 type)
3600 {
3601         static u32 imode_by_btrfs_type[] = {
3602                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3603                 [BTRFS_FT_DIR]          = S_IFDIR,
3604                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3605                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3606                 [BTRFS_FT_FIFO]         = S_IFIFO,
3607                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3608                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3609         };
3610
3611         return imode_by_btrfs_type[(type)];
3612 }
3613
3614 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3615                                 struct btrfs_root *root,
3616                                 struct btrfs_path *path,
3617                                 struct inode_record *rec)
3618 {
3619         u8 filetype;
3620         u32 mode = 0700;
3621         int type_recovered = 0;
3622         int ret = 0;
3623
3624         printf("Trying to rebuild inode:%llu\n", rec->ino);
3625
3626         type_recovered = !find_file_type(rec, &filetype);
3627
3628         /*
3629          * Try to determine inode type if type not found.
3630          *
3631          * For found regular file extent, it must be FILE.
3632          * For found dir_item/index, it must be DIR.
3633          *
3634          * For undetermined one, use FILE as fallback.
3635          *
3636          * TODO:
3637          * 1. If found backref(inode_index/item is already handled) to it,
3638          *    it must be DIR.
3639          *    Need new inode-inode ref structure to allow search for that.
3640          */
3641         if (!type_recovered) {
3642                 if (rec->found_file_extent &&
3643                     find_normal_file_extent(root, rec->ino)) {
3644                         type_recovered = 1;
3645                         filetype = BTRFS_FT_REG_FILE;
3646                 } else if (rec->found_dir_item) {
3647                         type_recovered = 1;
3648                         filetype = BTRFS_FT_DIR;
3649                 } else if (!list_empty(&rec->orphan_extents)) {
3650                         type_recovered = 1;
3651                         filetype = BTRFS_FT_REG_FILE;
3652                 } else{
3653                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3654                                rec->ino);
3655                         type_recovered = 1;
3656                         filetype = BTRFS_FT_REG_FILE;
3657                 }
3658         }
3659
3660         ret = btrfs_new_inode(trans, root, rec->ino,
3661                               mode | btrfs_type_to_imode(filetype));
3662         if (ret < 0)
3663                 goto out;
3664
3665         /*
3666          * Here inode rebuild is done, we only rebuild the inode item,
3667          * don't repair the nlink(like move to lost+found).
3668          * That is the job of nlink repair.
3669          *
3670          * We just fill the record and return
3671          */
3672         rec->found_dir_item = 1;
3673         rec->imode = mode | btrfs_type_to_imode(filetype);
3674         rec->nlink = 0;
3675         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3676         /* Ensure the inode_nlinks repair function will be called */
3677         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3678 out:
3679         return ret;
3680 }
3681
3682 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3683                                       struct btrfs_root *root,
3684                                       struct btrfs_path *path,
3685                                       struct inode_record *rec)
3686 {
3687         struct orphan_data_extent *orphan;
3688         struct orphan_data_extent *tmp;
3689         int ret = 0;
3690
3691         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3692                 /*
3693                  * Check for conflicting file extents
3694                  *
3695                  * Here we don't know whether the extents is compressed or not,
3696                  * so we can only assume it not compressed nor data offset,
3697                  * and use its disk_len as extent length.
3698                  */
3699                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3700                                        orphan->offset, orphan->disk_len, 0);
3701                 btrfs_release_path(path);
3702                 if (ret < 0)
3703                         goto out;
3704                 if (!ret) {
3705                         fprintf(stderr,
3706                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3707                                 orphan->disk_bytenr, orphan->disk_len);
3708                         ret = btrfs_free_extent(trans,
3709                                         root->fs_info->extent_root,
3710                                         orphan->disk_bytenr, orphan->disk_len,
3711                                         0, root->objectid, orphan->objectid,
3712                                         orphan->offset);
3713                         if (ret < 0)
3714                                 goto out;
3715                 }
3716                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3717                                 orphan->offset, orphan->disk_bytenr,
3718                                 orphan->disk_len, orphan->disk_len);
3719                 if (ret < 0)
3720                         goto out;
3721
3722                 /* Update file size info */
3723                 rec->found_size += orphan->disk_len;
3724                 if (rec->found_size == rec->nbytes)
3725                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3726
3727                 /* Update the file extent hole info too */
3728                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3729                                            orphan->disk_len);
3730                 if (ret < 0)
3731                         goto out;
3732                 if (RB_EMPTY_ROOT(&rec->holes))
3733                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3734
3735                 list_del(&orphan->list);
3736                 free(orphan);
3737         }
3738         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3739 out:
3740         return ret;
3741 }
3742
3743 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3744                                         struct btrfs_root *root,
3745                                         struct btrfs_path *path,
3746                                         struct inode_record *rec)
3747 {
3748         struct rb_node *node;
3749         struct file_extent_hole *hole;
3750         int found = 0;
3751         int ret = 0;
3752
3753         node = rb_first(&rec->holes);
3754
3755         while (node) {
3756                 found = 1;
3757                 hole = rb_entry(node, struct file_extent_hole, node);
3758                 ret = btrfs_punch_hole(trans, root, rec->ino,
3759                                        hole->start, hole->len);
3760                 if (ret < 0)
3761                         goto out;
3762                 ret = del_file_extent_hole(&rec->holes, hole->start,
3763                                            hole->len);
3764                 if (ret < 0)
3765                         goto out;
3766                 if (RB_EMPTY_ROOT(&rec->holes))
3767                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3768                 node = rb_first(&rec->holes);
3769         }
3770         /* special case for a file losing all its file extent */
3771         if (!found) {
3772                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3773                                        round_up(rec->isize,
3774                                                 root->fs_info->sectorsize));
3775                 if (ret < 0)
3776                         goto out;
3777         }
3778         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3779                rec->ino, root->objectid);
3780 out:
3781         return ret;
3782 }
3783
3784 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3785 {
3786         struct btrfs_trans_handle *trans;
3787         struct btrfs_path path;
3788         int ret = 0;
3789
3790         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3791                              I_ERR_NO_ORPHAN_ITEM |
3792                              I_ERR_LINK_COUNT_WRONG |
3793                              I_ERR_NO_INODE_ITEM |
3794                              I_ERR_FILE_EXTENT_ORPHAN |
3795                              I_ERR_FILE_EXTENT_DISCOUNT|
3796                              I_ERR_FILE_NBYTES_WRONG)))
3797                 return rec->errors;
3798
3799         /*
3800          * For nlink repair, it may create a dir and add link, so
3801          * 2 for parent(256)'s dir_index and dir_item
3802          * 2 for lost+found dir's inode_item and inode_ref
3803          * 1 for the new inode_ref of the file
3804          * 2 for lost+found dir's dir_index and dir_item for the file
3805          */
3806         trans = btrfs_start_transaction(root, 7);
3807         if (IS_ERR(trans))
3808                 return PTR_ERR(trans);
3809
3810         btrfs_init_path(&path);
3811         if (rec->errors & I_ERR_NO_INODE_ITEM)
3812                 ret = repair_inode_no_item(trans, root, &path, rec);
3813         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3814                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3815         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3816                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3817         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3818                 ret = repair_inode_isize(trans, root, &path, rec);
3819         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3820                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3821         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3822                 ret = repair_inode_nlinks(trans, root, &path, rec);
3823         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3824                 ret = repair_inode_nbytes(trans, root, &path, rec);
3825         btrfs_commit_transaction(trans, root);
3826         btrfs_release_path(&path);
3827         return ret;
3828 }
3829
3830 static int check_inode_recs(struct btrfs_root *root,
3831                             struct cache_tree *inode_cache)
3832 {
3833         struct cache_extent *cache;
3834         struct ptr_node *node;
3835         struct inode_record *rec;
3836         struct inode_backref *backref;
3837         int stage = 0;
3838         int ret = 0;
3839         int err = 0;
3840         u64 error = 0;
3841         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3842
3843         if (btrfs_root_refs(&root->root_item) == 0) {
3844                 if (!cache_tree_empty(inode_cache))
3845                         fprintf(stderr, "warning line %d\n", __LINE__);
3846                 return 0;
3847         }
3848
3849         /*
3850          * We need to repair backrefs first because we could change some of the
3851          * errors in the inode recs.
3852          *
3853          * We also need to go through and delete invalid backrefs first and then
3854          * add the correct ones second.  We do this because we may get EEXIST
3855          * when adding back the correct index because we hadn't yet deleted the
3856          * invalid index.
3857          *
3858          * For example, if we were missing a dir index then the directories
3859          * isize would be wrong, so if we fixed the isize to what we thought it
3860          * would be and then fixed the backref we'd still have a invalid fs, so
3861          * we need to add back the dir index and then check to see if the isize
3862          * is still wrong.
3863          */
3864         while (stage < 3) {
3865                 stage++;
3866                 if (stage == 3 && !err)
3867                         break;
3868
3869                 cache = search_cache_extent(inode_cache, 0);
3870                 while (repair && cache) {
3871                         node = container_of(cache, struct ptr_node, cache);
3872                         rec = node->data;
3873                         cache = next_cache_extent(cache);
3874
3875                         /* Need to free everything up and rescan */
3876                         if (stage == 3) {
3877                                 remove_cache_extent(inode_cache, &node->cache);
3878                                 free(node);
3879                                 free_inode_rec(rec);
3880                                 continue;
3881                         }
3882
3883                         if (list_empty(&rec->backrefs))
3884                                 continue;
3885
3886                         ret = repair_inode_backrefs(root, rec, inode_cache,
3887                                                     stage == 1);
3888                         if (ret < 0) {
3889                                 err = ret;
3890                                 stage = 2;
3891                                 break;
3892                         } if (ret > 0) {
3893                                 err = -EAGAIN;
3894                         }
3895                 }
3896         }
3897         if (err)
3898                 return err;
3899
3900         rec = get_inode_rec(inode_cache, root_dirid, 0);
3901         BUG_ON(IS_ERR(rec));
3902         if (rec) {
3903                 ret = check_root_dir(rec);
3904                 if (ret) {
3905                         fprintf(stderr, "root %llu root dir %llu error\n",
3906                                 (unsigned long long)root->root_key.objectid,
3907                                 (unsigned long long)root_dirid);
3908                         print_inode_error(root, rec);
3909                         error++;
3910                 }
3911         } else {
3912                 if (repair) {
3913                         struct btrfs_trans_handle *trans;
3914
3915                         trans = btrfs_start_transaction(root, 1);
3916                         if (IS_ERR(trans)) {
3917                                 err = PTR_ERR(trans);
3918                                 return err;
3919                         }
3920
3921                         fprintf(stderr,
3922                                 "root %llu missing its root dir, recreating\n",
3923                                 (unsigned long long)root->objectid);
3924
3925                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3926                         BUG_ON(ret);
3927
3928                         btrfs_commit_transaction(trans, root);
3929                         return -EAGAIN;
3930                 }
3931
3932                 fprintf(stderr, "root %llu root dir %llu not found\n",
3933                         (unsigned long long)root->root_key.objectid,
3934                         (unsigned long long)root_dirid);
3935         }
3936
3937         while (1) {
3938                 cache = search_cache_extent(inode_cache, 0);
3939                 if (!cache)
3940                         break;
3941                 node = container_of(cache, struct ptr_node, cache);
3942                 rec = node->data;
3943                 remove_cache_extent(inode_cache, &node->cache);
3944                 free(node);
3945                 if (rec->ino == root_dirid ||
3946                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3947                         free_inode_rec(rec);
3948                         continue;
3949                 }
3950
3951                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3952                         ret = check_orphan_item(root, rec->ino);
3953                         if (ret == 0)
3954                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3955                         if (can_free_inode_rec(rec)) {
3956                                 free_inode_rec(rec);
3957                                 continue;
3958                         }
3959                 }
3960
3961                 if (!rec->found_inode_item)
3962                         rec->errors |= I_ERR_NO_INODE_ITEM;
3963                 if (rec->found_link != rec->nlink)
3964                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3965                 if (repair) {
3966                         ret = try_repair_inode(root, rec);
3967                         if (ret == 0 && can_free_inode_rec(rec)) {
3968                                 free_inode_rec(rec);
3969                                 continue;
3970                         }
3971                         ret = 0;
3972                 }
3973
3974                 if (!(repair && ret == 0))
3975                         error++;
3976                 print_inode_error(root, rec);
3977                 list_for_each_entry(backref, &rec->backrefs, list) {
3978                         if (!backref->found_dir_item)
3979                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3980                         if (!backref->found_dir_index)
3981                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3982                         if (!backref->found_inode_ref)
3983                                 backref->errors |= REF_ERR_NO_INODE_REF;
3984                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3985                                 " namelen %u name %s filetype %d errors %x",
3986                                 (unsigned long long)backref->dir,
3987                                 (unsigned long long)backref->index,
3988                                 backref->namelen, backref->name,
3989                                 backref->filetype, backref->errors);
3990                         print_ref_error(backref->errors);
3991                 }
3992                 free_inode_rec(rec);
3993         }
3994         return (error > 0) ? -1 : 0;
3995 }
3996
3997 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3998                                         u64 objectid)
3999 {
4000         struct cache_extent *cache;
4001         struct root_record *rec = NULL;
4002         int ret;
4003
4004         cache = lookup_cache_extent(root_cache, objectid, 1);
4005         if (cache) {
4006                 rec = container_of(cache, struct root_record, cache);
4007         } else {
4008                 rec = calloc(1, sizeof(*rec));
4009                 if (!rec)
4010                         return ERR_PTR(-ENOMEM);
4011                 rec->objectid = objectid;
4012                 INIT_LIST_HEAD(&rec->backrefs);
4013                 rec->cache.start = objectid;
4014                 rec->cache.size = 1;
4015
4016                 ret = insert_cache_extent(root_cache, &rec->cache);
4017                 if (ret)
4018                         return ERR_PTR(-EEXIST);
4019         }
4020         return rec;
4021 }
4022
4023 static struct root_backref *get_root_backref(struct root_record *rec,
4024                                              u64 ref_root, u64 dir, u64 index,
4025                                              const char *name, int namelen)
4026 {
4027         struct root_backref *backref;
4028
4029         list_for_each_entry(backref, &rec->backrefs, list) {
4030                 if (backref->ref_root != ref_root || backref->dir != dir ||
4031                     backref->namelen != namelen)
4032                         continue;
4033                 if (memcmp(name, backref->name, namelen))
4034                         continue;
4035                 return backref;
4036         }
4037
4038         backref = calloc(1, sizeof(*backref) + namelen + 1);
4039         if (!backref)
4040                 return NULL;
4041         backref->ref_root = ref_root;
4042         backref->dir = dir;
4043         backref->index = index;
4044         backref->namelen = namelen;
4045         memcpy(backref->name, name, namelen);
4046         backref->name[namelen] = '\0';
4047         list_add_tail(&backref->list, &rec->backrefs);
4048         return backref;
4049 }
4050
4051 static void free_root_record(struct cache_extent *cache)
4052 {
4053         struct root_record *rec;
4054         struct root_backref *backref;
4055
4056         rec = container_of(cache, struct root_record, cache);
4057         while (!list_empty(&rec->backrefs)) {
4058                 backref = to_root_backref(rec->backrefs.next);
4059                 list_del(&backref->list);
4060                 free(backref);
4061         }
4062
4063         free(rec);
4064 }
4065
4066 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
4067
4068 static int add_root_backref(struct cache_tree *root_cache,
4069                             u64 root_id, u64 ref_root, u64 dir, u64 index,
4070                             const char *name, int namelen,
4071                             int item_type, int errors)
4072 {
4073         struct root_record *rec;
4074         struct root_backref *backref;
4075
4076         rec = get_root_rec(root_cache, root_id);
4077         BUG_ON(IS_ERR(rec));
4078         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4079         BUG_ON(!backref);
4080
4081         backref->errors |= errors;
4082
4083         if (item_type != BTRFS_DIR_ITEM_KEY) {
4084                 if (backref->found_dir_index || backref->found_back_ref ||
4085                     backref->found_forward_ref) {
4086                         if (backref->index != index)
4087                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
4088                 } else {
4089                         backref->index = index;
4090                 }
4091         }
4092
4093         if (item_type == BTRFS_DIR_ITEM_KEY) {
4094                 if (backref->found_forward_ref)
4095                         rec->found_ref++;
4096                 backref->found_dir_item = 1;
4097         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4098                 backref->found_dir_index = 1;
4099         } else if (item_type == BTRFS_ROOT_REF_KEY) {
4100                 if (backref->found_forward_ref)
4101                         backref->errors |= REF_ERR_DUP_ROOT_REF;
4102                 else if (backref->found_dir_item)
4103                         rec->found_ref++;
4104                 backref->found_forward_ref = 1;
4105         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4106                 if (backref->found_back_ref)
4107                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4108                 backref->found_back_ref = 1;
4109         } else {
4110                 BUG_ON(1);
4111         }
4112
4113         if (backref->found_forward_ref && backref->found_dir_item)
4114                 backref->reachable = 1;
4115         return 0;
4116 }
4117
4118 static int merge_root_recs(struct btrfs_root *root,
4119                            struct cache_tree *src_cache,
4120                            struct cache_tree *dst_cache)
4121 {
4122         struct cache_extent *cache;
4123         struct ptr_node *node;
4124         struct inode_record *rec;
4125         struct inode_backref *backref;
4126         int ret = 0;
4127
4128         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4129                 free_inode_recs_tree(src_cache);
4130                 return 0;
4131         }
4132
4133         while (1) {
4134                 cache = search_cache_extent(src_cache, 0);
4135                 if (!cache)
4136                         break;
4137                 node = container_of(cache, struct ptr_node, cache);
4138                 rec = node->data;
4139                 remove_cache_extent(src_cache, &node->cache);
4140                 free(node);
4141
4142                 ret = is_child_root(root, root->objectid, rec->ino);
4143                 if (ret < 0)
4144                         break;
4145                 else if (ret == 0)
4146                         goto skip;
4147
4148                 list_for_each_entry(backref, &rec->backrefs, list) {
4149                         BUG_ON(backref->found_inode_ref);
4150                         if (backref->found_dir_item)
4151                                 add_root_backref(dst_cache, rec->ino,
4152                                         root->root_key.objectid, backref->dir,
4153                                         backref->index, backref->name,
4154                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
4155                                         backref->errors);
4156                         if (backref->found_dir_index)
4157                                 add_root_backref(dst_cache, rec->ino,
4158                                         root->root_key.objectid, backref->dir,
4159                                         backref->index, backref->name,
4160                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
4161                                         backref->errors);
4162                 }
4163 skip:
4164                 free_inode_rec(rec);
4165         }
4166         if (ret < 0)
4167                 return ret;
4168         return 0;
4169 }
4170
4171 static int check_root_refs(struct btrfs_root *root,
4172                            struct cache_tree *root_cache)
4173 {
4174         struct root_record *rec;
4175         struct root_record *ref_root;
4176         struct root_backref *backref;
4177         struct cache_extent *cache;
4178         int loop = 1;
4179         int ret;
4180         int error;
4181         int errors = 0;
4182
4183         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4184         BUG_ON(IS_ERR(rec));
4185         rec->found_ref = 1;
4186
4187         /* fixme: this can not detect circular references */
4188         while (loop) {
4189                 loop = 0;
4190                 cache = search_cache_extent(root_cache, 0);
4191                 while (1) {
4192                         if (!cache)
4193                                 break;
4194                         rec = container_of(cache, struct root_record, cache);
4195                         cache = next_cache_extent(cache);
4196
4197                         if (rec->found_ref == 0)
4198                                 continue;
4199
4200                         list_for_each_entry(backref, &rec->backrefs, list) {
4201                                 if (!backref->reachable)
4202                                         continue;
4203
4204                                 ref_root = get_root_rec(root_cache,
4205                                                         backref->ref_root);
4206                                 BUG_ON(IS_ERR(ref_root));
4207                                 if (ref_root->found_ref > 0)
4208                                         continue;
4209
4210                                 backref->reachable = 0;
4211                                 rec->found_ref--;
4212                                 if (rec->found_ref == 0)
4213                                         loop = 1;
4214                         }
4215                 }
4216         }
4217
4218         cache = search_cache_extent(root_cache, 0);
4219         while (1) {
4220                 if (!cache)
4221                         break;
4222                 rec = container_of(cache, struct root_record, cache);
4223                 cache = next_cache_extent(cache);
4224
4225                 if (rec->found_ref == 0 &&
4226                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4227                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4228                         ret = check_orphan_item(root->fs_info->tree_root,
4229                                                 rec->objectid);
4230                         if (ret == 0)
4231                                 continue;
4232
4233                         /*
4234                          * If we don't have a root item then we likely just have
4235                          * a dir item in a snapshot for this root but no actual
4236                          * ref key or anything so it's meaningless.
4237                          */
4238                         if (!rec->found_root_item)
4239                                 continue;
4240                         errors++;
4241                         fprintf(stderr, "fs tree %llu not referenced\n",
4242                                 (unsigned long long)rec->objectid);
4243                 }
4244
4245                 error = 0;
4246                 if (rec->found_ref > 0 && !rec->found_root_item)
4247                         error = 1;
4248                 list_for_each_entry(backref, &rec->backrefs, list) {
4249                         if (!backref->found_dir_item)
4250                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
4251                         if (!backref->found_dir_index)
4252                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
4253                         if (!backref->found_back_ref)
4254                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4255                         if (!backref->found_forward_ref)
4256                                 backref->errors |= REF_ERR_NO_ROOT_REF;
4257                         if (backref->reachable && backref->errors)
4258                                 error = 1;
4259                 }
4260                 if (!error)
4261                         continue;
4262
4263                 errors++;
4264                 fprintf(stderr, "fs tree %llu refs %u %s\n",
4265                         (unsigned long long)rec->objectid, rec->found_ref,
4266                          rec->found_root_item ? "" : "not found");
4267
4268                 list_for_each_entry(backref, &rec->backrefs, list) {
4269                         if (!backref->reachable)
4270                                 continue;
4271                         if (!backref->errors && rec->found_root_item)
4272                                 continue;
4273                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4274                                 " index %llu namelen %u name %s errors %x\n",
4275                                 (unsigned long long)backref->ref_root,
4276                                 (unsigned long long)backref->dir,
4277                                 (unsigned long long)backref->index,
4278                                 backref->namelen, backref->name,
4279                                 backref->errors);
4280                         print_ref_error(backref->errors);
4281                 }
4282         }
4283         return errors > 0 ? 1 : 0;
4284 }
4285
4286 static int process_root_ref(struct extent_buffer *eb, int slot,
4287                             struct btrfs_key *key,
4288                             struct cache_tree *root_cache)
4289 {
4290         u64 dirid;
4291         u64 index;
4292         u32 len;
4293         u32 name_len;
4294         struct btrfs_root_ref *ref;
4295         char namebuf[BTRFS_NAME_LEN];
4296         int error;
4297
4298         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4299
4300         dirid = btrfs_root_ref_dirid(eb, ref);
4301         index = btrfs_root_ref_sequence(eb, ref);
4302         name_len = btrfs_root_ref_name_len(eb, ref);
4303
4304         if (name_len <= BTRFS_NAME_LEN) {
4305                 len = name_len;
4306                 error = 0;
4307         } else {
4308                 len = BTRFS_NAME_LEN;
4309                 error = REF_ERR_NAME_TOO_LONG;
4310         }
4311         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4312
4313         if (key->type == BTRFS_ROOT_REF_KEY) {
4314                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4315                                  index, namebuf, len, key->type, error);
4316         } else {
4317                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4318                                  index, namebuf, len, key->type, error);
4319         }
4320         return 0;
4321 }
4322
4323 static void free_corrupt_block(struct cache_extent *cache)
4324 {
4325         struct btrfs_corrupt_block *corrupt;
4326
4327         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4328         free(corrupt);
4329 }
4330
4331 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4332
4333 /*
4334  * Repair the btree of the given root.
4335  *
4336  * The fix is to remove the node key in corrupt_blocks cache_tree.
4337  * and rebalance the tree.
4338  * After the fix, the btree should be writeable.
4339  */
4340 static int repair_btree(struct btrfs_root *root,
4341                         struct cache_tree *corrupt_blocks)
4342 {
4343         struct btrfs_trans_handle *trans;
4344         struct btrfs_path path;
4345         struct btrfs_corrupt_block *corrupt;
4346         struct cache_extent *cache;
4347         struct btrfs_key key;
4348         u64 offset;
4349         int level;
4350         int ret = 0;
4351
4352         if (cache_tree_empty(corrupt_blocks))
4353                 return 0;
4354
4355         trans = btrfs_start_transaction(root, 1);
4356         if (IS_ERR(trans)) {
4357                 ret = PTR_ERR(trans);
4358                 fprintf(stderr, "Error starting transaction: %s\n",
4359                         strerror(-ret));
4360                 return ret;
4361         }
4362         btrfs_init_path(&path);
4363         cache = first_cache_extent(corrupt_blocks);
4364         while (cache) {
4365                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4366                                        cache);
4367                 level = corrupt->level;
4368                 path.lowest_level = level;
4369                 key.objectid = corrupt->key.objectid;
4370                 key.type = corrupt->key.type;
4371                 key.offset = corrupt->key.offset;
4372
4373                 /*
4374                  * Here we don't want to do any tree balance, since it may
4375                  * cause a balance with corrupted brother leaf/node,
4376                  * so ins_len set to 0 here.
4377                  * Balance will be done after all corrupt node/leaf is deleted.
4378                  */
4379                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4380                 if (ret < 0)
4381                         goto out;
4382                 offset = btrfs_node_blockptr(path.nodes[level],
4383                                              path.slots[level]);
4384
4385                 /* Remove the ptr */
4386                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4387                 if (ret < 0)
4388                         goto out;
4389                 /*
4390                  * Remove the corresponding extent
4391                  * return value is not concerned.
4392                  */
4393                 btrfs_release_path(&path);
4394                 ret = btrfs_free_extent(trans, root, offset,
4395                                 root->fs_info->nodesize, 0,
4396                                 root->root_key.objectid, level - 1, 0);
4397                 cache = next_cache_extent(cache);
4398         }
4399
4400         /* Balance the btree using btrfs_search_slot() */
4401         cache = first_cache_extent(corrupt_blocks);
4402         while (cache) {
4403                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4404                                        cache);
4405                 memcpy(&key, &corrupt->key, sizeof(key));
4406                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4407                 if (ret < 0)
4408                         goto out;
4409                 /* return will always >0 since it won't find the item */
4410                 ret = 0;
4411                 btrfs_release_path(&path);
4412                 cache = next_cache_extent(cache);
4413         }
4414 out:
4415         btrfs_commit_transaction(trans, root);
4416         btrfs_release_path(&path);
4417         return ret;
4418 }
4419
4420 static int check_fs_root(struct btrfs_root *root,
4421                          struct cache_tree *root_cache,
4422                          struct walk_control *wc)
4423 {
4424         int ret = 0;
4425         int err = 0;
4426         int wret;
4427         int level;
4428         struct btrfs_path path;
4429         struct shared_node root_node;
4430         struct root_record *rec;
4431         struct btrfs_root_item *root_item = &root->root_item;
4432         struct cache_tree corrupt_blocks;
4433         struct orphan_data_extent *orphan;
4434         struct orphan_data_extent *tmp;
4435         enum btrfs_tree_block_status status;
4436         struct node_refs nrefs;
4437
4438         /*
4439          * Reuse the corrupt_block cache tree to record corrupted tree block
4440          *
4441          * Unlike the usage in extent tree check, here we do it in a per
4442          * fs/subvol tree base.
4443          */
4444         cache_tree_init(&corrupt_blocks);
4445         root->fs_info->corrupt_blocks = &corrupt_blocks;
4446
4447         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4448                 rec = get_root_rec(root_cache, root->root_key.objectid);
4449                 BUG_ON(IS_ERR(rec));
4450                 if (btrfs_root_refs(root_item) > 0)
4451                         rec->found_root_item = 1;
4452         }
4453
4454         btrfs_init_path(&path);
4455         memset(&root_node, 0, sizeof(root_node));
4456         cache_tree_init(&root_node.root_cache);
4457         cache_tree_init(&root_node.inode_cache);
4458         memset(&nrefs, 0, sizeof(nrefs));
4459
4460         /* Move the orphan extent record to corresponding inode_record */
4461         list_for_each_entry_safe(orphan, tmp,
4462                                  &root->orphan_data_extents, list) {
4463                 struct inode_record *inode;
4464
4465                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4466                                       1);
4467                 BUG_ON(IS_ERR(inode));
4468                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4469                 list_move(&orphan->list, &inode->orphan_extents);
4470         }
4471
4472         level = btrfs_header_level(root->node);
4473         memset(wc->nodes, 0, sizeof(wc->nodes));
4474         wc->nodes[level] = &root_node;
4475         wc->active_node = level;
4476         wc->root_level = level;
4477
4478         /* We may not have checked the root block, lets do that now */
4479         if (btrfs_is_leaf(root->node))
4480                 status = btrfs_check_leaf(root, NULL, root->node);
4481         else
4482                 status = btrfs_check_node(root, NULL, root->node);
4483         if (status != BTRFS_TREE_BLOCK_CLEAN)
4484                 return -EIO;
4485
4486         if (btrfs_root_refs(root_item) > 0 ||
4487             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4488                 path.nodes[level] = root->node;
4489                 extent_buffer_get(root->node);
4490                 path.slots[level] = 0;
4491         } else {
4492                 struct btrfs_key key;
4493                 struct btrfs_disk_key found_key;
4494
4495                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4496                 level = root_item->drop_level;
4497                 path.lowest_level = level;
4498                 if (level > btrfs_header_level(root->node) ||
4499                     level >= BTRFS_MAX_LEVEL) {
4500                         error("ignoring invalid drop level: %u", level);
4501                         goto skip_walking;
4502                 }
4503                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4504                 if (wret < 0)
4505                         goto skip_walking;
4506                 btrfs_node_key(path.nodes[level], &found_key,
4507                                 path.slots[level]);
4508                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4509                                         sizeof(found_key)));
4510         }
4511
4512         while (1) {
4513                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4514                 if (wret < 0)
4515                         ret = wret;
4516                 if (wret != 0)
4517                         break;
4518
4519                 wret = walk_up_tree(root, &path, wc, &level);
4520                 if (wret < 0)
4521                         ret = wret;
4522                 if (wret != 0)
4523                         break;
4524         }
4525 skip_walking:
4526         btrfs_release_path(&path);
4527
4528         if (!cache_tree_empty(&corrupt_blocks)) {
4529                 struct cache_extent *cache;
4530                 struct btrfs_corrupt_block *corrupt;
4531
4532                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4533                        root->root_key.objectid);
4534                 cache = first_cache_extent(&corrupt_blocks);
4535                 while (cache) {
4536                         corrupt = container_of(cache,
4537                                                struct btrfs_corrupt_block,
4538                                                cache);
4539                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4540                                cache->start, corrupt->level,
4541                                corrupt->key.objectid, corrupt->key.type,
4542                                corrupt->key.offset);
4543                         cache = next_cache_extent(cache);
4544                 }
4545                 if (repair) {
4546                         printf("Try to repair the btree for root %llu\n",
4547                                root->root_key.objectid);
4548                         ret = repair_btree(root, &corrupt_blocks);
4549                         if (ret < 0)
4550                                 fprintf(stderr, "Failed to repair btree: %s\n",
4551                                         strerror(-ret));
4552                         if (!ret)
4553                                 printf("Btree for root %llu is fixed\n",
4554                                        root->root_key.objectid);
4555                 }
4556         }
4557
4558         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4559         if (err < 0)
4560                 ret = err;
4561
4562         if (root_node.current) {
4563                 root_node.current->checked = 1;
4564                 maybe_free_inode_rec(&root_node.inode_cache,
4565                                 root_node.current);
4566         }
4567
4568         err = check_inode_recs(root, &root_node.inode_cache);
4569         if (!ret)
4570                 ret = err;
4571
4572         free_corrupt_blocks_tree(&corrupt_blocks);
4573         root->fs_info->corrupt_blocks = NULL;
4574         free_orphan_data_extents(&root->orphan_data_extents);
4575         return ret;
4576 }
4577
4578 static int fs_root_objectid(u64 objectid)
4579 {
4580         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4581             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4582                 return 1;
4583         return is_fstree(objectid);
4584 }
4585
4586 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4587                           struct cache_tree *root_cache)
4588 {
4589         struct btrfs_path path;
4590         struct btrfs_key key;
4591         struct walk_control wc;
4592         struct extent_buffer *leaf, *tree_node;
4593         struct btrfs_root *tmp_root;
4594         struct btrfs_root *tree_root = fs_info->tree_root;
4595         int ret;
4596         int err = 0;
4597
4598         if (ctx.progress_enabled) {
4599                 ctx.tp = TASK_FS_ROOTS;
4600                 task_start(ctx.info);
4601         }
4602
4603         /*
4604          * Just in case we made any changes to the extent tree that weren't
4605          * reflected into the free space cache yet.
4606          */
4607         if (repair)
4608                 reset_cached_block_groups(fs_info);
4609         memset(&wc, 0, sizeof(wc));
4610         cache_tree_init(&wc.shared);
4611         btrfs_init_path(&path);
4612
4613 again:
4614         key.offset = 0;
4615         key.objectid = 0;
4616         key.type = BTRFS_ROOT_ITEM_KEY;
4617         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4618         if (ret < 0) {
4619                 err = 1;
4620                 goto out;
4621         }
4622         tree_node = tree_root->node;
4623         while (1) {
4624                 if (tree_node != tree_root->node) {
4625                         free_root_recs_tree(root_cache);
4626                         btrfs_release_path(&path);
4627                         goto again;
4628                 }
4629                 leaf = path.nodes[0];
4630                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4631                         ret = btrfs_next_leaf(tree_root, &path);
4632                         if (ret) {
4633                                 if (ret < 0)
4634                                         err = 1;
4635                                 break;
4636                         }
4637                         leaf = path.nodes[0];
4638                 }
4639                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4640                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4641                     fs_root_objectid(key.objectid)) {
4642                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4643                                 tmp_root = btrfs_read_fs_root_no_cache(
4644                                                 fs_info, &key);
4645                         } else {
4646                                 key.offset = (u64)-1;
4647                                 tmp_root = btrfs_read_fs_root(
4648                                                 fs_info, &key);
4649                         }
4650                         if (IS_ERR(tmp_root)) {
4651                                 err = 1;
4652                                 goto next;
4653                         }
4654                         ret = check_fs_root(tmp_root, root_cache, &wc);
4655                         if (ret == -EAGAIN) {
4656                                 free_root_recs_tree(root_cache);
4657                                 btrfs_release_path(&path);
4658                                 goto again;
4659                         }
4660                         if (ret)
4661                                 err = 1;
4662                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4663                                 btrfs_free_fs_root(tmp_root);
4664                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4665                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4666                         process_root_ref(leaf, path.slots[0], &key,
4667                                          root_cache);
4668                 }
4669 next:
4670                 path.slots[0]++;
4671         }
4672 out:
4673         btrfs_release_path(&path);
4674         if (err)
4675                 free_extent_cache_tree(&wc.shared);
4676         if (!cache_tree_empty(&wc.shared))
4677                 fprintf(stderr, "warning line %d\n", __LINE__);
4678
4679         task_stop(ctx.info);
4680
4681         return err;
4682 }
4683
4684 /*
4685  * Find the @index according by @ino and name.
4686  * Notice:time efficiency is O(N)
4687  *
4688  * @root:       the root of the fs/file tree
4689  * @index_ret:  the index as return value
4690  * @namebuf:    the name to match
4691  * @name_len:   the length of name to match
4692  * @file_type:  the file_type of INODE_ITEM to match
4693  *
4694  * Returns 0 if found and *@index_ret will be modified with right value
4695  * Returns< 0 not found and *@index_ret will be (u64)-1
4696  */
4697 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4698                           u64 *index_ret, char *namebuf, u32 name_len,
4699                           u8 file_type)
4700 {
4701         struct btrfs_path path;
4702         struct extent_buffer *node;
4703         struct btrfs_dir_item *di;
4704         struct btrfs_key key;
4705         struct btrfs_key location;
4706         char name[BTRFS_NAME_LEN] = {0};
4707
4708         u32 total;
4709         u32 cur = 0;
4710         u32 len;
4711         u32 data_len;
4712         u8 filetype;
4713         int slot;
4714         int ret;
4715
4716         ASSERT(index_ret);
4717
4718         /* search from the last index */
4719         key.objectid = dirid;
4720         key.offset = (u64)-1;
4721         key.type = BTRFS_DIR_INDEX_KEY;
4722
4723         btrfs_init_path(&path);
4724         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4725         if (ret < 0)
4726                 return ret;
4727
4728 loop:
4729         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4730         if (ret) {
4731                 ret = -ENOENT;
4732                 *index_ret = (64)-1;
4733                 goto out;
4734         }
4735         /* Check whether inode_id/filetype/name match */
4736         node = path.nodes[0];
4737         slot = path.slots[0];
4738         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4739         total = btrfs_item_size_nr(node, slot);
4740         while (cur < total) {
4741                 ret = -ENOENT;
4742                 len = btrfs_dir_name_len(node, di);
4743                 data_len = btrfs_dir_data_len(node, di);
4744
4745                 btrfs_dir_item_key_to_cpu(node, di, &location);
4746                 if (location.objectid != location_id ||
4747                     location.type != BTRFS_INODE_ITEM_KEY ||
4748                     location.offset != 0)
4749                         goto next;
4750
4751                 filetype = btrfs_dir_type(node, di);
4752                 if (file_type != filetype)
4753                         goto next;
4754
4755                 if (len > BTRFS_NAME_LEN)
4756                         len = BTRFS_NAME_LEN;
4757
4758                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4759                 if (len != name_len || strncmp(namebuf, name, len))
4760                         goto next;
4761
4762                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4763                 *index_ret = key.offset;
4764                 ret = 0;
4765                 goto out;
4766 next:
4767                 len += sizeof(*di) + data_len;
4768                 di = (struct btrfs_dir_item *)((char *)di + len);
4769                 cur += len;
4770         }
4771         goto loop;
4772
4773 out:
4774         btrfs_release_path(&path);
4775         return ret;
4776 }
4777
4778 /*
4779  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4780  * INODE_REF/INODE_EXTREF match.
4781  *
4782  * @root:       the root of the fs/file tree
4783  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4784  *              value while find index
4785  * @location_key: location key of the struct btrfs_dir_item to match
4786  * @name:       the name to match
4787  * @namelen:    the length of name
4788  * @file_type:  the type of file to math
4789  *
4790  * Return 0 if no error occurred.
4791  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4792  * DIR_ITEM/DIR_INDEX
4793  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4794  * and DIR_ITEM/DIR_INDEX mismatch
4795  */
4796 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4797                          struct btrfs_key *location_key, char *name,
4798                          u32 namelen, u8 file_type)
4799 {
4800         struct btrfs_path path;
4801         struct extent_buffer *node;
4802         struct btrfs_dir_item *di;
4803         struct btrfs_key location;
4804         char namebuf[BTRFS_NAME_LEN] = {0};
4805         u32 total;
4806         u32 cur = 0;
4807         u32 len;
4808         u32 data_len;
4809         u8 filetype;
4810         int slot;
4811         int ret;
4812
4813         /* get the index by traversing all index */
4814         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4815                 ret = find_dir_index(root, key->objectid,
4816                                      location_key->objectid, &key->offset,
4817                                      name, namelen, file_type);
4818                 if (ret)
4819                         ret = DIR_INDEX_MISSING;
4820                 return ret;
4821         }
4822
4823         btrfs_init_path(&path);
4824         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4825         if (ret) {
4826                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4827                         DIR_INDEX_MISSING;
4828                 goto out;
4829         }
4830
4831         /* Check whether inode_id/filetype/name match */
4832         node = path.nodes[0];
4833         slot = path.slots[0];
4834         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4835         total = btrfs_item_size_nr(node, slot);
4836         while (cur < total) {
4837                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4838                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4839
4840                 len = btrfs_dir_name_len(node, di);
4841                 data_len = btrfs_dir_data_len(node, di);
4842
4843                 btrfs_dir_item_key_to_cpu(node, di, &location);
4844                 if (location.objectid != location_key->objectid ||
4845                     location.type != location_key->type ||
4846                     location.offset != location_key->offset)
4847                         goto next;
4848
4849                 filetype = btrfs_dir_type(node, di);
4850                 if (file_type != filetype)
4851                         goto next;
4852
4853                 if (len > BTRFS_NAME_LEN) {
4854                         len = BTRFS_NAME_LEN;
4855                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4856                         root->objectid,
4857                         key->type == BTRFS_DIR_ITEM_KEY ?
4858                         "DIR_ITEM" : "DIR_INDEX",
4859                         key->objectid, key->offset, len);
4860                 }
4861                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4862                                    len);
4863                 if (len != namelen || strncmp(namebuf, name, len))
4864                         goto next;
4865
4866                 ret = 0;
4867                 goto out;
4868 next:
4869                 len += sizeof(*di) + data_len;
4870                 di = (struct btrfs_dir_item *)((char *)di + len);
4871                 cur += len;
4872         }
4873
4874 out:
4875         btrfs_release_path(&path);
4876         return ret;
4877 }
4878
4879 /*
4880  * Prints inode ref error message
4881  */
4882 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4883                                 u64 index, const char *namebuf, int name_len,
4884                                 u8 filetype, int err)
4885 {
4886         if (!err)
4887                 return;
4888
4889         /* root dir error */
4890         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4891                 error(
4892         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4893                       root->objectid, key->objectid, key->offset, namebuf);
4894                 return;
4895         }
4896
4897         /* normal error */
4898         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4899                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4900                       root->objectid, key->offset,
4901                       btrfs_name_hash(namebuf, name_len),
4902                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4903                       namebuf, filetype);
4904         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4905                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4906                       root->objectid, key->offset, index,
4907                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4908                       namebuf, filetype);
4909 }
4910
4911 /*
4912  * Insert the missing inode item.
4913  *
4914  * Returns 0 means success.
4915  * Returns <0 means error.
4916  */
4917 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4918                                      u8 filetype)
4919 {
4920         struct btrfs_key key;
4921         struct btrfs_trans_handle *trans;
4922         struct btrfs_path path;
4923         int ret;
4924
4925         key.objectid = ino;
4926         key.type = BTRFS_INODE_ITEM_KEY;
4927         key.offset = 0;
4928
4929         btrfs_init_path(&path);
4930         trans = btrfs_start_transaction(root, 1);
4931         if (IS_ERR(trans)) {
4932                 ret = -EIO;
4933                 goto out;
4934         }
4935
4936         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4937         if (ret < 0 || !ret)
4938                 goto fail;
4939
4940         /* insert inode item */
4941         create_inode_item_lowmem(trans, root, ino, filetype);
4942         ret = 0;
4943 fail:
4944         btrfs_commit_transaction(trans, root);
4945 out:
4946         if (ret)
4947                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4948                       root->objectid, ino);
4949         btrfs_release_path(&path);
4950         return ret;
4951 }
4952
4953 /*
4954  * The ternary means dir item, dir index and relative inode ref.
4955  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4956  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4957  * strategy:
4958  * If two of three is missing or mismatched, delete the existing one.
4959  * If one of three is missing or mismatched, add the missing one.
4960  *
4961  * returns 0 means success.
4962  * returns not 0 means on error;
4963  */
4964 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4965                           u64 index, char *name, int name_len, u8 filetype,
4966                           int err)
4967 {
4968         struct btrfs_trans_handle *trans;
4969         int stage = 0;
4970         int ret = 0;
4971
4972         /*
4973          * stage shall be one of following valild values:
4974          *      0: Fine, nothing to do.
4975          *      1: One of three is wrong, so add missing one.
4976          *      2: Two of three is wrong, so delete existed one.
4977          */
4978         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4979                 stage++;
4980         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4981                 stage++;
4982         if (err & (INODE_REF_MISSING))
4983                 stage++;
4984
4985         /* stage must be smllarer than 3 */
4986         ASSERT(stage < 3);
4987
4988         trans = btrfs_start_transaction(root, 1);
4989         if (stage == 2) {
4990                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4991                                    name_len, 0);
4992                 goto out;
4993         }
4994         if (stage == 1) {
4995                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4996                                filetype, &index, 1, 1);
4997                 goto out;
4998         }
4999 out:
5000         btrfs_commit_transaction(trans, root);
5001
5002         if (ret)
5003                 error("fail to repair inode %llu name %s filetype %u",
5004                       ino, name, filetype);
5005         else
5006                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5007                        stage == 2 ? "Delete" : "Add",
5008                        ino, name, filetype);
5009
5010         return ret;
5011 }
5012
5013 /*
5014  * Traverse the given INODE_REF and call find_dir_item() to find related
5015  * DIR_ITEM/DIR_INDEX.
5016  *
5017  * @root:       the root of the fs/file tree
5018  * @ref_key:    the key of the INODE_REF
5019  * @path        the path provides node and slot
5020  * @refs:       the count of INODE_REF
5021  * @mode:       the st_mode of INODE_ITEM
5022  * @name_ret:   returns with the first ref's name
5023  * @name_len_ret:    len of the name_ret
5024  *
5025  * Return 0 if no error occurred.
5026  */
5027 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5028                            struct btrfs_path *path, char *name_ret,
5029                            u32 *namelen_ret, u64 *refs_ret, int mode)
5030 {
5031         struct btrfs_key key;
5032         struct btrfs_key location;
5033         struct btrfs_inode_ref *ref;
5034         struct extent_buffer *node;
5035         char namebuf[BTRFS_NAME_LEN] = {0};
5036         u32 total;
5037         u32 cur = 0;
5038         u32 len;
5039         u32 name_len;
5040         u64 index;
5041         int ret;
5042         int err = 0;
5043         int tmp_err;
5044         int slot;
5045         int need_research = 0;
5046         u64 refs;
5047
5048 begin:
5049         err = 0;
5050         cur = 0;
5051         refs = *refs_ret;
5052
5053         /* since after repair, path and the dir item may be changed */
5054         if (need_research) {
5055                 need_research = 0;
5056                 btrfs_release_path(path);
5057                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5058                 /* the item was deleted, let path point to the last checked item */
5059                 if (ret > 0) {
5060                         if (path->slots[0] == 0)
5061                                 btrfs_prev_leaf(root, path);
5062                         else
5063                                 path->slots[0]--;
5064                 }
5065                 if (ret)
5066                         goto out;
5067         }
5068
5069         location.objectid = ref_key->objectid;
5070         location.type = BTRFS_INODE_ITEM_KEY;
5071         location.offset = 0;
5072         node = path->nodes[0];
5073         slot = path->slots[0];
5074
5075         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5076         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5077         total = btrfs_item_size_nr(node, slot);
5078
5079 next:
5080         /* Update inode ref count */
5081         refs++;
5082         tmp_err = 0;
5083         index = btrfs_inode_ref_index(node, ref);
5084         name_len = btrfs_inode_ref_name_len(node, ref);
5085
5086         if (name_len <= BTRFS_NAME_LEN) {
5087                 len = name_len;
5088         } else {
5089                 len = BTRFS_NAME_LEN;
5090                 warning("root %llu INODE_REF[%llu %llu] name too long",
5091                         root->objectid, ref_key->objectid, ref_key->offset);
5092         }
5093
5094         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5095
5096         /* copy the first name found to name_ret */
5097         if (refs == 1 && name_ret) {
5098                 memcpy(name_ret, namebuf, len);
5099                 *namelen_ret = len;
5100         }
5101
5102         /* Check root dir ref */
5103         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5104                 if (index != 0 || len != strlen("..") ||
5105                     strncmp("..", namebuf, len) ||
5106                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5107                         /* set err bits then repair will delete the ref */
5108                         err |= DIR_INDEX_MISSING;
5109                         err |= DIR_ITEM_MISSING;
5110                 }
5111                 goto end;
5112         }
5113
5114         /* Find related DIR_INDEX */
5115         key.objectid = ref_key->offset;
5116         key.type = BTRFS_DIR_INDEX_KEY;
5117         key.offset = index;
5118         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5119                             imode_to_type(mode));
5120
5121         /* Find related dir_item */
5122         key.objectid = ref_key->offset;
5123         key.type = BTRFS_DIR_ITEM_KEY;
5124         key.offset = btrfs_name_hash(namebuf, len);
5125         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5126                             imode_to_type(mode));
5127 end:
5128         if (tmp_err && repair) {
5129                 ret = repair_ternary_lowmem(root, ref_key->offset,
5130                                             ref_key->objectid, index, namebuf,
5131                                             name_len, imode_to_type(mode),
5132                                             tmp_err);
5133                 if (!ret) {
5134                         need_research = 1;
5135                         goto begin;
5136                 }
5137         }
5138         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5139                             imode_to_type(mode), tmp_err);
5140         err |= tmp_err;
5141         len = sizeof(*ref) + name_len;
5142         ref = (struct btrfs_inode_ref *)((char *)ref + len);
5143         cur += len;
5144         if (cur < total)
5145                 goto next;
5146
5147 out:
5148         *refs_ret = refs;
5149         return err;
5150 }
5151
5152 /*
5153  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5154  * DIR_ITEM/DIR_INDEX.
5155  *
5156  * @root:       the root of the fs/file tree
5157  * @ref_key:    the key of the INODE_EXTREF
5158  * @refs:       the count of INODE_EXTREF
5159  * @mode:       the st_mode of INODE_ITEM
5160  *
5161  * Return 0 if no error occurred.
5162  */
5163 static int check_inode_extref(struct btrfs_root *root,
5164                               struct btrfs_key *ref_key,
5165                               struct extent_buffer *node, int slot, u64 *refs,
5166                               int mode)
5167 {
5168         struct btrfs_key key;
5169         struct btrfs_key location;
5170         struct btrfs_inode_extref *extref;
5171         char namebuf[BTRFS_NAME_LEN] = {0};
5172         u32 total;
5173         u32 cur = 0;
5174         u32 len;
5175         u32 name_len;
5176         u64 index;
5177         u64 parent;
5178         int ret;
5179         int err = 0;
5180
5181         location.objectid = ref_key->objectid;
5182         location.type = BTRFS_INODE_ITEM_KEY;
5183         location.offset = 0;
5184
5185         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5186         total = btrfs_item_size_nr(node, slot);
5187
5188 next:
5189         /* update inode ref count */
5190         (*refs)++;
5191         name_len = btrfs_inode_extref_name_len(node, extref);
5192         index = btrfs_inode_extref_index(node, extref);
5193         parent = btrfs_inode_extref_parent(node, extref);
5194         if (name_len <= BTRFS_NAME_LEN) {
5195                 len = name_len;
5196         } else {
5197                 len = BTRFS_NAME_LEN;
5198                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5199                         root->objectid, ref_key->objectid, ref_key->offset);
5200         }
5201         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5202
5203         /* Check root dir ref name */
5204         if (index == 0 && strncmp(namebuf, "..", name_len)) {
5205                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5206                       root->objectid, ref_key->objectid, ref_key->offset,
5207                       namebuf);
5208                 err |= ROOT_DIR_ERROR;
5209         }
5210
5211         /* find related dir_index */
5212         key.objectid = parent;
5213         key.type = BTRFS_DIR_INDEX_KEY;
5214         key.offset = index;
5215         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5216         err |= ret;
5217
5218         /* find related dir_item */
5219         key.objectid = parent;
5220         key.type = BTRFS_DIR_ITEM_KEY;
5221         key.offset = btrfs_name_hash(namebuf, len);
5222         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5223         err |= ret;
5224
5225         len = sizeof(*extref) + name_len;
5226         extref = (struct btrfs_inode_extref *)((char *)extref + len);
5227         cur += len;
5228
5229         if (cur < total)
5230                 goto next;
5231
5232         return err;
5233 }
5234
5235 /*
5236  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5237  * DIR_ITEM/DIR_INDEX match.
5238  * Return with @index_ret.
5239  *
5240  * @root:       the root of the fs/file tree
5241  * @key:        the key of the INODE_REF/INODE_EXTREF
5242  * @name:       the name in the INODE_REF/INODE_EXTREF
5243  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
5244  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
5245  *              value (64)-1 means do not check index
5246  * @ext_ref:    the EXTENDED_IREF feature
5247  *
5248  * Return 0 if no error occurred.
5249  * Return >0 for error bitmap
5250  */
5251 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5252                           char *name, int namelen, u64 *index_ret,
5253                           unsigned int ext_ref)
5254 {
5255         struct btrfs_path path;
5256         struct btrfs_inode_ref *ref;
5257         struct btrfs_inode_extref *extref;
5258         struct extent_buffer *node;
5259         char ref_namebuf[BTRFS_NAME_LEN] = {0};
5260         u32 total;
5261         u32 cur = 0;
5262         u32 len;
5263         u32 ref_namelen;
5264         u64 ref_index;
5265         u64 parent;
5266         u64 dir_id;
5267         int slot;
5268         int ret;
5269
5270         ASSERT(index_ret);
5271
5272         btrfs_init_path(&path);
5273         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5274         if (ret) {
5275                 ret = INODE_REF_MISSING;
5276                 goto extref;
5277         }
5278
5279         node = path.nodes[0];
5280         slot = path.slots[0];
5281
5282         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5283         total = btrfs_item_size_nr(node, slot);
5284
5285         /* Iterate all entry of INODE_REF */
5286         while (cur < total) {
5287                 ret = INODE_REF_MISSING;
5288
5289                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5290                 ref_index = btrfs_inode_ref_index(node, ref);
5291                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5292                         goto next_ref;
5293
5294                 if (cur + sizeof(*ref) + ref_namelen > total ||
5295                     ref_namelen > BTRFS_NAME_LEN) {
5296                         warning("root %llu INODE %s[%llu %llu] name too long",
5297                                 root->objectid,
5298                                 key->type == BTRFS_INODE_REF_KEY ?
5299                                         "REF" : "EXTREF",
5300                                 key->objectid, key->offset);
5301
5302                         if (cur + sizeof(*ref) > total)
5303                                 break;
5304                         len = min_t(u32, total - cur - sizeof(*ref),
5305                                     BTRFS_NAME_LEN);
5306                 } else {
5307                         len = ref_namelen;
5308                 }
5309
5310                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5311                                    len);
5312
5313                 if (len != namelen || strncmp(ref_namebuf, name, len))
5314                         goto next_ref;
5315
5316                 *index_ret = ref_index;
5317                 ret = 0;
5318                 goto out;
5319 next_ref:
5320                 len = sizeof(*ref) + ref_namelen;
5321                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5322                 cur += len;
5323         }
5324
5325 extref:
5326         /* Skip if not support EXTENDED_IREF feature */
5327         if (!ext_ref)
5328                 goto out;
5329
5330         btrfs_release_path(&path);
5331         btrfs_init_path(&path);
5332
5333         dir_id = key->offset;
5334         key->type = BTRFS_INODE_EXTREF_KEY;
5335         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5336
5337         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5338         if (ret) {
5339                 ret = INODE_REF_MISSING;
5340                 goto out;
5341         }
5342
5343         node = path.nodes[0];
5344         slot = path.slots[0];
5345
5346         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5347         cur = 0;
5348         total = btrfs_item_size_nr(node, slot);
5349
5350         /* Iterate all entry of INODE_EXTREF */
5351         while (cur < total) {
5352                 ret = INODE_REF_MISSING;
5353
5354                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5355                 ref_index = btrfs_inode_extref_index(node, extref);
5356                 parent = btrfs_inode_extref_parent(node, extref);
5357                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5358                         goto next_extref;
5359
5360                 if (parent != dir_id)
5361                         goto next_extref;
5362
5363                 if (ref_namelen <= BTRFS_NAME_LEN) {
5364                         len = ref_namelen;
5365                 } else {
5366                         len = BTRFS_NAME_LEN;
5367                         warning("root %llu INODE %s[%llu %llu] name too long",
5368                                 root->objectid,
5369                                 key->type == BTRFS_INODE_REF_KEY ?
5370                                         "REF" : "EXTREF",
5371                                 key->objectid, key->offset);
5372                 }
5373                 read_extent_buffer(node, ref_namebuf,
5374                                    (unsigned long)(extref + 1), len);
5375
5376                 if (len != namelen || strncmp(ref_namebuf, name, len))
5377                         goto next_extref;
5378
5379                 *index_ret = ref_index;
5380                 ret = 0;
5381                 goto out;
5382
5383 next_extref:
5384                 len = sizeof(*extref) + ref_namelen;
5385                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5386                 cur += len;
5387
5388         }
5389 out:
5390         btrfs_release_path(&path);
5391         return ret;
5392 }
5393
5394 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5395                                u64 ino, u64 index, const char *namebuf,
5396                                int name_len, u8 filetype, int err)
5397 {
5398         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5399                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5400                       root->objectid, key->objectid, key->offset, namebuf,
5401                       filetype,
5402                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5403         }
5404
5405         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5406                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5407                       root->objectid, key->objectid, index, namebuf, filetype,
5408                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5409         }
5410
5411         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5412                 error(
5413                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5414                       root->objectid, ino, index, namebuf, filetype,
5415                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5416         }
5417
5418         if (err & INODE_REF_MISSING)
5419                 error(
5420                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5421                       root->objectid, ino, key->objectid, namebuf, filetype);
5422
5423 }
5424
5425 /*
5426  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5427  *
5428  * Returns error after repair
5429  */
5430 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5431                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5432                            int err)
5433 {
5434         int ret;
5435
5436         if (err & INODE_ITEM_MISSING) {
5437                 ret = repair_inode_item_missing(root, ino, filetype);
5438                 if (!ret)
5439                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5440         }
5441
5442         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5443                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5444                                             name_len, filetype, err);
5445                 if (!ret) {
5446                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5447                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5448                         err &= ~(INODE_REF_MISSING);
5449                 }
5450         }
5451         return err;
5452 }
5453
5454 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5455                 u64 *size_ret)
5456 {
5457         struct btrfs_key key;
5458         struct btrfs_path path;
5459         u32 len;
5460         struct btrfs_dir_item *di;
5461         int ret;
5462         int cur = 0;
5463         int total = 0;
5464
5465         ASSERT(size_ret);
5466         *size_ret = 0;
5467
5468         key.objectid = ino;
5469         key.type = type;
5470         key.offset = (u64)-1;
5471
5472         btrfs_init_path(&path);
5473         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5474         if (ret < 0) {
5475                 ret = -EIO;
5476                 goto out;
5477         }
5478         /* if found, go to spacial case */
5479         if (ret == 0)
5480                 goto special_case;
5481
5482 loop:
5483         ret = btrfs_previous_item(root, &path, ino, type);
5484
5485         if (ret) {
5486                 ret = 0;
5487                 goto out;
5488         }
5489
5490 special_case:
5491         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5492         cur = 0;
5493         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5494
5495         while (cur < total) {
5496                 len = btrfs_dir_name_len(path.nodes[0], di);
5497                 if (len > BTRFS_NAME_LEN)
5498                         len = BTRFS_NAME_LEN;
5499                 *size_ret += len;
5500
5501                 len += btrfs_dir_data_len(path.nodes[0], di);
5502                 len += sizeof(*di);
5503                 di = (struct btrfs_dir_item *)((char *)di + len);
5504                 cur += len;
5505         }
5506         goto loop;
5507
5508 out:
5509         btrfs_release_path(&path);
5510         return ret;
5511 }
5512
5513 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5514 {
5515         u64 item_size;
5516         u64 index_size;
5517         int ret;
5518
5519         ASSERT(size);
5520         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5521         if (ret)
5522                 goto out;
5523
5524         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5525         if (ret)
5526                 goto out;
5527
5528         *size = item_size + index_size;
5529
5530 out:
5531         if (ret)
5532                 error("failed to count root %llu INODE[%llu] root size",
5533                       root->objectid, ino);
5534         return ret;
5535 }
5536
5537 /*
5538  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5539  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5540  *
5541  * @root:       the root of the fs/file tree
5542  * @key:        the key of the INODE_REF/INODE_EXTREF
5543  * @path:       the path
5544  * @size:       the st_size of the INODE_ITEM
5545  * @ext_ref:    the EXTENDED_IREF feature
5546  *
5547  * Return 0 if no error occurred.
5548  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5549  */
5550 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5551                           struct btrfs_path *path, u64 *size,
5552                           unsigned int ext_ref)
5553 {
5554         struct btrfs_dir_item *di;
5555         struct btrfs_inode_item *ii;
5556         struct btrfs_key key;
5557         struct btrfs_key location;
5558         struct extent_buffer *node;
5559         int slot;
5560         char namebuf[BTRFS_NAME_LEN] = {0};
5561         u32 total;
5562         u32 cur = 0;
5563         u32 len;
5564         u32 name_len;
5565         u32 data_len;
5566         u8 filetype;
5567         u32 mode = 0;
5568         u64 index;
5569         int ret;
5570         int err;
5571         int tmp_err;
5572         int need_research = 0;
5573
5574         /*
5575          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5576          * ignore index check.
5577          */
5578         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5579                 index = di_key->offset;
5580         else
5581                 index = (u64)-1;
5582 begin:
5583         err = 0;
5584         cur = 0;
5585
5586         /* since after repair, path and the dir item may be changed */
5587         if (need_research) {
5588                 need_research = 0;
5589                 err |= DIR_COUNT_AGAIN;
5590                 btrfs_release_path(path);
5591                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5592                 /* the item was deleted, let path point the last checked item */
5593                 if (ret > 0) {
5594                         if (path->slots[0] == 0)
5595                                 btrfs_prev_leaf(root, path);
5596                         else
5597                                 path->slots[0]--;
5598                 }
5599                 if (ret)
5600                         goto out;
5601         }
5602
5603         node = path->nodes[0];
5604         slot = path->slots[0];
5605
5606         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5607         total = btrfs_item_size_nr(node, slot);
5608         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5609
5610         while (cur < total) {
5611                 data_len = btrfs_dir_data_len(node, di);
5612                 tmp_err = 0;
5613                 if (data_len)
5614                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5615                               root->objectid,
5616               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5617                               di_key->objectid, di_key->offset, data_len);
5618
5619                 name_len = btrfs_dir_name_len(node, di);
5620                 if (name_len <= BTRFS_NAME_LEN) {
5621                         len = name_len;
5622                 } else {
5623                         len = BTRFS_NAME_LEN;
5624                         warning("root %llu %s[%llu %llu] name too long",
5625                                 root->objectid,
5626                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5627                                 di_key->objectid, di_key->offset);
5628                 }
5629                 (*size) += name_len;
5630                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5631                                    len);
5632                 filetype = btrfs_dir_type(node, di);
5633
5634                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5635                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5636                         err |= -EIO;
5637                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5638                         root->objectid, di_key->objectid, di_key->offset,
5639                         namebuf, len, filetype, di_key->offset,
5640                         btrfs_name_hash(namebuf, len));
5641                 }
5642
5643                 btrfs_dir_item_key_to_cpu(node, di, &location);
5644                 /* Ignore related ROOT_ITEM check */
5645                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5646                         goto next;
5647
5648                 btrfs_release_path(path);
5649                 /* Check relative INODE_ITEM(existence/filetype) */
5650                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5651                 if (ret) {
5652                         tmp_err |= INODE_ITEM_MISSING;
5653                         goto next;
5654                 }
5655
5656                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5657                                     struct btrfs_inode_item);
5658                 mode = btrfs_inode_mode(path->nodes[0], ii);
5659                 if (imode_to_type(mode) != filetype) {
5660                         tmp_err |= INODE_ITEM_MISMATCH;
5661                         goto next;
5662                 }
5663
5664                 /* Check relative INODE_REF/INODE_EXTREF */
5665                 key.objectid = location.objectid;
5666                 key.type = BTRFS_INODE_REF_KEY;
5667                 key.offset = di_key->objectid;
5668                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5669                                           &index, ext_ref);
5670
5671                 /* check relative INDEX/ITEM */
5672                 key.objectid = di_key->objectid;
5673                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5674                         key.type = BTRFS_DIR_INDEX_KEY;
5675                         key.offset = index;
5676                 } else {
5677                         key.type = BTRFS_DIR_ITEM_KEY;
5678                         key.offset = btrfs_name_hash(namebuf, name_len);
5679                 }
5680
5681                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5682                                          name_len, filetype);
5683                 /* find_dir_item may find index */
5684                 if (key.type == BTRFS_DIR_INDEX_KEY)
5685                         index = key.offset;
5686 next:
5687
5688                 if (tmp_err && repair) {
5689                         ret = repair_dir_item(root, di_key->objectid,
5690                                               location.objectid, index,
5691                                               imode_to_type(mode), namebuf,
5692                                               name_len, tmp_err);
5693                         if (ret != tmp_err) {
5694                                 need_research = 1;
5695                                 goto begin;
5696                         }
5697                 }
5698                 btrfs_release_path(path);
5699                 print_dir_item_err(root, di_key, location.objectid, index,
5700                                    namebuf, name_len, filetype, tmp_err);
5701                 err |= tmp_err;
5702                 len = sizeof(*di) + name_len + data_len;
5703                 di = (struct btrfs_dir_item *)((char *)di + len);
5704                 cur += len;
5705
5706                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5707                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5708                               root->objectid, di_key->objectid,
5709                               di_key->offset);
5710                         break;
5711                 }
5712         }
5713 out:
5714         /* research path */
5715         btrfs_release_path(path);
5716         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5717         if (ret)
5718                 err |= ret > 0 ? -ENOENT : ret;
5719         return err;
5720 }
5721
5722 /*
5723  * Wrapper function of btrfs_punch_hole.
5724  *
5725  * Returns 0 means success.
5726  * Returns not 0 means error.
5727  */
5728 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5729                              u64 len)
5730 {
5731         struct btrfs_trans_handle *trans;
5732         int ret = 0;
5733
5734         trans = btrfs_start_transaction(root, 1);
5735         if (IS_ERR(trans))
5736                 return PTR_ERR(trans);
5737
5738         ret = btrfs_punch_hole(trans, root, ino, start, len);
5739         if (ret)
5740                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5741                       start, len, ino);
5742         else
5743                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5744                        ino);
5745
5746         btrfs_commit_transaction(trans, root);
5747         return ret;
5748 }
5749
5750 /*
5751  * Check file extent datasum/hole, update the size of the file extents,
5752  * check and update the last offset of the file extent.
5753  *
5754  * @root:       the root of fs/file tree.
5755  * @fkey:       the key of the file extent.
5756  * @nodatasum:  INODE_NODATASUM feature.
5757  * @size:       the sum of all EXTENT_DATA items size for this inode.
5758  * @end:        the offset of the last extent.
5759  *
5760  * Return 0 if no error occurred.
5761  */
5762 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5763                              struct extent_buffer *node, int slot,
5764                              unsigned int nodatasum, u64 *size, u64 *end)
5765 {
5766         struct btrfs_file_extent_item *fi;
5767         u64 disk_bytenr;
5768         u64 disk_num_bytes;
5769         u64 extent_num_bytes;
5770         u64 extent_offset;
5771         u64 csum_found;         /* In byte size, sectorsize aligned */
5772         u64 search_start;       /* Logical range start we search for csum */
5773         u64 search_len;         /* Logical range len we search for csum */
5774         unsigned int extent_type;
5775         unsigned int is_hole;
5776         int compressed = 0;
5777         int ret;
5778         int err = 0;
5779
5780         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5781
5782         /* Check inline extent */
5783         extent_type = btrfs_file_extent_type(node, fi);
5784         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5785                 struct btrfs_item *e = btrfs_item_nr(slot);
5786                 u32 item_inline_len;
5787
5788                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5789                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5790                 compressed = btrfs_file_extent_compression(node, fi);
5791                 if (extent_num_bytes == 0) {
5792                         error(
5793                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5794                                 root->objectid, fkey->objectid, fkey->offset);
5795                         err |= FILE_EXTENT_ERROR;
5796                 }
5797                 if (!compressed && extent_num_bytes != item_inline_len) {
5798                         error(
5799                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5800                                 root->objectid, fkey->objectid, fkey->offset,
5801                                 extent_num_bytes, item_inline_len);
5802                         err |= FILE_EXTENT_ERROR;
5803                 }
5804                 *end += extent_num_bytes;
5805                 *size += extent_num_bytes;
5806                 return err;
5807         }
5808
5809         /* Check extent type */
5810         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5811                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5812                 err |= FILE_EXTENT_ERROR;
5813                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5814                       root->objectid, fkey->objectid, fkey->offset);
5815                 return err;
5816         }
5817
5818         /* Check REG_EXTENT/PREALLOC_EXTENT */
5819         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5820         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5821         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5822         extent_offset = btrfs_file_extent_offset(node, fi);
5823         compressed = btrfs_file_extent_compression(node, fi);
5824         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5825
5826         /*
5827          * Check EXTENT_DATA csum
5828          *
5829          * For plain (uncompressed) extent, we should only check the range
5830          * we're referring to, as it's possible that part of prealloc extent
5831          * has been written, and has csum:
5832          *
5833          * |<--- Original large preallocated extent A ---->|
5834          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5835          *      No csum                         Has csum
5836          *
5837          * For compressed extent, we should check the whole range.
5838          */
5839         if (!compressed) {
5840                 search_start = disk_bytenr + extent_offset;
5841                 search_len = extent_num_bytes;
5842         } else {
5843                 search_start = disk_bytenr;
5844                 search_len = disk_num_bytes;
5845         }
5846         ret = count_csum_range(root, search_start, search_len, &csum_found);
5847         if (csum_found > 0 && nodatasum) {
5848                 err |= ODD_CSUM_ITEM;
5849                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5850                       root->objectid, fkey->objectid, fkey->offset);
5851         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5852                    !is_hole && (ret < 0 || csum_found < search_len)) {
5853                 err |= CSUM_ITEM_MISSING;
5854                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5855                       root->objectid, fkey->objectid, fkey->offset,
5856                       csum_found, search_len);
5857         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5858                 err |= ODD_CSUM_ITEM;
5859                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5860                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5861         }
5862
5863         /* Check EXTENT_DATA hole */
5864         if (!no_holes && *end != fkey->offset) {
5865                 if (repair)
5866                         ret = punch_extent_hole(root, fkey->objectid,
5867                                                 *end, fkey->offset - *end);
5868                 if (!repair || ret) {
5869                         err |= FILE_EXTENT_ERROR;
5870                         error(
5871                 "root %llu EXTENT_DATA[%llu %llu] interrupt, should start at %llu",
5872                         root->objectid, fkey->objectid, fkey->offset, *end);
5873                 }
5874         }
5875
5876         *end += extent_num_bytes;
5877         if (!is_hole)
5878                 *size += extent_num_bytes;
5879
5880         return err;
5881 }
5882
5883 /*
5884  * Set inode item nbytes to @nbytes
5885  *
5886  * Returns  0     on success
5887  * Returns  != 0  on error
5888  */
5889 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5890                                       struct btrfs_path *path,
5891                                       u64 ino, u64 nbytes)
5892 {
5893         struct btrfs_trans_handle *trans;
5894         struct btrfs_inode_item *ii;
5895         struct btrfs_key key;
5896         struct btrfs_key research_key;
5897         int err = 0;
5898         int ret;
5899
5900         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5901
5902         key.objectid = ino;
5903         key.type = BTRFS_INODE_ITEM_KEY;
5904         key.offset = 0;
5905
5906         trans = btrfs_start_transaction(root, 1);
5907         if (IS_ERR(trans)) {
5908                 ret = PTR_ERR(trans);
5909                 err |= ret;
5910                 goto out;
5911         }
5912
5913         btrfs_release_path(path);
5914         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5915         if (ret > 0)
5916                 ret = -ENOENT;
5917         if (ret) {
5918                 err |= ret;
5919                 goto fail;
5920         }
5921
5922         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5923                             struct btrfs_inode_item);
5924         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5925         btrfs_mark_buffer_dirty(path->nodes[0]);
5926 fail:
5927         btrfs_commit_transaction(trans, root);
5928 out:
5929         if (ret)
5930                 error("failed to set nbytes in inode %llu root %llu",
5931                       ino, root->root_key.objectid);
5932         else
5933                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5934                        root->root_key.objectid, nbytes);
5935
5936         /* research path */
5937         btrfs_release_path(path);
5938         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5939         err |= ret;
5940
5941         return err;
5942 }
5943
5944 /*
5945  * Set directory inode isize to @isize.
5946  *
5947  * Returns 0     on success.
5948  * Returns != 0  on error.
5949  */
5950 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5951                                    struct btrfs_path *path,
5952                                    u64 ino, u64 isize)
5953 {
5954         struct btrfs_trans_handle *trans;
5955         struct btrfs_inode_item *ii;
5956         struct btrfs_key key;
5957         struct btrfs_key research_key;
5958         int ret;
5959         int err = 0;
5960
5961         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5962
5963         key.objectid = ino;
5964         key.type = BTRFS_INODE_ITEM_KEY;
5965         key.offset = 0;
5966
5967         trans = btrfs_start_transaction(root, 1);
5968         if (IS_ERR(trans)) {
5969                 ret = PTR_ERR(trans);
5970                 err |= ret;
5971                 goto out;
5972         }
5973
5974         btrfs_release_path(path);
5975         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5976         if (ret > 0)
5977                 ret = -ENOENT;
5978         if (ret) {
5979                 err |= ret;
5980                 goto fail;
5981         }
5982
5983         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5984                             struct btrfs_inode_item);
5985         btrfs_set_inode_size(path->nodes[0], ii, isize);
5986         btrfs_mark_buffer_dirty(path->nodes[0]);
5987 fail:
5988         btrfs_commit_transaction(trans, root);
5989 out:
5990         if (ret)
5991                 error("failed to set isize in inode %llu root %llu",
5992                       ino, root->root_key.objectid);
5993         else
5994                 printf("Set isize in inode %llu root %llu to %llu\n",
5995                        ino, root->root_key.objectid, isize);
5996
5997         btrfs_release_path(path);
5998         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5999         err |= ret;
6000
6001         return err;
6002 }
6003
6004 /*
6005  * Wrapper function for btrfs_add_orphan_item().
6006  *
6007  * Returns 0     on success.
6008  * Returns != 0  on error.
6009  */
6010 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6011                                            struct btrfs_path *path, u64 ino)
6012 {
6013         struct btrfs_trans_handle *trans;
6014         struct btrfs_key research_key;
6015         int ret;
6016         int err = 0;
6017
6018         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6019
6020         trans = btrfs_start_transaction(root, 1);
6021         if (IS_ERR(trans)) {
6022                 ret = PTR_ERR(trans);
6023                 err |= ret;
6024                 goto out;
6025         }
6026
6027         btrfs_release_path(path);
6028         ret = btrfs_add_orphan_item(trans, root, path, ino);
6029         err |= ret;
6030         btrfs_commit_transaction(trans, root);
6031 out:
6032         if (ret)
6033                 error("failed to add inode %llu as orphan item root %llu",
6034                       ino, root->root_key.objectid);
6035         else
6036                 printf("Added inode %llu as orphan item root %llu\n",
6037                        ino, root->root_key.objectid);
6038
6039         btrfs_release_path(path);
6040         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6041         err |= ret;
6042
6043         return err;
6044 }
6045
6046 /* Set inode_item nlink to @ref_count.
6047  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6048  *
6049  * Returns 0 on success
6050  */
6051 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6052                                       struct btrfs_path *path, u64 ino,
6053                                       const char *name, u32 namelen,
6054                                       u64 ref_count, u8 filetype, u64 *nlink)
6055 {
6056         struct btrfs_trans_handle *trans;
6057         struct btrfs_inode_item *ii;
6058         struct btrfs_key key;
6059         struct btrfs_key old_key;
6060         char namebuf[BTRFS_NAME_LEN] = {0};
6061         int name_len;
6062         int ret;
6063         int ret2;
6064
6065         /* save the key */
6066         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6067
6068         if (name && namelen) {
6069                 ASSERT(namelen <= BTRFS_NAME_LEN);
6070                 memcpy(namebuf, name, namelen);
6071                 name_len = namelen;
6072         } else {
6073                 sprintf(namebuf, "%llu", ino);
6074                 name_len = count_digits(ino);
6075                 printf("Can't find file name for inode %llu, use %s instead\n",
6076                        ino, namebuf);
6077         }
6078
6079         trans = btrfs_start_transaction(root, 1);
6080         if (IS_ERR(trans)) {
6081                 ret = PTR_ERR(trans);
6082                 goto out;
6083         }
6084
6085         btrfs_release_path(path);
6086         /* if refs is 0, put it into lostfound */
6087         if (ref_count == 0) {
6088                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6089                                               name_len, filetype, &ref_count);
6090                 if (ret)
6091                         goto fail;
6092         }
6093
6094         /* reset inode_item's nlink to ref_count */
6095         key.objectid = ino;
6096         key.type = BTRFS_INODE_ITEM_KEY;
6097         key.offset = 0;
6098
6099         btrfs_release_path(path);
6100         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6101         if (ret > 0)
6102                 ret = -ENOENT;
6103         if (ret)
6104                 goto fail;
6105
6106         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6107                             struct btrfs_inode_item);
6108         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6109         btrfs_mark_buffer_dirty(path->nodes[0]);
6110
6111         if (nlink)
6112                 *nlink = ref_count;
6113 fail:
6114         btrfs_commit_transaction(trans, root);
6115 out:
6116         if (ret)
6117                 error(
6118         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6119                        root->objectid, ino, namebuf, filetype);
6120         else
6121                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6122                        root->objectid, ino, namebuf, filetype);
6123
6124         /* research */
6125         btrfs_release_path(path);
6126         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6127         if (ret2 < 0)
6128                 return ret |= ret2;
6129         return ret;
6130 }
6131
/*
 * Check INODE_ITEM and related ITEMs (the same inode number)
 * 1. check link count
 * 2. check inode ref/extref
 * 3. check dir item/index
 *
 * @ext_ref:    the EXTENDED_IREF feature
 *
 * Return 0 if no error occurred.
 * Return >0 (an error bitmap) if an error was found or the traversal is done.
 */
6143 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6144                             unsigned int ext_ref)
6145 {
6146         struct extent_buffer *node;
6147         struct btrfs_inode_item *ii;
6148         struct btrfs_key key;
6149         struct btrfs_key last_key;
6150         u64 inode_id;
6151         u32 mode;
6152         u64 nlink;
6153         u64 nbytes;
6154         u64 isize;
6155         u64 size = 0;
6156         u64 refs = 0;
6157         u64 extent_end = 0;
6158         u64 extent_size = 0;
6159         unsigned int dir;
6160         unsigned int nodatasum;
6161         int slot;
6162         int ret;
6163         int err = 0;
6164         char namebuf[BTRFS_NAME_LEN] = {0};
6165         u32 name_len = 0;
6166
6167         node = path->nodes[0];
6168         slot = path->slots[0];
6169
6170         btrfs_item_key_to_cpu(node, &key, slot);
6171         inode_id = key.objectid;
6172
6173         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6174                 ret = btrfs_next_item(root, path);
6175                 if (ret > 0)
6176                         err |= LAST_ITEM;
6177                 return err;
6178         }
6179
6180         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6181         isize = btrfs_inode_size(node, ii);
6182         nbytes = btrfs_inode_nbytes(node, ii);
6183         mode = btrfs_inode_mode(node, ii);
6184         dir = imode_to_type(mode) == BTRFS_FT_DIR;
6185         nlink = btrfs_inode_nlink(node, ii);
6186         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
6187
6188         while (1) {
6189                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6190                 ret = btrfs_next_item(root, path);
6191                 if (ret < 0) {
6192                         /* out will fill 'err' rusing current statistics */
6193                         goto out;
6194                 } else if (ret > 0) {
6195                         err |= LAST_ITEM;
6196                         goto out;
6197                 }
6198
6199                 node = path->nodes[0];
6200                 slot = path->slots[0];
6201                 btrfs_item_key_to_cpu(node, &key, slot);
6202                 if (key.objectid != inode_id)
6203                         goto out;
6204
6205                 switch (key.type) {
6206                 case BTRFS_INODE_REF_KEY:
6207                         ret = check_inode_ref(root, &key, path, namebuf,
6208                                               &name_len, &refs, mode);
6209                         err |= ret;
6210                         break;
6211                 case BTRFS_INODE_EXTREF_KEY:
6212                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6213                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
6214                                         root->objectid, key.objectid,
6215                                         key.offset);
6216                         ret = check_inode_extref(root, &key, node, slot, &refs,
6217                                                  mode);
6218                         err |= ret;
6219                         break;
6220                 case BTRFS_DIR_ITEM_KEY:
6221                 case BTRFS_DIR_INDEX_KEY:
6222                         if (!dir) {
6223                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6224                                         root->objectid, inode_id,
6225                                         imode_to_type(mode), key.objectid,
6226                                         key.offset);
6227                         }
6228                         ret = check_dir_item(root, &key, path, &size, ext_ref);
6229                         err |= ret;
6230                         break;
6231                 case BTRFS_EXTENT_DATA_KEY:
6232                         if (dir) {
6233                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6234                                         root->objectid, inode_id, key.objectid,
6235                                         key.offset);
6236                         }
6237                         ret = check_file_extent(root, &key, node, slot,
6238                                                 nodatasum, &extent_size,
6239                                                 &extent_end);
6240                         err |= ret;
6241                         break;
6242                 case BTRFS_XATTR_ITEM_KEY:
6243                         break;
6244                 default:
6245                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6246                               key.objectid, key.type, key.offset);
6247                 }
6248         }
6249
6250 out:
6251         if (err & LAST_ITEM) {
6252                 btrfs_release_path(path);
6253                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6254                 if (ret)
6255                         return err;
6256         }
6257
6258         /* verify INODE_ITEM nlink/isize/nbytes */
6259         if (dir) {
6260                 if (repair && (err & DIR_COUNT_AGAIN)) {
6261                         err &= ~DIR_COUNT_AGAIN;
6262                         count_dir_isize(root, inode_id, &size);
6263                 }
6264
6265                 if ((nlink != 1 || refs != 1) && repair) {
6266                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6267                                 namebuf, name_len, refs, imode_to_type(mode),
6268                                 &nlink);
6269                 }
6270
6271                 if (nlink != 1) {
6272                         err |= LINK_COUNT_ERROR;
6273                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6274                               root->objectid, inode_id, nlink);
6275                 }
6276
6277                 /*
6278                  * Just a warning, as dir inode nbytes is just an
6279                  * instructive value.
6280                  */
6281                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6282                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6283                                 root->objectid, inode_id,
6284                                 root->fs_info->nodesize);
6285                 }
6286
6287                 if (isize != size) {
6288                         if (repair)
6289                                 ret = repair_dir_isize_lowmem(root, path,
6290                                                               inode_id, size);
6291                         if (!repair || ret) {
6292                                 err |= ISIZE_ERROR;
6293                                 error(
6294                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6295                                       root->objectid, inode_id, isize, size);
6296                         }
6297                 }
6298         } else {
6299                 if (nlink != refs) {
6300                         if (repair)
6301                                 ret = repair_inode_nlinks_lowmem(root, path,
6302                                          inode_id, namebuf, name_len, refs,
6303                                          imode_to_type(mode), &nlink);
6304                         if (!repair || ret) {
6305                                 err |= LINK_COUNT_ERROR;
6306                                 error(
6307                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6308                                       root->objectid, inode_id, nlink, refs);
6309                         }
6310                 } else if (!nlink) {
6311                         if (repair)
6312                                 ret = repair_inode_orphan_item_lowmem(root,
6313                                                               path, inode_id);
6314                         if (!repair || ret) {
6315                                 err |= ORPHAN_ITEM;
6316                                 error("root %llu INODE[%llu] is orphan item",
6317                                       root->objectid, inode_id);
6318                         }
6319                 }
6320
6321                 if (!nbytes && !no_holes && extent_end < isize) {
6322                         if (repair)
6323                                 ret = punch_extent_hole(root, inode_id,
6324                                                 extent_end, isize - extent_end);
6325                         if (!repair || ret) {
6326                                 err |= NBYTES_ERROR;
6327                                 error(
6328         "root %llu INODE[%llu] size %llu should have a file extent hole",
6329                                       root->objectid, inode_id, isize);
6330                         }
6331                 }
6332
6333                 if (nbytes != extent_size) {
6334                         if (repair)
6335                                 ret = repair_inode_nbytes_lowmem(root, path,
6336                                                          inode_id, extent_size);
6337                         if (!repair || ret) {
6338                                 err |= NBYTES_ERROR;
6339                                 error(
6340         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6341                                       root->objectid, inode_id, nbytes,
6342                                       extent_size);
6343                         }
6344                 }
6345         }
6346
6347         if (err & LAST_ITEM)
6348                 btrfs_next_item(root, path);
6349         return err;
6350 }
6351
6352 /*
6353  * Insert the missing inode item and inode ref.
6354  *
6355  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6356  * Root dir should be handled specially because root dir is the root of fs.
6357  *
6358  * returns err (>0 or 0) after repair
6359  */
6360 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6361 {
6362         struct btrfs_trans_handle *trans;
6363         struct btrfs_key key;
6364         struct btrfs_path path;
6365         int filetype = BTRFS_FT_DIR;
6366         int ret = 0;
6367
6368         btrfs_init_path(&path);
6369
6370         if (err & INODE_REF_MISSING) {
6371                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6372                 key.type = BTRFS_INODE_REF_KEY;
6373                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6374
6375                 trans = btrfs_start_transaction(root, 1);
6376                 if (IS_ERR(trans)) {
6377                         ret = PTR_ERR(trans);
6378                         goto out;
6379                 }
6380
6381                 btrfs_release_path(&path);
6382                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6383                 if (ret)
6384                         goto trans_fail;
6385
6386                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6387                                              BTRFS_FIRST_FREE_OBJECTID,
6388                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6389                 if (ret)
6390                         goto trans_fail;
6391
6392                 printf("Add INODE_REF[%llu %llu] name %s\n",
6393                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6394                        "..");
6395                 err &= ~INODE_REF_MISSING;
6396 trans_fail:
6397                 if (ret)
6398                         error("fail to insert first inode's ref");
6399                 btrfs_commit_transaction(trans, root);
6400         }
6401
6402         if (err & INODE_ITEM_MISSING) {
6403                 ret = repair_inode_item_missing(root,
6404                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6405                 if (ret)
6406                         goto out;
6407                 err &= ~INODE_ITEM_MISSING;
6408         }
6409 out:
6410         if (ret)
6411                 error("fail to repair first inode");
6412         btrfs_release_path(&path);
6413         return err;
6414 }
6415
6416 /*
6417  * check first root dir's inode_item and inode_ref
6418  *
6419  * returns 0 means no error
6420  * returns >0 means error
6421  * returns <0 means fatal error
6422  */
6423 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6424 {
6425         struct btrfs_path path;
6426         struct btrfs_key key;
6427         struct btrfs_inode_item *ii;
6428         u64 index;
6429         u32 mode;
6430         int err = 0;
6431         int ret;
6432
6433         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6434         key.type = BTRFS_INODE_ITEM_KEY;
6435         key.offset = 0;
6436
6437         /* For root being dropped, we don't need to check first inode */
6438         if (btrfs_root_refs(&root->root_item) == 0 &&
6439             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6440             BTRFS_FIRST_FREE_OBJECTID)
6441                 return 0;
6442
6443         btrfs_init_path(&path);
6444         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6445         if (ret < 0)
6446                 goto out;
6447         if (ret > 0) {
6448                 ret = 0;
6449                 err |= INODE_ITEM_MISSING;
6450         } else {
6451                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6452                                     struct btrfs_inode_item);
6453                 mode = btrfs_inode_mode(path.nodes[0], ii);
6454                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6455                         err |= INODE_ITEM_MISMATCH;
6456         }
6457
6458         /* lookup first inode ref */
6459         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6460         key.type = BTRFS_INODE_REF_KEY;
6461         /* special index value */
6462         index = 0;
6463
6464         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6465         if (ret < 0)
6466                 goto out;
6467         err |= ret;
6468
6469 out:
6470         btrfs_release_path(&path);
6471
6472         if (err && repair)
6473                 err = repair_fs_first_inode(root, err);
6474
6475         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6476                 error("root dir INODE_ITEM is %s",
6477                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6478         if (err & INODE_REF_MISSING)
6479                 error("root dir INODE_REF is missing");
6480
6481         return ret < 0 ? ret : err;
6482 }
6483
6484 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6485                                                 u64 parent, u64 root)
6486 {
6487         struct rb_node *node;
6488         struct tree_backref *back = NULL;
6489         struct tree_backref match = {
6490                 .node = {
6491                         .is_data = 0,
6492                 },
6493         };
6494
6495         if (parent) {
6496                 match.parent = parent;
6497                 match.node.full_backref = 1;
6498         } else {
6499                 match.root = root;
6500         }
6501
6502         node = rb_search(&rec->backref_tree, &match.node.node,
6503                          (rb_compare_keys)compare_extent_backref, NULL);
6504         if (node)
6505                 back = to_tree_backref(rb_node_to_extent_backref(node));
6506
6507         return back;
6508 }
6509
6510 static struct data_backref *find_data_backref(struct extent_record *rec,
6511                                                 u64 parent, u64 root,
6512                                                 u64 owner, u64 offset,
6513                                                 int found_ref,
6514                                                 u64 disk_bytenr, u64 bytes)
6515 {
6516         struct rb_node *node;
6517         struct data_backref *back = NULL;
6518         struct data_backref match = {
6519                 .node = {
6520                         .is_data = 1,
6521                 },
6522                 .owner = owner,
6523                 .offset = offset,
6524                 .bytes = bytes,
6525                 .found_ref = found_ref,
6526                 .disk_bytenr = disk_bytenr,
6527         };
6528
6529         if (parent) {
6530                 match.parent = parent;
6531                 match.node.full_backref = 1;
6532         } else {
6533                 match.root = root;
6534         }
6535
6536         node = rb_search(&rec->backref_tree, &match.node.node,
6537                          (rb_compare_keys)compare_extent_backref, NULL);
6538         if (node)
6539                 back = to_data_backref(rb_node_to_extent_backref(node));
6540
6541         return back;
6542 }
6543 /*
6544  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6545  * blocks and integrity of fs tree items.
6546  *
6547  * @root:         the root of the tree to be checked.
6548  * @ext_ref       feature EXTENDED_IREF is enable or not.
6549  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6550  *                otherwise means check fs tree(s) items relationship and
6551  *                @root MUST be a fs tree root.
6552  * Returns 0      represents OK.
6553  * Returns not 0  represents error.
6554  */
6555 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6556                             struct btrfs_root *root, unsigned int ext_ref,
6557                             int check_all)
6558
6559 {
6560         struct btrfs_path path;
6561         struct node_refs nrefs;
6562         struct btrfs_root_item *root_item = &root->root_item;
6563         int ret;
6564         int level;
6565         int err = 0;
6566
6567         memset(&nrefs, 0, sizeof(nrefs));
6568         if (!check_all) {
6569                 /*
6570                  * We need to manually check the first inode item (256)
6571                  * As the following traversal function will only start from
6572                  * the first inode item in the leaf, if inode item (256) is
6573                  * missing we will skip it forever.
6574                  */
6575                 ret = check_fs_first_inode(root, ext_ref);
6576                 if (ret < 0)
6577                         return ret;
6578         }
6579
6580
6581         level = btrfs_header_level(root->node);
6582         btrfs_init_path(&path);
6583
6584         if (btrfs_root_refs(root_item) > 0 ||
6585             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6586                 path.nodes[level] = root->node;
6587                 path.slots[level] = 0;
6588                 extent_buffer_get(root->node);
6589         } else {
6590                 struct btrfs_key key;
6591
6592                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6593                 level = root_item->drop_level;
6594                 path.lowest_level = level;
6595                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6596                 if (ret < 0)
6597                         goto out;
6598                 ret = 0;
6599         }
6600
6601         while (1) {
6602                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6603                                         ext_ref, check_all);
6604
6605                 err |= !!ret;
6606
6607                 /* if ret is negative, walk shall stop */
6608                 if (ret < 0) {
6609                         ret = err;
6610                         break;
6611                 }
6612
6613                 ret = walk_up_tree_v2(root, &path, &level);
6614                 if (ret != 0) {
6615                         /* Normal exit, reset ret to err */
6616                         ret = err;
6617                         break;
6618                 }
6619         }
6620
6621 out:
6622         btrfs_release_path(&path);
6623         return ret;
6624 }
6625
6626 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
6627
6628 /*
6629  * Iterate all items in the tree and call check_inode_item() to check.
6630  *
6631  * @root:       the root of the tree to be checked.
6632  * @ext_ref:    the EXTENDED_IREF feature
6633  *
6634  * Return 0 if no error found.
6635  * Return <0 for error.
6636  */
6637 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6638 {
6639         reset_cached_block_groups(root->fs_info);
6640         return check_btrfs_root(NULL, root, ext_ref, 0);
6641 }
6642
6643 /*
6644  * Find the relative ref for root_ref and root_backref.
6645  *
6646  * @root:       the root of the root tree.
6647  * @ref_key:    the key of the root ref.
6648  *
6649  * Return 0 if no error occurred.
6650  */
6651 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6652                           struct extent_buffer *node, int slot)
6653 {
6654         struct btrfs_path path;
6655         struct btrfs_key key;
6656         struct btrfs_root_ref *ref;
6657         struct btrfs_root_ref *backref;
6658         char ref_name[BTRFS_NAME_LEN] = {0};
6659         char backref_name[BTRFS_NAME_LEN] = {0};
6660         u64 ref_dirid;
6661         u64 ref_seq;
6662         u32 ref_namelen;
6663         u64 backref_dirid;
6664         u64 backref_seq;
6665         u32 backref_namelen;
6666         u32 len;
6667         int ret;
6668         int err = 0;
6669
6670         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6671         ref_dirid = btrfs_root_ref_dirid(node, ref);
6672         ref_seq = btrfs_root_ref_sequence(node, ref);
6673         ref_namelen = btrfs_root_ref_name_len(node, ref);
6674
6675         if (ref_namelen <= BTRFS_NAME_LEN) {
6676                 len = ref_namelen;
6677         } else {
6678                 len = BTRFS_NAME_LEN;
6679                 warning("%s[%llu %llu] ref_name too long",
6680                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6681                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6682                         ref_key->offset);
6683         }
6684         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6685
6686         /* Find relative root_ref */
6687         key.objectid = ref_key->offset;
6688         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6689         key.offset = ref_key->objectid;
6690
6691         btrfs_init_path(&path);
6692         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6693         if (ret) {
6694                 err |= ROOT_REF_MISSING;
6695                 error("%s[%llu %llu] couldn't find relative ref",
6696                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6697                       "ROOT_REF" : "ROOT_BACKREF",
6698                       ref_key->objectid, ref_key->offset);
6699                 goto out;
6700         }
6701
6702         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6703                                  struct btrfs_root_ref);
6704         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6705         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6706         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6707
6708         if (backref_namelen <= BTRFS_NAME_LEN) {
6709                 len = backref_namelen;
6710         } else {
6711                 len = BTRFS_NAME_LEN;
6712                 warning("%s[%llu %llu] ref_name too long",
6713                         key.type == BTRFS_ROOT_REF_KEY ?
6714                         "ROOT_REF" : "ROOT_BACKREF",
6715                         key.objectid, key.offset);
6716         }
6717         read_extent_buffer(path.nodes[0], backref_name,
6718                            (unsigned long)(backref + 1), len);
6719
6720         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6721             ref_namelen != backref_namelen ||
6722             strncmp(ref_name, backref_name, len)) {
6723                 err |= ROOT_REF_MISMATCH;
6724                 error("%s[%llu %llu] mismatch relative ref",
6725                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6726                       "ROOT_REF" : "ROOT_BACKREF",
6727                       ref_key->objectid, ref_key->offset);
6728         }
6729 out:
6730         btrfs_release_path(&path);
6731         return err;
6732 }
6733
6734 /*
6735  * Check all fs/file tree in low_memory mode.
6736  *
6737  * 1. for fs tree root item, call check_fs_root_v2()
6738  * 2. for fs tree root ref/backref, call check_root_ref()
6739  *
6740  * Return 0 if no error occurred.
6741  */
6742 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6743 {
6744         struct btrfs_root *tree_root = fs_info->tree_root;
6745         struct btrfs_root *cur_root = NULL;
6746         struct btrfs_path path;
6747         struct btrfs_key key;
6748         struct extent_buffer *node;
6749         unsigned int ext_ref;
6750         int slot;
6751         int ret;
6752         int err = 0;
6753
6754         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6755
6756         btrfs_init_path(&path);
6757         key.objectid = BTRFS_FS_TREE_OBJECTID;
6758         key.offset = 0;
6759         key.type = BTRFS_ROOT_ITEM_KEY;
6760
6761         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6762         if (ret < 0) {
6763                 err = ret;
6764                 goto out;
6765         } else if (ret > 0) {
6766                 err = -ENOENT;
6767                 goto out;
6768         }
6769
6770         while (1) {
6771                 node = path.nodes[0];
6772                 slot = path.slots[0];
6773                 btrfs_item_key_to_cpu(node, &key, slot);
6774                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6775                         goto out;
6776                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6777                     fs_root_objectid(key.objectid)) {
6778                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6779                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6780                                                                        &key);
6781                         } else {
6782                                 key.offset = (u64)-1;
6783                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6784                         }
6785
6786                         if (IS_ERR(cur_root)) {
6787                                 error("Fail to read fs/subvol tree: %lld",
6788                                       key.objectid);
6789                                 err = -EIO;
6790                                 goto next;
6791                         }
6792
6793                         ret = check_fs_root_v2(cur_root, ext_ref);
6794                         err |= ret;
6795
6796                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6797                                 btrfs_free_fs_root(cur_root);
6798                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6799                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6800                         ret = check_root_ref(tree_root, &key, node, slot);
6801                         err |= ret;
6802                 }
6803 next:
6804                 ret = btrfs_next_item(tree_root, &path);
6805                 if (ret > 0)
6806                         goto out;
6807                 if (ret < 0) {
6808                         err = ret;
6809                         goto out;
6810                 }
6811         }
6812
6813 out:
6814         btrfs_release_path(&path);
6815         return err;
6816 }
6817
6818 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6819                           struct cache_tree *root_cache)
6820 {
6821         int ret;
6822
6823         if (!ctx.progress_enabled)
6824                 fprintf(stderr, "checking fs roots\n");
6825         if (check_mode == CHECK_MODE_LOWMEM)
6826                 ret = check_fs_roots_v2(fs_info);
6827         else
6828                 ret = check_fs_roots(fs_info, root_cache);
6829
6830         return ret;
6831 }
6832
6833 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6834 {
6835         struct extent_backref *back, *tmp;
6836         struct tree_backref *tback;
6837         struct data_backref *dback;
6838         u64 found = 0;
6839         int err = 0;
6840
6841         rbtree_postorder_for_each_entry_safe(back, tmp,
6842                                              &rec->backref_tree, node) {
6843                 if (!back->found_extent_tree) {
6844                         err = 1;
6845                         if (!print_errs)
6846                                 goto out;
6847                         if (back->is_data) {
6848                                 dback = to_data_backref(back);
6849                                 fprintf(stderr, "Data backref %llu %s %llu"
6850                                         " owner %llu offset %llu num_refs %lu"
6851                                         " not found in extent tree\n",
6852                                         (unsigned long long)rec->start,
6853                                         back->full_backref ?
6854                                         "parent" : "root",
6855                                         back->full_backref ?
6856                                         (unsigned long long)dback->parent:
6857                                         (unsigned long long)dback->root,
6858                                         (unsigned long long)dback->owner,
6859                                         (unsigned long long)dback->offset,
6860                                         (unsigned long)dback->num_refs);
6861                         } else {
6862                                 tback = to_tree_backref(back);
6863                                 fprintf(stderr, "Tree backref %llu parent %llu"
6864                                         " root %llu not found in extent tree\n",
6865                                         (unsigned long long)rec->start,
6866                                         (unsigned long long)tback->parent,
6867                                         (unsigned long long)tback->root);
6868                         }
6869                 }
6870                 if (!back->is_data && !back->found_ref) {
6871                         err = 1;
6872                         if (!print_errs)
6873                                 goto out;
6874                         tback = to_tree_backref(back);
6875                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6876                                 (unsigned long long)rec->start,
6877                                 back->full_backref ? "parent" : "root",
6878                                 back->full_backref ?
6879                                 (unsigned long long)tback->parent :
6880                                 (unsigned long long)tback->root, back);
6881                 }
6882                 if (back->is_data) {
6883                         dback = to_data_backref(back);
6884                         if (dback->found_ref != dback->num_refs) {
6885                                 err = 1;
6886                                 if (!print_errs)
6887                                         goto out;
6888                                 fprintf(stderr, "Incorrect local backref count"
6889                                         " on %llu %s %llu owner %llu"
6890                                         " offset %llu found %u wanted %u back %p\n",
6891                                         (unsigned long long)rec->start,
6892                                         back->full_backref ?
6893                                         "parent" : "root",
6894                                         back->full_backref ?
6895                                         (unsigned long long)dback->parent:
6896                                         (unsigned long long)dback->root,
6897                                         (unsigned long long)dback->owner,
6898                                         (unsigned long long)dback->offset,
6899                                         dback->found_ref, dback->num_refs, back);
6900                         }
6901                         if (dback->disk_bytenr != rec->start) {
6902                                 err = 1;
6903                                 if (!print_errs)
6904                                         goto out;
6905                                 fprintf(stderr, "Backref disk bytenr does not"
6906                                         " match extent record, bytenr=%llu, "
6907                                         "ref bytenr=%llu\n",
6908                                         (unsigned long long)rec->start,
6909                                         (unsigned long long)dback->disk_bytenr);
6910                         }
6911
6912                         if (dback->bytes != rec->nr) {
6913                                 err = 1;
6914                                 if (!print_errs)
6915                                         goto out;
6916                                 fprintf(stderr, "Backref bytes do not match "
6917                                         "extent backref, bytenr=%llu, ref "
6918                                         "bytes=%llu, backref bytes=%llu\n",
6919                                         (unsigned long long)rec->start,
6920                                         (unsigned long long)rec->nr,
6921                                         (unsigned long long)dback->bytes);
6922                         }
6923                 }
6924                 if (!back->is_data) {
6925                         found += 1;
6926                 } else {
6927                         dback = to_data_backref(back);
6928                         found += dback->found_ref;
6929                 }
6930         }
6931         if (found != rec->refs) {
6932                 err = 1;
6933                 if (!print_errs)
6934                         goto out;
6935                 fprintf(stderr, "Incorrect global backref count "
6936                         "on %llu found %llu wanted %llu\n",
6937                         (unsigned long long)rec->start,
6938                         (unsigned long long)found,
6939                         (unsigned long long)rec->refs);
6940         }
6941 out:
6942         return err;
6943 }
6944
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6951
6952 static void free_all_extent_backrefs(struct extent_record *rec)
6953 {
6954         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6955 }
6956
6957 static void free_extent_record_cache(struct cache_tree *extent_cache)
6958 {
6959         struct cache_extent *cache;
6960         struct extent_record *rec;
6961
6962         while (1) {
6963                 cache = first_cache_extent(extent_cache);
6964                 if (!cache)
6965                         break;
6966                 rec = container_of(cache, struct extent_record, cache);
6967                 remove_cache_extent(extent_cache, cache);
6968                 free_all_extent_backrefs(rec);
6969                 free(rec);
6970         }
6971 }
6972
6973 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6974                                  struct extent_record *rec)
6975 {
6976         if (rec->content_checked && rec->owner_ref_checked &&
6977             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6978             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6979             !rec->bad_full_backref && !rec->crossing_stripes &&
6980             !rec->wrong_chunk_type) {
6981                 remove_cache_extent(extent_cache, &rec->cache);
6982                 free_all_extent_backrefs(rec);
6983                 list_del_init(&rec->list);
6984                 free(rec);
6985         }
6986         return 0;
6987 }
6988
6989 static int check_owner_ref(struct btrfs_root *root,
6990                             struct extent_record *rec,
6991                             struct extent_buffer *buf)
6992 {
6993         struct extent_backref *node, *tmp;
6994         struct tree_backref *back;
6995         struct btrfs_root *ref_root;
6996         struct btrfs_key key;
6997         struct btrfs_path path;
6998         struct extent_buffer *parent;
6999         int level;
7000         int found = 0;
7001         int ret;
7002
7003         rbtree_postorder_for_each_entry_safe(node, tmp,
7004                                              &rec->backref_tree, node) {
7005                 if (node->is_data)
7006                         continue;
7007                 if (!node->found_ref)
7008                         continue;
7009                 if (node->full_backref)
7010                         continue;
7011                 back = to_tree_backref(node);
7012                 if (btrfs_header_owner(buf) == back->root)
7013                         return 0;
7014         }
7015         BUG_ON(rec->is_root);
7016
7017         /* try to find the block by search corresponding fs tree */
7018         key.objectid = btrfs_header_owner(buf);
7019         key.type = BTRFS_ROOT_ITEM_KEY;
7020         key.offset = (u64)-1;
7021
7022         ref_root = btrfs_read_fs_root(root->fs_info, &key);
7023         if (IS_ERR(ref_root))
7024                 return 1;
7025
7026         level = btrfs_header_level(buf);
7027         if (level == 0)
7028                 btrfs_item_key_to_cpu(buf, &key, 0);
7029         else
7030                 btrfs_node_key_to_cpu(buf, &key, 0);
7031
7032         btrfs_init_path(&path);
7033         path.lowest_level = level + 1;
7034         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7035         if (ret < 0)
7036                 return 0;
7037
7038         parent = path.nodes[level + 1];
7039         if (parent && buf->start == btrfs_node_blockptr(parent,
7040                                                         path.slots[level + 1]))
7041                 found = 1;
7042
7043         btrfs_release_path(&path);
7044         return found ? 0 : 1;
7045 }
7046
7047 static int is_extent_tree_record(struct extent_record *rec)
7048 {
7049         struct extent_backref *node, *tmp;
7050         struct tree_backref *back;
7051         int is_extent = 0;
7052
7053         rbtree_postorder_for_each_entry_safe(node, tmp,
7054                                              &rec->backref_tree, node) {
7055                 if (node->is_data)
7056                         return 0;
7057                 back = to_tree_backref(node);
7058                 if (node->full_backref)
7059                         return 0;
7060                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
7061                         is_extent = 1;
7062         }
7063         return is_extent;
7064 }
7065
7066
7067 static int record_bad_block_io(struct btrfs_fs_info *info,
7068                                struct cache_tree *extent_cache,
7069                                u64 start, u64 len)
7070 {
7071         struct extent_record *rec;
7072         struct cache_extent *cache;
7073         struct btrfs_key key;
7074
7075         cache = lookup_cache_extent(extent_cache, start, len);
7076         if (!cache)
7077                 return 0;
7078
7079         rec = container_of(cache, struct extent_record, cache);
7080         if (!is_extent_tree_record(rec))
7081                 return 0;
7082
7083         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7084         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
7085 }
7086
7087 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
7088                        struct extent_buffer *buf, int slot)
7089 {
7090         if (btrfs_header_level(buf)) {
7091                 struct btrfs_key_ptr ptr1, ptr2;
7092
7093                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
7094                                    sizeof(struct btrfs_key_ptr));
7095                 read_extent_buffer(buf, &ptr2,
7096                                    btrfs_node_key_ptr_offset(slot + 1),
7097                                    sizeof(struct btrfs_key_ptr));
7098                 write_extent_buffer(buf, &ptr1,
7099                                     btrfs_node_key_ptr_offset(slot + 1),
7100                                     sizeof(struct btrfs_key_ptr));
7101                 write_extent_buffer(buf, &ptr2,
7102                                     btrfs_node_key_ptr_offset(slot),
7103                                     sizeof(struct btrfs_key_ptr));
7104                 if (slot == 0) {
7105                         struct btrfs_disk_key key;
7106                         btrfs_node_key(buf, &key, 0);
7107                         btrfs_fixup_low_keys(root, path, &key,
7108                                              btrfs_header_level(buf) + 1);
7109                 }
7110         } else {
7111                 struct btrfs_item *item1, *item2;
7112                 struct btrfs_key k1, k2;
7113                 char *item1_data, *item2_data;
7114                 u32 item1_offset, item2_offset, item1_size, item2_size;
7115
7116                 item1 = btrfs_item_nr(slot);
7117                 item2 = btrfs_item_nr(slot + 1);
7118                 btrfs_item_key_to_cpu(buf, &k1, slot);
7119                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
7120                 item1_offset = btrfs_item_offset(buf, item1);
7121                 item2_offset = btrfs_item_offset(buf, item2);
7122                 item1_size = btrfs_item_size(buf, item1);
7123                 item2_size = btrfs_item_size(buf, item2);
7124
7125                 item1_data = malloc(item1_size);
7126                 if (!item1_data)
7127                         return -ENOMEM;
7128                 item2_data = malloc(item2_size);
7129                 if (!item2_data) {
7130                         free(item1_data);
7131                         return -ENOMEM;
7132                 }
7133
7134                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
7135                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
7136
7137                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
7138                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
7139                 free(item1_data);
7140                 free(item2_data);
7141
7142                 btrfs_set_item_offset(buf, item1, item2_offset);
7143                 btrfs_set_item_offset(buf, item2, item1_offset);
7144                 btrfs_set_item_size(buf, item1, item2_size);
7145                 btrfs_set_item_size(buf, item2, item1_size);
7146
7147                 path->slots[0] = slot;
7148                 btrfs_set_item_key_unsafe(root, path, &k2);
7149                 path->slots[0] = slot + 1;
7150                 btrfs_set_item_key_unsafe(root, path, &k1);
7151         }
7152         return 0;
7153 }
7154
7155 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7156 {
7157         struct extent_buffer *buf;
7158         struct btrfs_key k1, k2;
7159         int i;
7160         int level = path->lowest_level;
7161         int ret = -EIO;
7162
7163         buf = path->nodes[level];
7164         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7165                 if (level) {
7166                         btrfs_node_key_to_cpu(buf, &k1, i);
7167                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
7168                 } else {
7169                         btrfs_item_key_to_cpu(buf, &k1, i);
7170                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
7171                 }
7172                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7173                         continue;
7174                 ret = swap_values(root, path, buf, i);
7175                 if (ret)
7176                         break;
7177                 btrfs_mark_buffer_dirty(buf);
7178                 i = 0;
7179         }
7180         return ret;
7181 }
7182
7183 static int delete_bogus_item(struct btrfs_root *root,
7184                              struct btrfs_path *path,
7185                              struct extent_buffer *buf, int slot)
7186 {
7187         struct btrfs_key key;
7188         int nritems = btrfs_header_nritems(buf);
7189
7190         btrfs_item_key_to_cpu(buf, &key, slot);
7191
7192         /* These are all the keys we can deal with missing. */
7193         if (key.type != BTRFS_DIR_INDEX_KEY &&
7194             key.type != BTRFS_EXTENT_ITEM_KEY &&
7195             key.type != BTRFS_METADATA_ITEM_KEY &&
7196             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7197             key.type != BTRFS_EXTENT_DATA_REF_KEY)
7198                 return -1;
7199
7200         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7201                (unsigned long long)key.objectid, key.type,
7202                (unsigned long long)key.offset, slot, buf->start);
7203         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7204                               btrfs_item_nr_offset(slot + 1),
7205                               sizeof(struct btrfs_item) *
7206                               (nritems - slot - 1));
7207         btrfs_set_header_nritems(buf, nritems - 1);
7208         if (slot == 0) {
7209                 struct btrfs_disk_key disk_key;
7210
7211                 btrfs_item_key(buf, &disk_key, 0);
7212                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7213         }
7214         btrfs_mark_buffer_dirty(buf);
7215         return 0;
7216 }
7217
7218 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7219 {
7220         struct extent_buffer *buf;
7221         int i;
7222         int ret = 0;
7223
7224         /* We should only get this for leaves */
7225         BUG_ON(path->lowest_level);
7226         buf = path->nodes[0];
7227 again:
7228         for (i = 0; i < btrfs_header_nritems(buf); i++) {
7229                 unsigned int shift = 0, offset;
7230
7231                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7232                     BTRFS_LEAF_DATA_SIZE(root)) {
7233                         if (btrfs_item_end_nr(buf, i) >
7234                             BTRFS_LEAF_DATA_SIZE(root)) {
7235                                 ret = delete_bogus_item(root, path, buf, i);
7236                                 if (!ret)
7237                                         goto again;
7238                                 fprintf(stderr, "item is off the end of the "
7239                                         "leaf, can't fix\n");
7240                                 ret = -EIO;
7241                                 break;
7242                         }
7243                         shift = BTRFS_LEAF_DATA_SIZE(root) -
7244                                 btrfs_item_end_nr(buf, i);
7245                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7246                            btrfs_item_offset_nr(buf, i - 1)) {
7247                         if (btrfs_item_end_nr(buf, i) >
7248                             btrfs_item_offset_nr(buf, i - 1)) {
7249                                 ret = delete_bogus_item(root, path, buf, i);
7250                                 if (!ret)
7251                                         goto again;
7252                                 fprintf(stderr, "items overlap, can't fix\n");
7253                                 ret = -EIO;
7254                                 break;
7255                         }
7256                         shift = btrfs_item_offset_nr(buf, i - 1) -
7257                                 btrfs_item_end_nr(buf, i);
7258                 }
7259                 if (!shift)
7260                         continue;
7261
7262                 printf("Shifting item nr %d by %u bytes in block %llu\n",
7263                        i, shift, (unsigned long long)buf->start);
7264                 offset = btrfs_item_offset_nr(buf, i);
7265                 memmove_extent_buffer(buf,
7266                                       btrfs_leaf_data(buf) + offset + shift,
7267                                       btrfs_leaf_data(buf) + offset,
7268                                       btrfs_item_size_nr(buf, i));
7269                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7270                                       offset + shift);
7271                 btrfs_mark_buffer_dirty(buf);
7272         }
7273
7274         /*
7275          * We may have moved things, in which case we want to exit so we don't
7276          * write those changes out.  Once we have proper abort functionality in
7277          * progs this can be changed to something nicer.
7278          */
7279         BUG_ON(ret);
7280         return ret;
7281 }
7282
7283 /*
7284  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
7285  * then just return -EIO.
7286  */
7287 static int try_to_fix_bad_block(struct btrfs_root *root,
7288                                 struct extent_buffer *buf,
7289                                 enum btrfs_tree_block_status status)
7290 {
7291         struct btrfs_trans_handle *trans;
7292         struct ulist *roots;
7293         struct ulist_node *node;
7294         struct btrfs_root *search_root;
7295         struct btrfs_path path;
7296         struct ulist_iterator iter;
7297         struct btrfs_key root_key, key;
7298         int ret;
7299
7300         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7301             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7302                 return -EIO;
7303
7304         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7305         if (ret)
7306                 return -EIO;
7307
7308         btrfs_init_path(&path);
7309         ULIST_ITER_INIT(&iter);
7310         while ((node = ulist_next(roots, &iter))) {
7311                 root_key.objectid = node->val;
7312                 root_key.type = BTRFS_ROOT_ITEM_KEY;
7313                 root_key.offset = (u64)-1;
7314
7315                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7316                 if (IS_ERR(root)) {
7317                         ret = -EIO;
7318                         break;
7319                 }
7320
7321
7322                 trans = btrfs_start_transaction(search_root, 0);
7323                 if (IS_ERR(trans)) {
7324                         ret = PTR_ERR(trans);
7325                         break;
7326                 }
7327
7328                 path.lowest_level = btrfs_header_level(buf);
7329                 path.skip_check_block = 1;
7330                 if (path.lowest_level)
7331                         btrfs_node_key_to_cpu(buf, &key, 0);
7332                 else
7333                         btrfs_item_key_to_cpu(buf, &key, 0);
7334                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7335                 if (ret) {
7336                         ret = -EIO;
7337                         btrfs_commit_transaction(trans, search_root);
7338                         break;
7339                 }
7340                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7341                         ret = fix_key_order(search_root, &path);
7342                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7343                         ret = fix_item_offset(search_root, &path);
7344                 if (ret) {
7345                         btrfs_commit_transaction(trans, search_root);
7346                         break;
7347                 }
7348                 btrfs_release_path(&path);
7349                 btrfs_commit_transaction(trans, search_root);
7350         }
7351         ulist_free(roots);
7352         btrfs_release_path(&path);
7353         return ret;
7354 }
7355
7356 static int check_block(struct btrfs_root *root,
7357                        struct cache_tree *extent_cache,
7358                        struct extent_buffer *buf, u64 flags)
7359 {
7360         struct extent_record *rec;
7361         struct cache_extent *cache;
7362         struct btrfs_key key;
7363         enum btrfs_tree_block_status status;
7364         int ret = 0;
7365         int level;
7366
7367         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7368         if (!cache)
7369                 return 1;
7370         rec = container_of(cache, struct extent_record, cache);
7371         rec->generation = btrfs_header_generation(buf);
7372
7373         level = btrfs_header_level(buf);
7374         if (btrfs_header_nritems(buf) > 0) {
7375
7376                 if (level == 0)
7377                         btrfs_item_key_to_cpu(buf, &key, 0);
7378                 else
7379                         btrfs_node_key_to_cpu(buf, &key, 0);
7380
7381                 rec->info_objectid = key.objectid;
7382         }
7383         rec->info_level = level;
7384
7385         if (btrfs_is_leaf(buf))
7386                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7387         else
7388                 status = btrfs_check_node(root, &rec->parent_key, buf);
7389
7390         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7391                 if (repair)
7392                         status = try_to_fix_bad_block(root, buf, status);
7393                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7394                         ret = -EIO;
7395                         fprintf(stderr, "bad block %llu\n",
7396                                 (unsigned long long)buf->start);
7397                 } else {
7398                         /*
7399                          * Signal to callers we need to start the scan over
7400                          * again since we'll have cowed blocks.
7401                          */
7402                         ret = -EAGAIN;
7403                 }
7404         } else {
7405                 rec->content_checked = 1;
7406                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7407                         rec->owner_ref_checked = 1;
7408                 else {
7409                         ret = check_owner_ref(root, rec, buf);
7410                         if (!ret)
7411                                 rec->owner_ref_checked = 1;
7412                 }
7413         }
7414         if (!ret)
7415                 maybe_free_extent_rec(extent_cache, rec);
7416         return ret;
7417 }
7418
#if 0
/*
 * Compiled out: lookup of a tree backref on the rec->backrefs list.
 * With @parent > 0 a full backref with that parent is wanted, otherwise a
 * non-full backref with the given @root.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct tree_backref *back;

                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                } else if (!node->full_backref && back->root == root) {
                        return back;
                }
        }
        return NULL;
}
#endif
7448
7449 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7450                                                 u64 parent, u64 root)
7451 {
7452         struct tree_backref *ref = malloc(sizeof(*ref));
7453
7454         if (!ref)
7455                 return NULL;
7456         memset(&ref->node, 0, sizeof(ref->node));
7457         if (parent > 0) {
7458                 ref->parent = parent;
7459                 ref->node.full_backref = 1;
7460         } else {
7461                 ref->root = root;
7462                 ref->node.full_backref = 0;
7463         }
7464
7465         return ref;
7466 }
7467
#if 0
/*
 * Compiled out: lookup of a data backref on the rec->backrefs list.
 * With @parent > 0 a full backref with that parent is wanted.  Otherwise a
 * non-full backref matching @root, @owner and @offset is wanted; when both
 * the caller and the candidate have found_ref set, a candidate whose bytes or
 * disk_bytenr differ is skipped.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct data_backref *back;

                if (!node->is_data)
                        continue;
                back = to_data_backref(node);

                if (parent > 0) {
                        if (node->full_backref && parent == back->parent)
                                return back;
                        continue;
                }

                if (node->full_backref)
                        continue;
                if (back->root != root || back->owner != owner ||
                    back->offset != offset)
                        continue;
                if (found_ref && node->found_ref &&
                    (back->bytes != bytes ||
                     back->disk_bytenr != disk_bytenr))
                        continue;
                return back;
        }
        return NULL;
}
#endif
7506
7507 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7508                                                 u64 parent, u64 root,
7509                                                 u64 owner, u64 offset,
7510                                                 u64 max_size)
7511 {
7512         struct data_backref *ref = malloc(sizeof(*ref));
7513
7514         if (!ref)
7515                 return NULL;
7516         memset(&ref->node, 0, sizeof(ref->node));
7517         ref->node.is_data = 1;
7518
7519         if (parent > 0) {
7520                 ref->parent = parent;
7521                 ref->owner = 0;
7522                 ref->offset = 0;
7523                 ref->node.full_backref = 1;
7524         } else {
7525                 ref->root = root;
7526                 ref->owner = owner;
7527                 ref->offset = offset;
7528                 ref->node.full_backref = 0;
7529         }
7530         ref->bytes = max_size;
7531         ref->found_ref = 0;
7532         ref->num_refs = 0;
7533         if (max_size > rec->max_size)
7534                 rec->max_size = max_size;
7535         return ref;
7536 }
7537
7538 /* Check if the type of extent matches with its chunk */
7539 static void check_extent_type(struct extent_record *rec)
7540 {
7541         struct btrfs_block_group_cache *bg_cache;
7542
7543         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7544         if (!bg_cache)
7545                 return;
7546
7547         /* data extent, check chunk directly*/
7548         if (!rec->metadata) {
7549                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7550                         rec->wrong_chunk_type = 1;
7551                 return;
7552         }
7553
7554         /* metadata extent, check the obvious case first */
7555         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7556                                  BTRFS_BLOCK_GROUP_METADATA))) {
7557                 rec->wrong_chunk_type = 1;
7558                 return;
7559         }
7560
7561         /*
7562          * Check SYSTEM extent, as it's also marked as metadata, we can only
7563          * make sure it's a SYSTEM extent by its backref
7564          */
7565         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7566                 struct extent_backref *node;
7567                 struct tree_backref *tback;
7568                 u64 bg_type;
7569
7570                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7571                 if (node->is_data) {
7572                         /* tree block shouldn't have data backref */
7573                         rec->wrong_chunk_type = 1;
7574                         return;
7575                 }
7576                 tback = container_of(node, struct tree_backref, node);
7577
7578                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7579                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7580                 else
7581                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7582                 if (!(bg_cache->flags & bg_type))
7583                         rec->wrong_chunk_type = 1;
7584         }
7585 }
7586
7587 /*
7588  * Allocate a new extent record, fill default values from @tmpl and insert int
7589  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7590  * the cache, otherwise it fails.
7591  */
7592 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7593                 struct extent_record *tmpl)
7594 {
7595         struct extent_record *rec;
7596         int ret = 0;
7597
7598         BUG_ON(tmpl->max_size == 0);
7599         rec = malloc(sizeof(*rec));
7600         if (!rec)
7601                 return -ENOMEM;
7602         rec->start = tmpl->start;
7603         rec->max_size = tmpl->max_size;
7604         rec->nr = max(tmpl->nr, tmpl->max_size);
7605         rec->found_rec = tmpl->found_rec;
7606         rec->content_checked = tmpl->content_checked;
7607         rec->owner_ref_checked = tmpl->owner_ref_checked;
7608         rec->num_duplicates = 0;
7609         rec->metadata = tmpl->metadata;
7610         rec->flag_block_full_backref = FLAG_UNSET;
7611         rec->bad_full_backref = 0;
7612         rec->crossing_stripes = 0;
7613         rec->wrong_chunk_type = 0;
7614         rec->is_root = tmpl->is_root;
7615         rec->refs = tmpl->refs;
7616         rec->extent_item_refs = tmpl->extent_item_refs;
7617         rec->parent_generation = tmpl->parent_generation;
7618         INIT_LIST_HEAD(&rec->backrefs);
7619         INIT_LIST_HEAD(&rec->dups);
7620         INIT_LIST_HEAD(&rec->list);
7621         rec->backref_tree = RB_ROOT;
7622         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7623         rec->cache.start = tmpl->start;
7624         rec->cache.size = tmpl->nr;
7625         ret = insert_cache_extent(extent_cache, &rec->cache);
7626         if (ret) {
7627                 free(rec);
7628                 return ret;
7629         }
7630         bytes_used += rec->nr;
7631
7632         if (tmpl->metadata)
7633                 rec->crossing_stripes = check_crossing_stripes(global_info,
7634                                 rec->start, global_info->nodesize);
7635         check_extent_type(rec);
7636         return ret;
7637 }
7638
7639 /*
7640  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7641  * some are hints:
7642  * - refs              - if found, increase refs
7643  * - is_root           - if found, set
7644  * - content_checked   - if found, set
7645  * - owner_ref_checked - if found, set
7646  *
7647  * If not found, create a new one, initialize and insert.
7648  */
7649 static int add_extent_rec(struct cache_tree *extent_cache,
7650                 struct extent_record *tmpl)
7651 {
7652         struct extent_record *rec;
7653         struct cache_extent *cache;
7654         int ret = 0;
7655         int dup = 0;
7656
7657         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7658         if (cache) {
7659                 rec = container_of(cache, struct extent_record, cache);
7660                 if (tmpl->refs)
7661                         rec->refs++;
7662                 if (rec->nr == 1)
7663                         rec->nr = max(tmpl->nr, tmpl->max_size);
7664
7665                 /*
7666                  * We need to make sure to reset nr to whatever the extent
7667                  * record says was the real size, this way we can compare it to
7668                  * the backrefs.
7669                  */
7670                 if (tmpl->found_rec) {
7671                         if (tmpl->start != rec->start || rec->found_rec) {
7672                                 struct extent_record *tmp;
7673
7674                                 dup = 1;
7675                                 if (list_empty(&rec->list))
7676                                         list_add_tail(&rec->list,
7677                                                       &duplicate_extents);
7678
7679                                 /*
7680                                  * We have to do this song and dance in case we
7681                                  * find an extent record that falls inside of
7682                                  * our current extent record but does not have
7683                                  * the same objectid.
7684                                  */
7685                                 tmp = malloc(sizeof(*tmp));
7686                                 if (!tmp)
7687                                         return -ENOMEM;
7688                                 tmp->start = tmpl->start;
7689                                 tmp->max_size = tmpl->max_size;
7690                                 tmp->nr = tmpl->nr;
7691                                 tmp->found_rec = 1;
7692                                 tmp->metadata = tmpl->metadata;
7693                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7694                                 INIT_LIST_HEAD(&tmp->list);
7695                                 list_add_tail(&tmp->list, &rec->dups);
7696                                 rec->num_duplicates++;
7697                         } else {
7698                                 rec->nr = tmpl->nr;
7699                                 rec->found_rec = 1;
7700                         }
7701                 }
7702
7703                 if (tmpl->extent_item_refs && !dup) {
7704                         if (rec->extent_item_refs) {
7705                                 fprintf(stderr, "block %llu rec "
7706                                         "extent_item_refs %llu, passed %llu\n",
7707                                         (unsigned long long)tmpl->start,
7708                                         (unsigned long long)
7709                                                         rec->extent_item_refs,
7710                                         (unsigned long long)tmpl->extent_item_refs);
7711                         }
7712                         rec->extent_item_refs = tmpl->extent_item_refs;
7713                 }
7714                 if (tmpl->is_root)
7715                         rec->is_root = 1;
7716                 if (tmpl->content_checked)
7717                         rec->content_checked = 1;
7718                 if (tmpl->owner_ref_checked)
7719                         rec->owner_ref_checked = 1;
7720                 memcpy(&rec->parent_key, &tmpl->parent_key,
7721                                 sizeof(tmpl->parent_key));
7722                 if (tmpl->parent_generation)
7723                         rec->parent_generation = tmpl->parent_generation;
7724                 if (rec->max_size < tmpl->max_size)
7725                         rec->max_size = tmpl->max_size;
7726
7727                 /*
7728                  * A metadata extent can't cross stripe_len boundary, otherwise
7729                  * kernel scrub won't be able to handle it.
7730                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7731                  * it.
7732                  */
7733                 if (tmpl->metadata)
7734                         rec->crossing_stripes = check_crossing_stripes(
7735                                         global_info, rec->start,
7736                                         global_info->nodesize);
7737                 check_extent_type(rec);
7738                 maybe_free_extent_rec(extent_cache, rec);
7739                 return ret;
7740         }
7741
7742         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7743
7744         return ret;
7745 }
7746
7747 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7748                             u64 parent, u64 root, int found_ref)
7749 {
7750         struct extent_record *rec;
7751         struct tree_backref *back;
7752         struct cache_extent *cache;
7753         int ret;
7754         bool insert = false;
7755
7756         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7757         if (!cache) {
7758                 struct extent_record tmpl;
7759
7760                 memset(&tmpl, 0, sizeof(tmpl));
7761                 tmpl.start = bytenr;
7762                 tmpl.nr = 1;
7763                 tmpl.metadata = 1;
7764                 tmpl.max_size = 1;
7765
7766                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7767                 if (ret)
7768                         return ret;
7769
7770                 /* really a bug in cache_extent implement now */
7771                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7772                 if (!cache)
7773                         return -ENOENT;
7774         }
7775
7776         rec = container_of(cache, struct extent_record, cache);
7777         if (rec->start != bytenr) {
7778                 /*
7779                  * Several cause, from unaligned bytenr to over lapping extents
7780                  */
7781                 return -EEXIST;
7782         }
7783
7784         back = find_tree_backref(rec, parent, root);
7785         if (!back) {
7786                 back = alloc_tree_backref(rec, parent, root);
7787                 if (!back)
7788                         return -ENOMEM;
7789                 insert = true;
7790         }
7791
7792         if (found_ref) {
7793                 if (back->node.found_ref) {
7794                         fprintf(stderr, "Extent back ref already exists "
7795                                 "for %llu parent %llu root %llu \n",
7796                                 (unsigned long long)bytenr,
7797                                 (unsigned long long)parent,
7798                                 (unsigned long long)root);
7799                 }
7800                 back->node.found_ref = 1;
7801         } else {
7802                 if (back->node.found_extent_tree) {
7803                         fprintf(stderr, "Extent back ref already exists "
7804                                 "for %llu parent %llu root %llu \n",
7805                                 (unsigned long long)bytenr,
7806                                 (unsigned long long)parent,
7807                                 (unsigned long long)root);
7808                 }
7809                 back->node.found_extent_tree = 1;
7810         }
7811         if (insert)
7812                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7813                         compare_extent_backref));
7814         check_extent_type(rec);
7815         maybe_free_extent_rec(extent_cache, rec);
7816         return 0;
7817 }
7818
7819 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7820                             u64 parent, u64 root, u64 owner, u64 offset,
7821                             u32 num_refs, int found_ref, u64 max_size)
7822 {
7823         struct extent_record *rec;
7824         struct data_backref *back;
7825         struct cache_extent *cache;
7826         int ret;
7827         bool insert = false;
7828
7829         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7830         if (!cache) {
7831                 struct extent_record tmpl;
7832
7833                 memset(&tmpl, 0, sizeof(tmpl));
7834                 tmpl.start = bytenr;
7835                 tmpl.nr = 1;
7836                 tmpl.max_size = max_size;
7837
7838                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7839                 if (ret)
7840                         return ret;
7841
7842                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7843                 if (!cache)
7844                         abort();
7845         }
7846
7847         rec = container_of(cache, struct extent_record, cache);
7848         if (rec->max_size < max_size)
7849                 rec->max_size = max_size;
7850
7851         /*
7852          * If found_ref is set then max_size is the real size and must match the
7853          * existing refs.  So if we have already found a ref then we need to
7854          * make sure that this ref matches the existing one, otherwise we need
7855          * to add a new backref so we can notice that the backrefs don't match
7856          * and we need to figure out who is telling the truth.  This is to
7857          * account for that awful fsync bug I introduced where we'd end up with
7858          * a btrfs_file_extent_item that would have its length include multiple
7859          * prealloc extents or point inside of a prealloc extent.
7860          */
7861         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7862                                  bytenr, max_size);
7863         if (!back) {
7864                 back = alloc_data_backref(rec, parent, root, owner, offset,
7865                                           max_size);
7866                 BUG_ON(!back);
7867                 insert = true;
7868         }
7869
7870         if (found_ref) {
7871                 BUG_ON(num_refs != 1);
7872                 if (back->node.found_ref)
7873                         BUG_ON(back->bytes != max_size);
7874                 back->node.found_ref = 1;
7875                 back->found_ref += 1;
7876                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7877                         back->bytes = max_size;
7878                         back->disk_bytenr = bytenr;
7879
7880                         /* Need to reinsert if not already in the tree */
7881                         if (!insert) {
7882                                 rb_erase(&back->node.node, &rec->backref_tree);
7883                                 insert = true;
7884                         }
7885                 }
7886                 rec->refs += 1;
7887                 rec->content_checked = 1;
7888                 rec->owner_ref_checked = 1;
7889         } else {
7890                 if (back->node.found_extent_tree) {
7891                         fprintf(stderr, "Extent back ref already exists "
7892                                 "for %llu parent %llu root %llu "
7893                                 "owner %llu offset %llu num_refs %lu\n",
7894                                 (unsigned long long)bytenr,
7895                                 (unsigned long long)parent,
7896                                 (unsigned long long)root,
7897                                 (unsigned long long)owner,
7898                                 (unsigned long long)offset,
7899                                 (unsigned long)num_refs);
7900                 }
7901                 back->num_refs = num_refs;
7902                 back->node.found_extent_tree = 1;
7903         }
7904         if (insert)
7905                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7906                         compare_extent_backref));
7907
7908         maybe_free_extent_rec(extent_cache, rec);
7909         return 0;
7910 }
7911
7912 static int add_pending(struct cache_tree *pending,
7913                        struct cache_tree *seen, u64 bytenr, u32 size)
7914 {
7915         int ret;
7916         ret = add_cache_extent(seen, bytenr, size);
7917         if (ret)
7918                 return ret;
7919         add_cache_extent(pending, bytenr, size);
7920         return 0;
7921 }
7922
7923 static int pick_next_pending(struct cache_tree *pending,
7924                         struct cache_tree *reada,
7925                         struct cache_tree *nodes,
7926                         u64 last, struct block_info *bits, int bits_nr,
7927                         int *reada_bits)
7928 {
7929         unsigned long node_start = last;
7930         struct cache_extent *cache;
7931         int ret;
7932
7933         cache = search_cache_extent(reada, 0);
7934         if (cache) {
7935                 bits[0].start = cache->start;
7936                 bits[0].size = cache->size;
7937                 *reada_bits = 1;
7938                 return 1;
7939         }
7940         *reada_bits = 0;
7941         if (node_start > 32768)
7942                 node_start -= 32768;
7943
7944         cache = search_cache_extent(nodes, node_start);
7945         if (!cache)
7946                 cache = search_cache_extent(nodes, 0);
7947
7948         if (!cache) {
7949                  cache = search_cache_extent(pending, 0);
7950                  if (!cache)
7951                          return 0;
7952                  ret = 0;
7953                  do {
7954                          bits[ret].start = cache->start;
7955                          bits[ret].size = cache->size;
7956                          cache = next_cache_extent(cache);
7957                          ret++;
7958                  } while (cache && ret < bits_nr);
7959                  return ret;
7960         }
7961
7962         ret = 0;
7963         do {
7964                 bits[ret].start = cache->start;
7965                 bits[ret].size = cache->size;
7966                 cache = next_cache_extent(cache);
7967                 ret++;
7968         } while (cache && ret < bits_nr);
7969
7970         if (bits_nr - ret > 8) {
7971                 u64 lookup = bits[0].start + bits[0].size;
7972                 struct cache_extent *next;
7973                 next = search_cache_extent(pending, lookup);
7974                 while(next) {
7975                         if (next->start - lookup > 32768)
7976                                 break;
7977                         bits[ret].start = next->start;
7978                         bits[ret].size = next->size;
7979                         lookup = next->start + next->size;
7980                         ret++;
7981                         if (ret == bits_nr)
7982                                 break;
7983                         next = next_cache_extent(next);
7984                         if (!next)
7985                                 break;
7986                 }
7987         }
7988         return ret;
7989 }
7990
7991 static void free_chunk_record(struct cache_extent *cache)
7992 {
7993         struct chunk_record *rec;
7994
7995         rec = container_of(cache, struct chunk_record, cache);
7996         list_del_init(&rec->list);
7997         list_del_init(&rec->dextents);
7998         free(rec);
7999 }
8000
/* Free every chunk record held in @chunk_cache via free_chunk_record(). */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
        cache_tree_free_extents(chunk_cache, free_chunk_record);
}
8005
8006 static void free_device_record(struct rb_node *node)
8007 {
8008         struct device_record *rec;
8009
8010         rec = container_of(node, struct device_record, node);
8011         free(rec);
8012 }
8013
/*
 * NOTE(review): presumably expands to the helper that frees a whole device
 * cache rb-tree using free_device_record() - confirm against the macro
 * definition, which is not in this file section.
 */
FREE_RB_BASED_TREE(device_cache, free_device_record);
8015
8016 int insert_block_group_record(struct block_group_tree *tree,
8017                               struct block_group_record *bg_rec)
8018 {
8019         int ret;
8020
8021         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8022         if (ret)
8023                 return ret;
8024
8025         list_add_tail(&bg_rec->list, &tree->block_groups);
8026         return 0;
8027 }
8028
8029 static void free_block_group_record(struct cache_extent *cache)
8030 {
8031         struct block_group_record *rec;
8032
8033         rec = container_of(cache, struct block_group_record, cache);
8034         list_del_init(&rec->list);
8035         free(rec);
8036 }
8037
/* Free every block group record held in @tree via free_block_group_record(). */
void free_block_group_tree(struct block_group_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_block_group_record);
}
8042
8043 int insert_device_extent_record(struct device_extent_tree *tree,
8044                                 struct device_extent_record *de_rec)
8045 {
8046         int ret;
8047
8048         /*
8049          * Device extent is a bit different from the other extents, because
8050          * the extents which belong to the different devices may have the
8051          * same start and size, so we need use the special extent cache
8052          * search/insert functions.
8053          */
8054         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8055         if (ret)
8056                 return ret;
8057
8058         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8059         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
8060         return 0;
8061 }
8062
8063 static void free_device_extent_record(struct cache_extent *cache)
8064 {
8065         struct device_extent_record *rec;
8066
8067         rec = container_of(cache, struct device_extent_record, cache);
8068         if (!list_empty(&rec->chunk_list))
8069                 list_del_init(&rec->chunk_list);
8070         if (!list_empty(&rec->device_list))
8071                 list_del_init(&rec->device_list);
8072         free(rec);
8073 }
8074
/* Free every device extent record in @tree via free_device_extent_record(). */
void free_device_extent_tree(struct device_extent_tree *tree)
{
        cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
8079
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent reference item at @slot of @leaf into a backref.
 *
 * The key's objectid is used as the extent bytenr and its offset as the
 * parent.  A reference objectid below BTRFS_FIRST_FREE_OBJECTID is recorded
 * as a tree backref, anything else as a data backref carrying the v0
 * reference count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                        0, 0);
}
#endif
8100
8101 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8102                                             struct btrfs_key *key,
8103                                             int slot)
8104 {
8105         struct btrfs_chunk *ptr;
8106         struct chunk_record *rec;
8107         int num_stripes, i;
8108
8109         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8110         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8111
8112         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8113         if (!rec) {
8114                 fprintf(stderr, "memory allocation failed\n");
8115                 exit(-1);
8116         }
8117
8118         INIT_LIST_HEAD(&rec->list);
8119         INIT_LIST_HEAD(&rec->dextents);
8120         rec->bg_rec = NULL;
8121
8122         rec->cache.start = key->offset;
8123         rec->cache.size = btrfs_chunk_length(leaf, ptr);
8124
8125         rec->generation = btrfs_header_generation(leaf);
8126
8127         rec->objectid = key->objectid;
8128         rec->type = key->type;
8129         rec->offset = key->offset;
8130
8131         rec->length = rec->cache.size;
8132         rec->owner = btrfs_chunk_owner(leaf, ptr);
8133         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8134         rec->type_flags = btrfs_chunk_type(leaf, ptr);
8135         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8136         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8137         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8138         rec->num_stripes = num_stripes;
8139         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8140
8141         for (i = 0; i < rec->num_stripes; ++i) {
8142                 rec->stripes[i].devid =
8143                         btrfs_stripe_devid_nr(leaf, ptr, i);
8144                 rec->stripes[i].offset =
8145                         btrfs_stripe_offset_nr(leaf, ptr, i);
8146                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8147                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
8148                                 BTRFS_UUID_SIZE);
8149         }
8150
8151         return rec;
8152 }
8153
8154 static int process_chunk_item(struct cache_tree *chunk_cache,
8155                               struct btrfs_key *key, struct extent_buffer *eb,
8156                               int slot)
8157 {
8158         struct chunk_record *rec;
8159         struct btrfs_chunk *chunk;
8160         int ret = 0;
8161
8162         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8163         /*
8164          * Do extra check for this chunk item,
8165          *
8166          * It's still possible one can craft a leaf with CHUNK_ITEM, with
8167          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8168          * and owner<->key_type check.
8169          */
8170         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8171                                       key->offset);
8172         if (ret < 0) {
8173                 error("chunk(%llu, %llu) is not valid, ignore it",
8174                       key->offset, btrfs_chunk_length(eb, chunk));
8175                 return 0;
8176         }
8177         rec = btrfs_new_chunk_record(eb, key, slot);
8178         ret = insert_cache_extent(chunk_cache, &rec->cache);
8179         if (ret) {
8180                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8181                         rec->offset, rec->length);
8182                 free(rec);
8183         }
8184
8185         return ret;
8186 }
8187
8188 static int process_device_item(struct rb_root *dev_cache,
8189                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8190 {
8191         struct btrfs_dev_item *ptr;
8192         struct device_record *rec;
8193         int ret = 0;
8194
8195         ptr = btrfs_item_ptr(eb,
8196                 slot, struct btrfs_dev_item);
8197
8198         rec = malloc(sizeof(*rec));
8199         if (!rec) {
8200                 fprintf(stderr, "memory allocation failed\n");
8201                 return -ENOMEM;
8202         }
8203
8204         rec->devid = key->offset;
8205         rec->generation = btrfs_header_generation(eb);
8206
8207         rec->objectid = key->objectid;
8208         rec->type = key->type;
8209         rec->offset = key->offset;
8210
8211         rec->devid = btrfs_device_id(eb, ptr);
8212         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8213         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8214
8215         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8216         if (ret) {
8217                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
8218                 free(rec);
8219         }
8220
8221         return ret;
8222 }
8223
8224 struct block_group_record *
8225 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8226                              int slot)
8227 {
8228         struct btrfs_block_group_item *ptr;
8229         struct block_group_record *rec;
8230
8231         rec = calloc(1, sizeof(*rec));
8232         if (!rec) {
8233                 fprintf(stderr, "memory allocation failed\n");
8234                 exit(-1);
8235         }
8236
8237         rec->cache.start = key->objectid;
8238         rec->cache.size = key->offset;
8239
8240         rec->generation = btrfs_header_generation(leaf);
8241
8242         rec->objectid = key->objectid;
8243         rec->type = key->type;
8244         rec->offset = key->offset;
8245
8246         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8247         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8248
8249         INIT_LIST_HEAD(&rec->list);
8250
8251         return rec;
8252 }
8253
8254 static int process_block_group_item(struct block_group_tree *block_group_cache,
8255                                     struct btrfs_key *key,
8256                                     struct extent_buffer *eb, int slot)
8257 {
8258         struct block_group_record *rec;
8259         int ret = 0;
8260
8261         rec = btrfs_new_block_group_record(eb, key, slot);
8262         ret = insert_block_group_record(block_group_cache, rec);
8263         if (ret) {
8264                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8265                         rec->objectid, rec->offset);
8266                 free(rec);
8267         }
8268
8269         return ret;
8270 }
8271
8272 struct device_extent_record *
8273 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8274                                struct btrfs_key *key, int slot)
8275 {
8276         struct device_extent_record *rec;
8277         struct btrfs_dev_extent *ptr;
8278
8279         rec = calloc(1, sizeof(*rec));
8280         if (!rec) {
8281                 fprintf(stderr, "memory allocation failed\n");
8282                 exit(-1);
8283         }
8284
8285         rec->cache.objectid = key->objectid;
8286         rec->cache.start = key->offset;
8287
8288         rec->generation = btrfs_header_generation(leaf);
8289
8290         rec->objectid = key->objectid;
8291         rec->type = key->type;
8292         rec->offset = key->offset;
8293
8294         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8295         rec->chunk_objecteid =
8296                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8297         rec->chunk_offset =
8298                 btrfs_dev_extent_chunk_offset(leaf, ptr);
8299         rec->length = btrfs_dev_extent_length(leaf, ptr);
8300         rec->cache.size = rec->length;
8301
8302         INIT_LIST_HEAD(&rec->chunk_list);
8303         INIT_LIST_HEAD(&rec->device_list);
8304
8305         return rec;
8306 }
8307
8308 static int
8309 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8310                            struct btrfs_key *key, struct extent_buffer *eb,
8311                            int slot)
8312 {
8313         struct device_extent_record *rec;
8314         int ret;
8315
8316         rec = btrfs_new_device_extent_record(eb, key, slot);
8317         ret = insert_device_extent_record(dev_extent_cache, rec);
8318         if (ret) {
8319                 fprintf(stderr,
8320                         "Device extent[%llu, %llu, %llu] existed.\n",
8321                         rec->objectid, rec->offset, rec->length);
8322                 free(rec);
8323         }
8324
8325         return ret;
8326 }
8327
8328 static int process_extent_item(struct btrfs_root *root,
8329                                struct cache_tree *extent_cache,
8330                                struct extent_buffer *eb, int slot)
8331 {
8332         struct btrfs_extent_item *ei;
8333         struct btrfs_extent_inline_ref *iref;
8334         struct btrfs_extent_data_ref *dref;
8335         struct btrfs_shared_data_ref *sref;
8336         struct btrfs_key key;
8337         struct extent_record tmpl;
8338         unsigned long end;
8339         unsigned long ptr;
8340         int ret;
8341         int type;
8342         u32 item_size = btrfs_item_size_nr(eb, slot);
8343         u64 refs = 0;
8344         u64 offset;
8345         u64 num_bytes;
8346         int metadata = 0;
8347
8348         btrfs_item_key_to_cpu(eb, &key, slot);
8349
8350         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8351                 metadata = 1;
8352                 num_bytes = root->fs_info->nodesize;
8353         } else {
8354                 num_bytes = key.offset;
8355         }
8356
8357         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8358                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8359                       key.objectid, root->fs_info->sectorsize);
8360                 return -EIO;
8361         }
8362         if (item_size < sizeof(*ei)) {
8363 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8364                 struct btrfs_extent_item_v0 *ei0;
8365                 BUG_ON(item_size != sizeof(*ei0));
8366                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8367                 refs = btrfs_extent_refs_v0(eb, ei0);
8368 #else
8369                 BUG();
8370 #endif
8371                 memset(&tmpl, 0, sizeof(tmpl));
8372                 tmpl.start = key.objectid;
8373                 tmpl.nr = num_bytes;
8374                 tmpl.extent_item_refs = refs;
8375                 tmpl.metadata = metadata;
8376                 tmpl.found_rec = 1;
8377                 tmpl.max_size = num_bytes;
8378
8379                 return add_extent_rec(extent_cache, &tmpl);
8380         }
8381
8382         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8383         refs = btrfs_extent_refs(eb, ei);
8384         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8385                 metadata = 1;
8386         else
8387                 metadata = 0;
8388         if (metadata && num_bytes != root->fs_info->nodesize) {
8389                 error("ignore invalid metadata extent, length %llu does not equal to %u",
8390                       num_bytes, root->fs_info->nodesize);
8391                 return -EIO;
8392         }
8393         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8394                 error("ignore invalid data extent, length %llu is not aligned to %u",
8395                       num_bytes, root->fs_info->sectorsize);
8396                 return -EIO;
8397         }
8398
8399         memset(&tmpl, 0, sizeof(tmpl));
8400         tmpl.start = key.objectid;
8401         tmpl.nr = num_bytes;
8402         tmpl.extent_item_refs = refs;
8403         tmpl.metadata = metadata;
8404         tmpl.found_rec = 1;
8405         tmpl.max_size = num_bytes;
8406         add_extent_rec(extent_cache, &tmpl);
8407
8408         ptr = (unsigned long)(ei + 1);
8409         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8410             key.type == BTRFS_EXTENT_ITEM_KEY)
8411                 ptr += sizeof(struct btrfs_tree_block_info);
8412
8413         end = (unsigned long)ei + item_size;
8414         while (ptr < end) {
8415                 iref = (struct btrfs_extent_inline_ref *)ptr;
8416                 type = btrfs_extent_inline_ref_type(eb, iref);
8417                 offset = btrfs_extent_inline_ref_offset(eb, iref);
8418                 switch (type) {
8419                 case BTRFS_TREE_BLOCK_REF_KEY:
8420                         ret = add_tree_backref(extent_cache, key.objectid,
8421                                         0, offset, 0);
8422                         if (ret < 0)
8423                                 error(
8424                         "add_tree_backref failed (extent items tree block): %s",
8425                                       strerror(-ret));
8426                         break;
8427                 case BTRFS_SHARED_BLOCK_REF_KEY:
8428                         ret = add_tree_backref(extent_cache, key.objectid,
8429                                         offset, 0, 0);
8430                         if (ret < 0)
8431                                 error(
8432                         "add_tree_backref failed (extent items shared block): %s",
8433                                       strerror(-ret));
8434                         break;
8435                 case BTRFS_EXTENT_DATA_REF_KEY:
8436                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8437                         add_data_backref(extent_cache, key.objectid, 0,
8438                                         btrfs_extent_data_ref_root(eb, dref),
8439                                         btrfs_extent_data_ref_objectid(eb,
8440                                                                        dref),
8441                                         btrfs_extent_data_ref_offset(eb, dref),
8442                                         btrfs_extent_data_ref_count(eb, dref),
8443                                         0, num_bytes);
8444                         break;
8445                 case BTRFS_SHARED_DATA_REF_KEY:
8446                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8447                         add_data_backref(extent_cache, key.objectid, offset,
8448                                         0, 0, 0,
8449                                         btrfs_shared_data_ref_count(eb, sref),
8450                                         0, num_bytes);
8451                         break;
8452                 default:
8453                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8454                                 key.objectid, key.type, num_bytes);
8455                         goto out;
8456                 }
8457                 ptr += btrfs_extent_inline_ref_size(type);
8458         }
8459         WARN_ON(ptr > end);
8460 out:
8461         return 0;
8462 }
8463
8464 static int check_cache_range(struct btrfs_root *root,
8465                              struct btrfs_block_group_cache *cache,
8466                              u64 offset, u64 bytes)
8467 {
8468         struct btrfs_free_space *entry;
8469         u64 *logical;
8470         u64 bytenr;
8471         int stripe_len;
8472         int i, nr, ret;
8473
8474         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8475                 bytenr = btrfs_sb_offset(i);
8476                 ret = btrfs_rmap_block(root->fs_info,
8477                                        cache->key.objectid, bytenr, 0,
8478                                        &logical, &nr, &stripe_len);
8479                 if (ret)
8480                         return ret;
8481
8482                 while (nr--) {
8483                         if (logical[nr] + stripe_len <= offset)
8484                                 continue;
8485                         if (offset + bytes <= logical[nr])
8486                                 continue;
8487                         if (logical[nr] == offset) {
8488                                 if (stripe_len >= bytes) {
8489                                         free(logical);
8490                                         return 0;
8491                                 }
8492                                 bytes -= stripe_len;
8493                                 offset += stripe_len;
8494                         } else if (logical[nr] < offset) {
8495                                 if (logical[nr] + stripe_len >=
8496                                     offset + bytes) {
8497                                         free(logical);
8498                                         return 0;
8499                                 }
8500                                 bytes = (offset + bytes) -
8501                                         (logical[nr] + stripe_len);
8502                                 offset = logical[nr] + stripe_len;
8503                         } else {
8504                                 /*
8505                                  * Could be tricky, the super may land in the
8506                                  * middle of the area we're checking.  First
8507                                  * check the easiest case, it's at the end.
8508                                  */
8509                                 if (logical[nr] + stripe_len >=
8510                                     bytes + offset) {
8511                                         bytes = logical[nr] - offset;
8512                                         continue;
8513                                 }
8514
8515                                 /* Check the left side */
8516                                 ret = check_cache_range(root, cache,
8517                                                         offset,
8518                                                         logical[nr] - offset);
8519                                 if (ret) {
8520                                         free(logical);
8521                                         return ret;
8522                                 }
8523
8524                                 /* Now we continue with the right side */
8525                                 bytes = (offset + bytes) -
8526                                         (logical[nr] + stripe_len);
8527                                 offset = logical[nr] + stripe_len;
8528                         }
8529                 }
8530
8531                 free(logical);
8532         }
8533
8534         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8535         if (!entry) {
8536                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8537                         offset, offset+bytes);
8538                 return -EINVAL;
8539         }
8540
8541         if (entry->offset != offset) {
8542                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8543                         entry->offset);
8544                 return -EINVAL;
8545         }
8546
8547         if (entry->bytes != bytes) {
8548                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8549                         bytes, entry->bytes, offset);
8550                 return -EINVAL;
8551         }
8552
8553         unlink_free_space(cache->free_space_ctl, entry);
8554         free(entry);
8555         return 0;
8556 }
8557
8558 static int verify_space_cache(struct btrfs_root *root,
8559                               struct btrfs_block_group_cache *cache)
8560 {
8561         struct btrfs_path path;
8562         struct extent_buffer *leaf;
8563         struct btrfs_key key;
8564         u64 last;
8565         int ret = 0;
8566
8567         root = root->fs_info->extent_root;
8568
8569         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8570
8571         btrfs_init_path(&path);
8572         key.objectid = last;
8573         key.offset = 0;
8574         key.type = BTRFS_EXTENT_ITEM_KEY;
8575         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8576         if (ret < 0)
8577                 goto out;
8578         ret = 0;
8579         while (1) {
8580                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8581                         ret = btrfs_next_leaf(root, &path);
8582                         if (ret < 0)
8583                                 goto out;
8584                         if (ret > 0) {
8585                                 ret = 0;
8586                                 break;
8587                         }
8588                 }
8589                 leaf = path.nodes[0];
8590                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8591                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8592                         break;
8593                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8594                     key.type != BTRFS_METADATA_ITEM_KEY) {
8595                         path.slots[0]++;
8596                         continue;
8597                 }
8598
8599                 if (last == key.objectid) {
8600                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8601                                 last = key.objectid + key.offset;
8602                         else
8603                                 last = key.objectid + root->fs_info->nodesize;
8604                         path.slots[0]++;
8605                         continue;
8606                 }
8607
8608                 ret = check_cache_range(root, cache, last,
8609                                         key.objectid - last);
8610                 if (ret)
8611                         break;
8612                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8613                         last = key.objectid + key.offset;
8614                 else
8615                         last = key.objectid + root->fs_info->nodesize;
8616                 path.slots[0]++;
8617         }
8618
8619         if (last < cache->key.objectid + cache->key.offset)
8620                 ret = check_cache_range(root, cache, last,
8621                                         cache->key.objectid +
8622                                         cache->key.offset - last);
8623
8624 out:
8625         btrfs_release_path(&path);
8626
8627         if (!ret &&
8628             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8629                 fprintf(stderr, "There are still entries left in the space "
8630                         "cache\n");
8631                 ret = -EINVAL;
8632         }
8633
8634         return ret;
8635 }
8636
8637 static int check_space_cache(struct btrfs_root *root)
8638 {
8639         struct btrfs_block_group_cache *cache;
8640         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8641         int ret;
8642         int error = 0;
8643
8644         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8645             btrfs_super_generation(root->fs_info->super_copy) !=
8646             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8647                 printf("cache and super generation don't match, space cache "
8648                        "will be invalidated\n");
8649                 return 0;
8650         }
8651
8652         if (ctx.progress_enabled) {
8653                 ctx.tp = TASK_FREE_SPACE;
8654                 task_start(ctx.info);
8655         }
8656
8657         while (1) {
8658                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8659                 if (!cache)
8660                         break;
8661
8662                 start = cache->key.objectid + cache->key.offset;
8663                 if (!cache->free_space_ctl) {
8664                         if (btrfs_init_free_space_ctl(cache,
8665                                                 root->fs_info->sectorsize)) {
8666                                 ret = -ENOMEM;
8667                                 break;
8668                         }
8669                 } else {
8670                         btrfs_remove_free_space_cache(cache);
8671                 }
8672
8673                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8674                         ret = exclude_super_stripes(root, cache);
8675                         if (ret) {
8676                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8677                                         strerror(-ret));
8678                                 error++;
8679                                 continue;
8680                         }
8681                         ret = load_free_space_tree(root->fs_info, cache);
8682                         free_excluded_extents(root, cache);
8683                         if (ret < 0) {
8684                                 fprintf(stderr, "could not load free space tree: %s\n",
8685                                         strerror(-ret));
8686                                 error++;
8687                                 continue;
8688                         }
8689                         error += ret;
8690                 } else {
8691                         ret = load_free_space_cache(root->fs_info, cache);
8692                         if (!ret)
8693                                 continue;
8694                 }
8695
8696                 ret = verify_space_cache(root, cache);
8697                 if (ret) {
8698                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8699                                 cache->key.objectid);
8700                         error++;
8701                 }
8702         }
8703
8704         task_stop(ctx.info);
8705
8706         return error ? -EINVAL : 0;
8707 }
8708
8709 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8710                         u64 num_bytes, unsigned long leaf_offset,
8711                         struct extent_buffer *eb) {
8712
8713         struct btrfs_fs_info *fs_info = root->fs_info;
8714         u64 offset = 0;
8715         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8716         char *data;
8717         unsigned long csum_offset;
8718         u32 csum;
8719         u32 csum_expected;
8720         u64 read_len;
8721         u64 data_checked = 0;
8722         u64 tmp;
8723         int ret = 0;
8724         int mirror;
8725         int num_copies;
8726
8727         if (num_bytes % fs_info->sectorsize)
8728                 return -EINVAL;
8729
8730         data = malloc(num_bytes);
8731         if (!data)
8732                 return -ENOMEM;
8733
8734         while (offset < num_bytes) {
8735                 mirror = 0;
8736 again:
8737                 read_len = num_bytes - offset;
8738                 /* read as much space once a time */
8739                 ret = read_extent_data(fs_info, data + offset,
8740                                 bytenr + offset, &read_len, mirror);
8741                 if (ret)
8742                         goto out;
8743                 data_checked = 0;
8744                 /* verify every 4k data's checksum */
8745                 while (data_checked < read_len) {
8746                         csum = ~(u32)0;
8747                         tmp = offset + data_checked;
8748
8749                         csum = btrfs_csum_data((char *)data + tmp,
8750                                                csum, fs_info->sectorsize);
8751                         btrfs_csum_final(csum, (u8 *)&csum);
8752
8753                         csum_offset = leaf_offset +
8754                                  tmp / fs_info->sectorsize * csum_size;
8755                         read_extent_buffer(eb, (char *)&csum_expected,
8756                                            csum_offset, csum_size);
8757                         /* try another mirror */
8758                         if (csum != csum_expected) {
8759                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8760                                                 mirror, bytenr + tmp,
8761                                                 csum, csum_expected);
8762                                 num_copies = btrfs_num_copies(root->fs_info,
8763                                                 bytenr, num_bytes);
8764                                 if (mirror < num_copies - 1) {
8765                                         mirror += 1;
8766                                         goto again;
8767                                 }
8768                         }
8769                         data_checked += fs_info->sectorsize;
8770                 }
8771                 offset += read_len;
8772         }
8773 out:
8774         free(data);
8775         return ret;
8776 }
8777
8778 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8779                                u64 num_bytes)
8780 {
8781         struct btrfs_path path;
8782         struct extent_buffer *leaf;
8783         struct btrfs_key key;
8784         int ret;
8785
8786         btrfs_init_path(&path);
8787         key.objectid = bytenr;
8788         key.type = BTRFS_EXTENT_ITEM_KEY;
8789         key.offset = (u64)-1;
8790
8791 again:
8792         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8793                                 0, 0);
8794         if (ret < 0) {
8795                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8796                 btrfs_release_path(&path);
8797                 return ret;
8798         } else if (ret) {
8799                 if (path.slots[0] > 0) {
8800                         path.slots[0]--;
8801                 } else {
8802                         ret = btrfs_prev_leaf(root, &path);
8803                         if (ret < 0) {
8804                                 goto out;
8805                         } else if (ret > 0) {
8806                                 ret = 0;
8807                                 goto out;
8808                         }
8809                 }
8810         }
8811
8812         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8813
8814         /*
8815          * Block group items come before extent items if they have the same
8816          * bytenr, so walk back one more just in case.  Dear future traveller,
8817          * first congrats on mastering time travel.  Now if it's not too much
8818          * trouble could you go back to 2006 and tell Chris to make the
8819          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8820          * EXTENT_ITEM_KEY please?
8821          */
8822         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8823                 if (path.slots[0] > 0) {
8824                         path.slots[0]--;
8825                 } else {
8826                         ret = btrfs_prev_leaf(root, &path);
8827                         if (ret < 0) {
8828                                 goto out;
8829                         } else if (ret > 0) {
8830                                 ret = 0;
8831                                 goto out;
8832                         }
8833                 }
8834                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8835         }
8836
8837         while (num_bytes) {
8838                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8839                         ret = btrfs_next_leaf(root, &path);
8840                         if (ret < 0) {
8841                                 fprintf(stderr, "Error going to next leaf "
8842                                         "%d\n", ret);
8843                                 btrfs_release_path(&path);
8844                                 return ret;
8845                         } else if (ret) {
8846                                 break;
8847                         }
8848                 }
8849                 leaf = path.nodes[0];
8850                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8851                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8852                         path.slots[0]++;
8853                         continue;
8854                 }
8855                 if (key.objectid + key.offset < bytenr) {
8856                         path.slots[0]++;
8857                         continue;
8858                 }
8859                 if (key.objectid > bytenr + num_bytes)
8860                         break;
8861
8862                 if (key.objectid == bytenr) {
8863                         if (key.offset >= num_bytes) {
8864                                 num_bytes = 0;
8865                                 break;
8866                         }
8867                         num_bytes -= key.offset;
8868                         bytenr += key.offset;
8869                 } else if (key.objectid < bytenr) {
8870                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8871                                 num_bytes = 0;
8872                                 break;
8873                         }
8874                         num_bytes = (bytenr + num_bytes) -
8875                                 (key.objectid + key.offset);
8876                         bytenr = key.objectid + key.offset;
8877                 } else {
8878                         if (key.objectid + key.offset < bytenr + num_bytes) {
8879                                 u64 new_start = key.objectid + key.offset;
8880                                 u64 new_bytes = bytenr + num_bytes - new_start;
8881
8882                                 /*
8883                                  * Weird case, the extent is in the middle of
8884                                  * our range, we'll have to search one side
8885                                  * and then the other.  Not sure if this happens
8886                                  * in real life, but no harm in coding it up
8887                                  * anyway just in case.
8888                                  */
8889                                 btrfs_release_path(&path);
8890                                 ret = check_extent_exists(root, new_start,
8891                                                           new_bytes);
8892                                 if (ret) {
8893                                         fprintf(stderr, "Right section didn't "
8894                                                 "have a record\n");
8895                                         break;
8896                                 }
8897                                 num_bytes = key.objectid - bytenr;
8898                                 goto again;
8899                         }
8900                         num_bytes = key.objectid - bytenr;
8901                 }
8902                 path.slots[0]++;
8903         }
8904         ret = 0;
8905
8906 out:
8907         if (num_bytes && !ret) {
8908                 fprintf(stderr, "There are no extents for csum range "
8909                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8910                 ret = 1;
8911         }
8912
8913         btrfs_release_path(&path);
8914         return ret;
8915 }
8916
8917 static int check_csums(struct btrfs_root *root)
8918 {
8919         struct btrfs_path path;
8920         struct extent_buffer *leaf;
8921         struct btrfs_key key;
8922         u64 offset = 0, num_bytes = 0;
8923         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8924         int errors = 0;
8925         int ret;
8926         u64 data_len;
8927         unsigned long leaf_offset;
8928
8929         root = root->fs_info->csum_root;
8930         if (!extent_buffer_uptodate(root->node)) {
8931                 fprintf(stderr, "No valid csum tree found\n");
8932                 return -ENOENT;
8933         }
8934
8935         btrfs_init_path(&path);
8936         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8937         key.type = BTRFS_EXTENT_CSUM_KEY;
8938         key.offset = 0;
8939         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8940         if (ret < 0) {
8941                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8942                 btrfs_release_path(&path);
8943                 return ret;
8944         }
8945
8946         if (ret > 0 && path.slots[0])
8947                 path.slots[0]--;
8948         ret = 0;
8949
8950         while (1) {
8951                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8952                         ret = btrfs_next_leaf(root, &path);
8953                         if (ret < 0) {
8954                                 fprintf(stderr, "Error going to next leaf "
8955                                         "%d\n", ret);
8956                                 break;
8957                         }
8958                         if (ret)
8959                                 break;
8960                 }
8961                 leaf = path.nodes[0];
8962
8963                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8964                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8965                         path.slots[0]++;
8966                         continue;
8967                 }
8968
8969                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8970                               csum_size) * root->fs_info->sectorsize;
8971                 if (!check_data_csum)
8972                         goto skip_csum_check;
8973                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8974                 ret = check_extent_csums(root, key.offset, data_len,
8975                                          leaf_offset, leaf);
8976                 if (ret)
8977                         break;
8978 skip_csum_check:
8979                 if (!num_bytes) {
8980                         offset = key.offset;
8981                 } else if (key.offset != offset + num_bytes) {
8982                         ret = check_extent_exists(root, offset, num_bytes);
8983                         if (ret) {
8984                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8985                                         "there is no extent record\n",
8986                                         offset, offset+num_bytes);
8987                                 errors++;
8988                         }
8989                         offset = key.offset;
8990                         num_bytes = 0;
8991                 }
8992                 num_bytes += data_len;
8993                 path.slots[0]++;
8994         }
8995
8996         btrfs_release_path(&path);
8997         return errors;
8998 }
8999
9000 static int is_dropped_key(struct btrfs_key *key,
9001                           struct btrfs_key *drop_key) {
9002         if (key->objectid < drop_key->objectid)
9003                 return 1;
9004         else if (key->objectid == drop_key->objectid) {
9005                 if (key->type < drop_key->type)
9006                         return 1;
9007                 else if (key->type == drop_key->type) {
9008                         if (key->offset < drop_key->offset)
9009                                 return 1;
9010                 }
9011         }
9012         return 0;
9013 }
9014
9015 /*
9016  * Here are the rules for FULL_BACKREF.
9017  *
9018  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9019  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9020  *      FULL_BACKREF set.
9021  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
9022  *    if it happened after the relocation occurred since we'll have dropped the
9023  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9024  *    have no real way to know for sure.
9025  *
9026  * We process the blocks one root at a time, and we start from the lowest root
9027  * objectid and go to the highest.  So we can just lookup the owner backref for
9028  * the record and if we don't find it then we know it doesn't exist and we have
9029  * a FULL BACKREF.
9030  *
9031  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
9032  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9033  * be set or not and then we can check later once we've gathered all the refs.
9034  */
9035 static int calc_extent_flag(struct cache_tree *extent_cache,
9036                            struct extent_buffer *buf,
9037                            struct root_item_record *ri,
9038                            u64 *flags)
9039 {
9040         struct extent_record *rec;
9041         struct cache_extent *cache;
9042         struct tree_backref *tback;
9043         u64 owner = 0;
9044
9045         cache = lookup_cache_extent(extent_cache, buf->start, 1);
9046         /* we have added this extent before */
9047         if (!cache)
9048                 return -ENOENT;
9049
9050         rec = container_of(cache, struct extent_record, cache);
9051
9052         /*
9053          * Except file/reloc tree, we can not have
9054          * FULL BACKREF MODE
9055          */
9056         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9057                 goto normal;
9058         /*
9059          * root node
9060          */
9061         if (buf->start == ri->bytenr)
9062                 goto normal;
9063
9064         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9065                 goto full_backref;
9066
9067         owner = btrfs_header_owner(buf);
9068         if (owner == ri->objectid)
9069                 goto normal;
9070
9071         tback = find_tree_backref(rec, 0, owner);
9072         if (!tback)
9073                 goto full_backref;
9074 normal:
9075         *flags = 0;
9076         if (rec->flag_block_full_backref != FLAG_UNSET &&
9077             rec->flag_block_full_backref != 0)
9078                 rec->bad_full_backref = 1;
9079         return 0;
9080 full_backref:
9081         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9082         if (rec->flag_block_full_backref != FLAG_UNSET &&
9083             rec->flag_block_full_backref != 1)
9084                 rec->bad_full_backref = 1;
9085         return 0;
9086 }
9087
9088 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9089 {
9090         fprintf(stderr, "Invalid key type(");
9091         print_key_type(stderr, 0, key_type);
9092         fprintf(stderr, ") found in root(");
9093         print_objectid(stderr, rootid, 0);
9094         fprintf(stderr, ")\n");
9095 }
9096
9097 /*
9098  * Check if the key is valid with its extent buffer.
9099  *
9100  * This is a early check in case invalid key exists in a extent buffer
9101  * This is not comprehensive yet, but should prevent wrong key/item passed
9102  * further
9103  */
9104 static int check_type_with_root(u64 rootid, u8 key_type)
9105 {
9106         switch (key_type) {
9107         /* Only valid in chunk tree */
9108         case BTRFS_DEV_ITEM_KEY:
9109         case BTRFS_CHUNK_ITEM_KEY:
9110                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9111                         goto err;
9112                 break;
9113         /* valid in csum and log tree */
9114         case BTRFS_CSUM_TREE_OBJECTID:
9115                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9116                       is_fstree(rootid)))
9117                         goto err;
9118                 break;
9119         case BTRFS_EXTENT_ITEM_KEY:
9120         case BTRFS_METADATA_ITEM_KEY:
9121         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9122                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9123                         goto err;
9124                 break;
9125         case BTRFS_ROOT_ITEM_KEY:
9126                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9127                         goto err;
9128                 break;
9129         case BTRFS_DEV_EXTENT_KEY:
9130                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9131                         goto err;
9132                 break;
9133         }
9134         return 0;
9135 err:
9136         report_mismatch_key_root(key_type, rootid);
9137         return -EINVAL;
9138 }
9139
9140 static int run_next_block(struct btrfs_root *root,
9141                           struct block_info *bits,
9142                           int bits_nr,
9143                           u64 *last,
9144                           struct cache_tree *pending,
9145                           struct cache_tree *seen,
9146                           struct cache_tree *reada,
9147                           struct cache_tree *nodes,
9148                           struct cache_tree *extent_cache,
9149                           struct cache_tree *chunk_cache,
9150                           struct rb_root *dev_cache,
9151                           struct block_group_tree *block_group_cache,
9152                           struct device_extent_tree *dev_extent_cache,
9153                           struct root_item_record *ri)
9154 {
9155         struct btrfs_fs_info *fs_info = root->fs_info;
9156         struct extent_buffer *buf;
9157         struct extent_record *rec = NULL;
9158         u64 bytenr;
9159         u32 size;
9160         u64 parent;
9161         u64 owner;
9162         u64 flags;
9163         u64 ptr;
9164         u64 gen = 0;
9165         int ret = 0;
9166         int i;
9167         int nritems;
9168         struct btrfs_key key;
9169         struct cache_extent *cache;
9170         int reada_bits;
9171
9172         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9173                                     bits_nr, &reada_bits);
9174         if (nritems == 0)
9175                 return 1;
9176
9177         if (!reada_bits) {
9178                 for(i = 0; i < nritems; i++) {
9179                         ret = add_cache_extent(reada, bits[i].start,
9180                                                bits[i].size);
9181                         if (ret == -EEXIST)
9182                                 continue;
9183
9184                         /* fixme, get the parent transid */
9185                         readahead_tree_block(fs_info, bits[i].start, 0);
9186                 }
9187         }
9188         *last = bits[0].start;
9189         bytenr = bits[0].start;
9190         size = bits[0].size;
9191
9192         cache = lookup_cache_extent(pending, bytenr, size);
9193         if (cache) {
9194                 remove_cache_extent(pending, cache);
9195                 free(cache);
9196         }
9197         cache = lookup_cache_extent(reada, bytenr, size);
9198         if (cache) {
9199                 remove_cache_extent(reada, cache);
9200                 free(cache);
9201         }
9202         cache = lookup_cache_extent(nodes, bytenr, size);
9203         if (cache) {
9204                 remove_cache_extent(nodes, cache);
9205                 free(cache);
9206         }
9207         cache = lookup_cache_extent(extent_cache, bytenr, size);
9208         if (cache) {
9209                 rec = container_of(cache, struct extent_record, cache);
9210                 gen = rec->parent_generation;
9211         }
9212
9213         /* fixme, get the real parent transid */
9214         buf = read_tree_block(root->fs_info, bytenr, gen);
9215         if (!extent_buffer_uptodate(buf)) {
9216                 record_bad_block_io(root->fs_info,
9217                                     extent_cache, bytenr, size);
9218                 goto out;
9219         }
9220
9221         nritems = btrfs_header_nritems(buf);
9222
9223         flags = 0;
9224         if (!init_extent_tree) {
9225                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9226                                        btrfs_header_level(buf), 1, NULL,
9227                                        &flags);
9228                 if (ret < 0) {
9229                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9230                         if (ret < 0) {
9231                                 fprintf(stderr, "Couldn't calc extent flags\n");
9232                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9233                         }
9234                 }
9235         } else {
9236                 flags = 0;
9237                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9238                 if (ret < 0) {
9239                         fprintf(stderr, "Couldn't calc extent flags\n");
9240                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9241                 }
9242         }
9243
9244         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9245                 if (ri != NULL &&
9246                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9247                     ri->objectid == btrfs_header_owner(buf)) {
9248                         /*
9249                          * Ok we got to this block from it's original owner and
9250                          * we have FULL_BACKREF set.  Relocation can leave
9251                          * converted blocks over so this is altogether possible,
9252                          * however it's not possible if the generation > the
9253                          * last snapshot, so check for this case.
9254                          */
9255                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9256                             btrfs_header_generation(buf) > ri->last_snapshot) {
9257                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9258                                 rec->bad_full_backref = 1;
9259                         }
9260                 }
9261         } else {
9262                 if (ri != NULL &&
9263                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9264                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9265                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9266                         rec->bad_full_backref = 1;
9267                 }
9268         }
9269
9270         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9271                 rec->flag_block_full_backref = 1;
9272                 parent = bytenr;
9273                 owner = 0;
9274         } else {
9275                 rec->flag_block_full_backref = 0;
9276                 parent = 0;
9277                 owner = btrfs_header_owner(buf);
9278         }
9279
9280         ret = check_block(root, extent_cache, buf, flags);
9281         if (ret)
9282                 goto out;
9283
9284         if (btrfs_is_leaf(buf)) {
9285                 btree_space_waste += btrfs_leaf_free_space(root, buf);
9286                 for (i = 0; i < nritems; i++) {
9287                         struct btrfs_file_extent_item *fi;
9288                         btrfs_item_key_to_cpu(buf, &key, i);
9289                         /*
9290                          * Check key type against the leaf owner.
9291                          * Could filter quite a lot of early error if
9292                          * owner is correct
9293                          */
9294                         if (check_type_with_root(btrfs_header_owner(buf),
9295                                                  key.type)) {
9296                                 fprintf(stderr, "ignoring invalid key\n");
9297                                 continue;
9298                         }
9299                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9300                                 process_extent_item(root, extent_cache, buf,
9301                                                     i);
9302                                 continue;
9303                         }
9304                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9305                                 process_extent_item(root, extent_cache, buf,
9306                                                     i);
9307                                 continue;
9308                         }
9309                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9310                                 total_csum_bytes +=
9311                                         btrfs_item_size_nr(buf, i);
9312                                 continue;
9313                         }
9314                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9315                                 process_chunk_item(chunk_cache, &key, buf, i);
9316                                 continue;
9317                         }
9318                         if (key.type == BTRFS_DEV_ITEM_KEY) {
9319                                 process_device_item(dev_cache, &key, buf, i);
9320                                 continue;
9321                         }
9322                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9323                                 process_block_group_item(block_group_cache,
9324                                         &key, buf, i);
9325                                 continue;
9326                         }
9327                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
9328                                 process_device_extent_item(dev_extent_cache,
9329                                         &key, buf, i);
9330                                 continue;
9331
9332                         }
9333                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9334 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9335                                 process_extent_ref_v0(extent_cache, buf, i);
9336 #else
9337                                 BUG();
9338 #endif
9339                                 continue;
9340                         }
9341
9342                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9343                                 ret = add_tree_backref(extent_cache,
9344                                                 key.objectid, 0, key.offset, 0);
9345                                 if (ret < 0)
9346                                         error(
9347                                 "add_tree_backref failed (leaf tree block): %s",
9348                                               strerror(-ret));
9349                                 continue;
9350                         }
9351                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9352                                 ret = add_tree_backref(extent_cache,
9353                                                 key.objectid, key.offset, 0, 0);
9354                                 if (ret < 0)
9355                                         error(
9356                                 "add_tree_backref failed (leaf shared block): %s",
9357                                               strerror(-ret));
9358                                 continue;
9359                         }
9360                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9361                                 struct btrfs_extent_data_ref *ref;
9362                                 ref = btrfs_item_ptr(buf, i,
9363                                                 struct btrfs_extent_data_ref);
9364                                 add_data_backref(extent_cache,
9365                                         key.objectid, 0,
9366                                         btrfs_extent_data_ref_root(buf, ref),
9367                                         btrfs_extent_data_ref_objectid(buf,
9368                                                                        ref),
9369                                         btrfs_extent_data_ref_offset(buf, ref),
9370                                         btrfs_extent_data_ref_count(buf, ref),
9371                                         0, root->fs_info->sectorsize);
9372                                 continue;
9373                         }
9374                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9375                                 struct btrfs_shared_data_ref *ref;
9376                                 ref = btrfs_item_ptr(buf, i,
9377                                                 struct btrfs_shared_data_ref);
9378                                 add_data_backref(extent_cache,
9379                                         key.objectid, key.offset, 0, 0, 0,
9380                                         btrfs_shared_data_ref_count(buf, ref),
9381                                         0, root->fs_info->sectorsize);
9382                                 continue;
9383                         }
9384                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9385                                 struct bad_item *bad;
9386
9387                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9388                                         continue;
9389                                 if (!owner)
9390                                         continue;
9391                                 bad = malloc(sizeof(struct bad_item));
9392                                 if (!bad)
9393                                         continue;
9394                                 INIT_LIST_HEAD(&bad->list);
9395                                 memcpy(&bad->key, &key,
9396                                        sizeof(struct btrfs_key));
9397                                 bad->root_id = owner;
9398                                 list_add_tail(&bad->list, &delete_items);
9399                                 continue;
9400                         }
9401                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9402                                 continue;
9403                         fi = btrfs_item_ptr(buf, i,
9404                                             struct btrfs_file_extent_item);
9405                         if (btrfs_file_extent_type(buf, fi) ==
9406                             BTRFS_FILE_EXTENT_INLINE)
9407                                 continue;
9408                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9409                                 continue;
9410
9411                         data_bytes_allocated +=
9412                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9413                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9414                                 abort();
9415                         }
9416                         data_bytes_referenced +=
9417                                 btrfs_file_extent_num_bytes(buf, fi);
9418                         add_data_backref(extent_cache,
9419                                 btrfs_file_extent_disk_bytenr(buf, fi),
9420                                 parent, owner, key.objectid, key.offset -
9421                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9422                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9423                 }
9424         } else {
9425                 int level;
9426                 struct btrfs_key first_key;
9427
9428                 first_key.objectid = 0;
9429
9430                 if (nritems > 0)
9431                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9432                 level = btrfs_header_level(buf);
9433                 for (i = 0; i < nritems; i++) {
9434                         struct extent_record tmpl;
9435
9436                         ptr = btrfs_node_blockptr(buf, i);
9437                         size = root->fs_info->nodesize;
9438                         btrfs_node_key_to_cpu(buf, &key, i);
9439                         if (ri != NULL) {
9440                                 if ((level == ri->drop_level)
9441                                     && is_dropped_key(&key, &ri->drop_key)) {
9442                                         continue;
9443                                 }
9444                         }
9445
9446                         memset(&tmpl, 0, sizeof(tmpl));
9447                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9448                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9449                         tmpl.start = ptr;
9450                         tmpl.nr = size;
9451                         tmpl.refs = 1;
9452                         tmpl.metadata = 1;
9453                         tmpl.max_size = size;
9454                         ret = add_extent_rec(extent_cache, &tmpl);
9455                         if (ret < 0)
9456                                 goto out;
9457
9458                         ret = add_tree_backref(extent_cache, ptr, parent,
9459                                         owner, 1);
9460                         if (ret < 0) {
9461                                 error(
9462                                 "add_tree_backref failed (non-leaf block): %s",
9463                                       strerror(-ret));
9464                                 continue;
9465                         }
9466
9467                         if (level > 1) {
9468                                 add_pending(nodes, seen, ptr, size);
9469                         } else {
9470                                 add_pending(pending, seen, ptr, size);
9471                         }
9472                 }
9473                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9474                                       nritems) * sizeof(struct btrfs_key_ptr);
9475         }
9476         total_btree_bytes += buf->len;
9477         if (fs_root_objectid(btrfs_header_owner(buf)))
9478                 total_fs_tree_bytes += buf->len;
9479         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9480                 total_extent_tree_bytes += buf->len;
9481 out:
9482         free_extent_buffer(buf);
9483         return ret;
9484 }
9485
9486 static int add_root_to_pending(struct extent_buffer *buf,
9487                                struct cache_tree *extent_cache,
9488                                struct cache_tree *pending,
9489                                struct cache_tree *seen,
9490                                struct cache_tree *nodes,
9491                                u64 objectid)
9492 {
9493         struct extent_record tmpl;
9494         int ret;
9495
9496         if (btrfs_header_level(buf) > 0)
9497                 add_pending(nodes, seen, buf->start, buf->len);
9498         else
9499                 add_pending(pending, seen, buf->start, buf->len);
9500
9501         memset(&tmpl, 0, sizeof(tmpl));
9502         tmpl.start = buf->start;
9503         tmpl.nr = buf->len;
9504         tmpl.is_root = 1;
9505         tmpl.refs = 1;
9506         tmpl.metadata = 1;
9507         tmpl.max_size = buf->len;
9508         add_extent_rec(extent_cache, &tmpl);
9509
9510         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9511             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9512                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9513                                 0, 1);
9514         else
9515                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9516                                 1);
9517         return ret;
9518 }
9519
9520 /* as we fix the tree, we might be deleting blocks that
9521  * we're tracking for repair.  This hook makes sure we
9522  * remove any backrefs for blocks as we are fixing them.
9523  */
9524 static int free_extent_hook(struct btrfs_trans_handle *trans,
9525                             struct btrfs_root *root,
9526                             u64 bytenr, u64 num_bytes, u64 parent,
9527                             u64 root_objectid, u64 owner, u64 offset,
9528                             int refs_to_drop)
9529 {
9530         struct extent_record *rec;
9531         struct cache_extent *cache;
9532         int is_data;
9533         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9534
9535         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9536         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9537         if (!cache)
9538                 return 0;
9539
9540         rec = container_of(cache, struct extent_record, cache);
9541         if (is_data) {
9542                 struct data_backref *back;
9543                 back = find_data_backref(rec, parent, root_objectid, owner,
9544                                          offset, 1, bytenr, num_bytes);
9545                 if (!back)
9546                         goto out;
9547                 if (back->node.found_ref) {
9548                         back->found_ref -= refs_to_drop;
9549                         if (rec->refs)
9550                                 rec->refs -= refs_to_drop;
9551                 }
9552                 if (back->node.found_extent_tree) {
9553                         back->num_refs -= refs_to_drop;
9554                         if (rec->extent_item_refs)
9555                                 rec->extent_item_refs -= refs_to_drop;
9556                 }
9557                 if (back->found_ref == 0)
9558                         back->node.found_ref = 0;
9559                 if (back->num_refs == 0)
9560                         back->node.found_extent_tree = 0;
9561
9562                 if (!back->node.found_extent_tree && back->node.found_ref) {
9563                         rb_erase(&back->node.node, &rec->backref_tree);
9564                         free(back);
9565                 }
9566         } else {
9567                 struct tree_backref *back;
9568                 back = find_tree_backref(rec, parent, root_objectid);
9569                 if (!back)
9570                         goto out;
9571                 if (back->node.found_ref) {
9572                         if (rec->refs)
9573                                 rec->refs--;
9574                         back->node.found_ref = 0;
9575                 }
9576                 if (back->node.found_extent_tree) {
9577                         if (rec->extent_item_refs)
9578                                 rec->extent_item_refs--;
9579                         back->node.found_extent_tree = 0;
9580                 }
9581                 if (!back->node.found_extent_tree && back->node.found_ref) {
9582                         rb_erase(&back->node.node, &rec->backref_tree);
9583                         free(back);
9584                 }
9585         }
9586         maybe_free_extent_rec(extent_cache, rec);
9587 out:
9588         return 0;
9589 }
9590
9591 static int delete_extent_records(struct btrfs_trans_handle *trans,
9592                                  struct btrfs_root *root,
9593                                  struct btrfs_path *path,
9594                                  u64 bytenr)
9595 {
9596         struct btrfs_key key;
9597         struct btrfs_key found_key;
9598         struct extent_buffer *leaf;
9599         int ret;
9600         int slot;
9601
9602
9603         key.objectid = bytenr;
9604         key.type = (u8)-1;
9605         key.offset = (u64)-1;
9606
9607         while(1) {
9608                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9609                                         &key, path, 0, 1);
9610                 if (ret < 0)
9611                         break;
9612
9613                 if (ret > 0) {
9614                         ret = 0;
9615                         if (path->slots[0] == 0)
9616                                 break;
9617                         path->slots[0]--;
9618                 }
9619                 ret = 0;
9620
9621                 leaf = path->nodes[0];
9622                 slot = path->slots[0];
9623
9624                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9625                 if (found_key.objectid != bytenr)
9626                         break;
9627
9628                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9629                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9630                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9631                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9632                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9633                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9634                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9635                         btrfs_release_path(path);
9636                         if (found_key.type == 0) {
9637                                 if (found_key.offset == 0)
9638                                         break;
9639                                 key.offset = found_key.offset - 1;
9640                                 key.type = found_key.type;
9641                         }
9642                         key.type = found_key.type - 1;
9643                         key.offset = (u64)-1;
9644                         continue;
9645                 }
9646
9647                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9648                         found_key.objectid, found_key.type, found_key.offset);
9649
9650                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9651                 if (ret)
9652                         break;
9653                 btrfs_release_path(path);
9654
9655                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9656                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9657                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9658                                 found_key.offset : root->fs_info->nodesize;
9659
9660                         ret = btrfs_update_block_group(trans, root, bytenr,
9661                                                        bytes, 0, 0);
9662                         if (ret)
9663                                 break;
9664                 }
9665         }
9666
9667         btrfs_release_path(path);
9668         return ret;
9669 }
9670
9671 /*
9672  * for a single backref, this will allocate a new extent
9673  * and add the backref to it.
9674  */
9675 static int record_extent(struct btrfs_trans_handle *trans,
9676                          struct btrfs_fs_info *info,
9677                          struct btrfs_path *path,
9678                          struct extent_record *rec,
9679                          struct extent_backref *back,
9680                          int allocated, u64 flags)
9681 {
9682         int ret = 0;
9683         struct btrfs_root *extent_root = info->extent_root;
9684         struct extent_buffer *leaf;
9685         struct btrfs_key ins_key;
9686         struct btrfs_extent_item *ei;
9687         struct data_backref *dback;
9688         struct btrfs_tree_block_info *bi;
9689
9690         if (!back->is_data)
9691                 rec->max_size = max_t(u64, rec->max_size,
9692                                     info->nodesize);
9693
9694         if (!allocated) {
9695                 u32 item_size = sizeof(*ei);
9696
9697                 if (!back->is_data)
9698                         item_size += sizeof(*bi);
9699
9700                 ins_key.objectid = rec->start;
9701                 ins_key.offset = rec->max_size;
9702                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9703
9704                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9705                                         &ins_key, item_size);
9706                 if (ret)
9707                         goto fail;
9708
9709                 leaf = path->nodes[0];
9710                 ei = btrfs_item_ptr(leaf, path->slots[0],
9711                                     struct btrfs_extent_item);
9712
9713                 btrfs_set_extent_refs(leaf, ei, 0);
9714                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9715
9716                 if (back->is_data) {
9717                         btrfs_set_extent_flags(leaf, ei,
9718                                                BTRFS_EXTENT_FLAG_DATA);
9719                 } else {
9720                         struct btrfs_disk_key copy_key;;
9721
9722                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9723                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9724                                              sizeof(*bi));
9725
9726                         btrfs_set_disk_key_objectid(&copy_key,
9727                                                     rec->info_objectid);
9728                         btrfs_set_disk_key_type(&copy_key, 0);
9729                         btrfs_set_disk_key_offset(&copy_key, 0);
9730
9731                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9732                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9733
9734                         btrfs_set_extent_flags(leaf, ei,
9735                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9736                 }
9737
9738                 btrfs_mark_buffer_dirty(leaf);
9739                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9740                                                rec->max_size, 1, 0);
9741                 if (ret)
9742                         goto fail;
9743                 btrfs_release_path(path);
9744         }
9745
9746         if (back->is_data) {
9747                 u64 parent;
9748                 int i;
9749
9750                 dback = to_data_backref(back);
9751                 if (back->full_backref)
9752                         parent = dback->parent;
9753                 else
9754                         parent = 0;
9755
9756                 for (i = 0; i < dback->found_ref; i++) {
9757                         /* if parent != 0, we're doing a full backref
9758                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9759                          * just makes the backref allocator create a data
9760                          * backref
9761                          */
9762                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9763                                                    rec->start, rec->max_size,
9764                                                    parent,
9765                                                    dback->root,
9766                                                    parent ?
9767                                                    BTRFS_FIRST_FREE_OBJECTID :
9768                                                    dback->owner,
9769                                                    dback->offset);
9770                         if (ret)
9771                                 break;
9772                 }
9773                 fprintf(stderr, "adding new data backref"
9774                                 " on %llu %s %llu owner %llu"
9775                                 " offset %llu found %d\n",
9776                                 (unsigned long long)rec->start,
9777                                 back->full_backref ?
9778                                 "parent" : "root",
9779                                 back->full_backref ?
9780                                 (unsigned long long)parent :
9781                                 (unsigned long long)dback->root,
9782                                 (unsigned long long)dback->owner,
9783                                 (unsigned long long)dback->offset,
9784                                 dback->found_ref);
9785         } else {
9786                 u64 parent;
9787                 struct tree_backref *tback;
9788
9789                 tback = to_tree_backref(back);
9790                 if (back->full_backref)
9791                         parent = tback->parent;
9792                 else
9793                         parent = 0;
9794
9795                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9796                                            rec->start, rec->max_size,
9797                                            parent, tback->root, 0, 0);
9798                 fprintf(stderr, "adding new tree backref on "
9799                         "start %llu len %llu parent %llu root %llu\n",
9800                         rec->start, rec->max_size, parent, tback->root);
9801         }
9802 fail:
9803         btrfs_release_path(path);
9804         return ret;
9805 }
9806
9807 static struct extent_entry *find_entry(struct list_head *entries,
9808                                        u64 bytenr, u64 bytes)
9809 {
9810         struct extent_entry *entry = NULL;
9811
9812         list_for_each_entry(entry, entries, list) {
9813                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9814                         return entry;
9815         }
9816
9817         return NULL;
9818 }
9819
9820 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9821 {
9822         struct extent_entry *entry, *best = NULL, *prev = NULL;
9823
9824         list_for_each_entry(entry, entries, list) {
9825                 /*
9826                  * If there are as many broken entries as entries then we know
9827                  * not to trust this particular entry.
9828                  */
9829                 if (entry->broken == entry->count)
9830                         continue;
9831
9832                 /*
9833                  * Special case, when there are only two entries and 'best' is
9834                  * the first one
9835                  */
9836                 if (!prev) {
9837                         best = entry;
9838                         prev = entry;
9839                         continue;
9840                 }
9841
9842                 /*
9843                  * If our current entry == best then we can't be sure our best
9844                  * is really the best, so we need to keep searching.
9845                  */
9846                 if (best && best->count == entry->count) {
9847                         prev = entry;
9848                         best = NULL;
9849                         continue;
9850                 }
9851
9852                 /* Prev == entry, not good enough, have to keep searching */
9853                 if (!prev->broken && prev->count == entry->count)
9854                         continue;
9855
9856                 if (!best)
9857                         best = (prev->count > entry->count) ? prev : entry;
9858                 else if (best->count < entry->count)
9859                         best = entry;
9860                 prev = entry;
9861         }
9862
9863         return best;
9864 }
9865
9866 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9867                       struct data_backref *dback, struct extent_entry *entry)
9868 {
9869         struct btrfs_trans_handle *trans;
9870         struct btrfs_root *root;
9871         struct btrfs_file_extent_item *fi;
9872         struct extent_buffer *leaf;
9873         struct btrfs_key key;
9874         u64 bytenr, bytes;
9875         int ret, err;
9876
9877         key.objectid = dback->root;
9878         key.type = BTRFS_ROOT_ITEM_KEY;
9879         key.offset = (u64)-1;
9880         root = btrfs_read_fs_root(info, &key);
9881         if (IS_ERR(root)) {
9882                 fprintf(stderr, "Couldn't find root for our ref\n");
9883                 return -EINVAL;
9884         }
9885
9886         /*
9887          * The backref points to the original offset of the extent if it was
9888          * split, so we need to search down to the offset we have and then walk
9889          * forward until we find the backref we're looking for.
9890          */
9891         key.objectid = dback->owner;
9892         key.type = BTRFS_EXTENT_DATA_KEY;
9893         key.offset = dback->offset;
9894         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9895         if (ret < 0) {
9896                 fprintf(stderr, "Error looking up ref %d\n", ret);
9897                 return ret;
9898         }
9899
9900         while (1) {
9901                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9902                         ret = btrfs_next_leaf(root, path);
9903                         if (ret) {
9904                                 fprintf(stderr, "Couldn't find our ref, next\n");
9905                                 return -EINVAL;
9906                         }
9907                 }
9908                 leaf = path->nodes[0];
9909                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9910                 if (key.objectid != dback->owner ||
9911                     key.type != BTRFS_EXTENT_DATA_KEY) {
9912                         fprintf(stderr, "Couldn't find our ref, search\n");
9913                         return -EINVAL;
9914                 }
9915                 fi = btrfs_item_ptr(leaf, path->slots[0],
9916                                     struct btrfs_file_extent_item);
9917                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9918                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9919
9920                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9921                         break;
9922                 path->slots[0]++;
9923         }
9924
9925         btrfs_release_path(path);
9926
9927         trans = btrfs_start_transaction(root, 1);
9928         if (IS_ERR(trans))
9929                 return PTR_ERR(trans);
9930
9931         /*
9932          * Ok we have the key of the file extent we want to fix, now we can cow
9933          * down to the thing and fix it.
9934          */
9935         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9936         if (ret < 0) {
9937                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9938                         key.objectid, key.type, key.offset, ret);
9939                 goto out;
9940         }
9941         if (ret > 0) {
9942                 fprintf(stderr, "Well that's odd, we just found this key "
9943                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9944                         key.offset);
9945                 ret = -EINVAL;
9946                 goto out;
9947         }
9948         leaf = path->nodes[0];
9949         fi = btrfs_item_ptr(leaf, path->slots[0],
9950                             struct btrfs_file_extent_item);
9951
9952         if (btrfs_file_extent_compression(leaf, fi) &&
9953             dback->disk_bytenr != entry->bytenr) {
9954                 fprintf(stderr, "Ref doesn't match the record start and is "
9955                         "compressed, please take a btrfs-image of this file "
9956                         "system and send it to a btrfs developer so they can "
9957                         "complete this functionality for bytenr %Lu\n",
9958                         dback->disk_bytenr);
9959                 ret = -EINVAL;
9960                 goto out;
9961         }
9962
9963         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9964                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9965         } else if (dback->disk_bytenr > entry->bytenr) {
9966                 u64 off_diff, offset;
9967
9968                 off_diff = dback->disk_bytenr - entry->bytenr;
9969                 offset = btrfs_file_extent_offset(leaf, fi);
9970                 if (dback->disk_bytenr + offset +
9971                     btrfs_file_extent_num_bytes(leaf, fi) >
9972                     entry->bytenr + entry->bytes) {
9973                         fprintf(stderr, "Ref is past the entry end, please "
9974                                 "take a btrfs-image of this file system and "
9975                                 "send it to a btrfs developer, ref %Lu\n",
9976                                 dback->disk_bytenr);
9977                         ret = -EINVAL;
9978                         goto out;
9979                 }
9980                 offset += off_diff;
9981                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9982                 btrfs_set_file_extent_offset(leaf, fi, offset);
9983         } else if (dback->disk_bytenr < entry->bytenr) {
9984                 u64 offset;
9985
9986                 offset = btrfs_file_extent_offset(leaf, fi);
9987                 if (dback->disk_bytenr + offset < entry->bytenr) {
9988                         fprintf(stderr, "Ref is before the entry start, please"
9989                                 " take a btrfs-image of this file system and "
9990                                 "send it to a btrfs developer, ref %Lu\n",
9991                                 dback->disk_bytenr);
9992                         ret = -EINVAL;
9993                         goto out;
9994                 }
9995
9996                 offset += dback->disk_bytenr;
9997                 offset -= entry->bytenr;
9998                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9999                 btrfs_set_file_extent_offset(leaf, fi, offset);
10000         }
10001
10002         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10003
10004         /*
10005          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10006          * only do this if we aren't using compression, otherwise it's a
10007          * trickier case.
10008          */
10009         if (!btrfs_file_extent_compression(leaf, fi))
10010                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10011         else
10012                 printf("ram bytes may be wrong?\n");
10013         btrfs_mark_buffer_dirty(leaf);
10014 out:
10015         err = btrfs_commit_transaction(trans, root);
10016         btrfs_release_path(path);
10017         return ret ? ret : err;
10018 }
10019
10020 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10021                            struct extent_record *rec)
10022 {
10023         struct extent_backref *back, *tmp;
10024         struct data_backref *dback;
10025         struct extent_entry *entry, *best = NULL;
10026         LIST_HEAD(entries);
10027         int nr_entries = 0;
10028         int broken_entries = 0;
10029         int ret = 0;
10030         short mismatch = 0;
10031
10032         /*
10033          * Metadata is easy and the backrefs should always agree on bytenr and
10034          * size, if not we've got bigger issues.
10035          */
10036         if (rec->metadata)
10037                 return 0;
10038
10039         rbtree_postorder_for_each_entry_safe(back, tmp,
10040                                              &rec->backref_tree, node) {
10041                 if (back->full_backref || !back->is_data)
10042                         continue;
10043
10044                 dback = to_data_backref(back);
10045
10046                 /*
10047                  * We only pay attention to backrefs that we found a real
10048                  * backref for.
10049                  */
10050                 if (dback->found_ref == 0)
10051                         continue;
10052
10053                 /*
10054                  * For now we only catch when the bytes don't match, not the
10055                  * bytenr.  We can easily do this at the same time, but I want
10056                  * to have a fs image to test on before we just add repair
10057                  * functionality willy-nilly so we know we won't screw up the
10058                  * repair.
10059                  */
10060
10061                 entry = find_entry(&entries, dback->disk_bytenr,
10062                                    dback->bytes);
10063                 if (!entry) {
10064                         entry = malloc(sizeof(struct extent_entry));
10065                         if (!entry) {
10066                                 ret = -ENOMEM;
10067                                 goto out;
10068                         }
10069                         memset(entry, 0, sizeof(*entry));
10070                         entry->bytenr = dback->disk_bytenr;
10071                         entry->bytes = dback->bytes;
10072                         list_add_tail(&entry->list, &entries);
10073                         nr_entries++;
10074                 }
10075
10076                 /*
10077                  * If we only have on entry we may think the entries agree when
10078                  * in reality they don't so we have to do some extra checking.
10079                  */
10080                 if (dback->disk_bytenr != rec->start ||
10081                     dback->bytes != rec->nr || back->broken)
10082                         mismatch = 1;
10083
10084                 if (back->broken) {
10085                         entry->broken++;
10086                         broken_entries++;
10087                 }
10088
10089                 entry->count++;
10090         }
10091
10092         /* Yay all the backrefs agree, carry on good sir */
10093         if (nr_entries <= 1 && !mismatch)
10094                 goto out;
10095
10096         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10097                 "%Lu\n", rec->start);
10098
10099         /*
10100          * First we want to see if the backrefs can agree amongst themselves who
10101          * is right, so figure out which one of the entries has the highest
10102          * count.
10103          */
10104         best = find_most_right_entry(&entries);
10105
10106         /*
10107          * Ok so we may have an even split between what the backrefs think, so
10108          * this is where we use the extent ref to see what it thinks.
10109          */
10110         if (!best) {
10111                 entry = find_entry(&entries, rec->start, rec->nr);
10112                 if (!entry && (!broken_entries || !rec->found_rec)) {
10113                         fprintf(stderr, "Backrefs don't agree with each other "
10114                                 "and extent record doesn't agree with anybody,"
10115                                 " so we can't fix bytenr %Lu bytes %Lu\n",
10116                                 rec->start, rec->nr);
10117                         ret = -EINVAL;
10118                         goto out;
10119                 } else if (!entry) {
10120                         /*
10121                          * Ok our backrefs were broken, we'll assume this is the
10122                          * correct value and add an entry for this range.
10123                          */
10124                         entry = malloc(sizeof(struct extent_entry));
10125                         if (!entry) {
10126                                 ret = -ENOMEM;
10127                                 goto out;
10128                         }
10129                         memset(entry, 0, sizeof(*entry));
10130                         entry->bytenr = rec->start;
10131                         entry->bytes = rec->nr;
10132                         list_add_tail(&entry->list, &entries);
10133                         nr_entries++;
10134                 }
10135                 entry->count++;
10136                 best = find_most_right_entry(&entries);
10137                 if (!best) {
10138                         fprintf(stderr, "Backrefs and extent record evenly "
10139                                 "split on who is right, this is going to "
10140                                 "require user input to fix bytenr %Lu bytes "
10141                                 "%Lu\n", rec->start, rec->nr);
10142                         ret = -EINVAL;
10143                         goto out;
10144                 }
10145         }
10146
10147         /*
10148          * I don't think this can happen currently as we'll abort() if we catch
10149          * this case higher up, but in case somebody removes that we still can't
10150          * deal with it properly here yet, so just bail out of that's the case.
10151          */
10152         if (best->bytenr != rec->start) {
10153                 fprintf(stderr, "Extent start and backref starts don't match, "
10154                         "please use btrfs-image on this file system and send "
10155                         "it to a btrfs developer so they can make fsck fix "
10156                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
10157                         rec->start, rec->nr);
10158                 ret = -EINVAL;
10159                 goto out;
10160         }
10161
10162         /*
10163          * Ok great we all agreed on an extent record, let's go find the real
10164          * references and fix up the ones that don't match.
10165          */
10166         rbtree_postorder_for_each_entry_safe(back, tmp,
10167                                              &rec->backref_tree, node) {
10168                 if (back->full_backref || !back->is_data)
10169                         continue;
10170
10171                 dback = to_data_backref(back);
10172
10173                 /*
10174                  * Still ignoring backrefs that don't have a real ref attached
10175                  * to them.
10176                  */
10177                 if (dback->found_ref == 0)
10178                         continue;
10179
10180                 if (dback->bytes == best->bytes &&
10181                     dback->disk_bytenr == best->bytenr)
10182                         continue;
10183
10184                 ret = repair_ref(info, path, dback, best);
10185                 if (ret)
10186                         goto out;
10187         }
10188
10189         /*
10190          * Ok we messed with the actual refs, which means we need to drop our
10191          * entire cache and go back and rescan.  I know this is a huge pain and
10192          * adds a lot of extra work, but it's the only way to be safe.  Once all
10193          * the backrefs agree we may not need to do anything to the extent
10194          * record itself.
10195          */
10196         ret = -EAGAIN;
10197 out:
10198         while (!list_empty(&entries)) {
10199                 entry = list_entry(entries.next, struct extent_entry, list);
10200                 list_del_init(&entry->list);
10201                 free(entry);
10202         }
10203         return ret;
10204 }
10205
10206 static int process_duplicates(struct cache_tree *extent_cache,
10207                               struct extent_record *rec)
10208 {
10209         struct extent_record *good, *tmp;
10210         struct cache_extent *cache;
10211         int ret;
10212
10213         /*
10214          * If we found a extent record for this extent then return, or if we
10215          * have more than one duplicate we are likely going to need to delete
10216          * something.
10217          */
10218         if (rec->found_rec || rec->num_duplicates > 1)
10219                 return 0;
10220
10221         /* Shouldn't happen but just in case */
10222         BUG_ON(!rec->num_duplicates);
10223
10224         /*
10225          * So this happens if we end up with a backref that doesn't match the
10226          * actual extent entry.  So either the backref is bad or the extent
10227          * entry is bad.  Either way we want to have the extent_record actually
10228          * reflect what we found in the extent_tree, so we need to take the
10229          * duplicate out and use that as the extent_record since the only way we
10230          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10231          */
10232         remove_cache_extent(extent_cache, &rec->cache);
10233
10234         good = to_extent_record(rec->dups.next);
10235         list_del_init(&good->list);
10236         INIT_LIST_HEAD(&good->backrefs);
10237         INIT_LIST_HEAD(&good->dups);
10238         good->cache.start = good->start;
10239         good->cache.size = good->nr;
10240         good->content_checked = 0;
10241         good->owner_ref_checked = 0;
10242         good->num_duplicates = 0;
10243         good->refs = rec->refs;
10244         list_splice_init(&rec->backrefs, &good->backrefs);
10245         while (1) {
10246                 cache = lookup_cache_extent(extent_cache, good->start,
10247                                             good->nr);
10248                 if (!cache)
10249                         break;
10250                 tmp = container_of(cache, struct extent_record, cache);
10251
10252                 /*
10253                  * If we find another overlapping extent and it's found_rec is
10254                  * set then it's a duplicate and we need to try and delete
10255                  * something.
10256                  */
10257                 if (tmp->found_rec || tmp->num_duplicates > 0) {
10258                         if (list_empty(&good->list))
10259                                 list_add_tail(&good->list,
10260                                               &duplicate_extents);
10261                         good->num_duplicates += tmp->num_duplicates + 1;
10262                         list_splice_init(&tmp->dups, &good->dups);
10263                         list_del_init(&tmp->list);
10264                         list_add_tail(&tmp->list, &good->dups);
10265                         remove_cache_extent(extent_cache, &tmp->cache);
10266                         continue;
10267                 }
10268
10269                 /*
10270                  * Ok we have another non extent item backed extent rec, so lets
10271                  * just add it to this extent and carry on like we did above.
10272                  */
10273                 good->refs += tmp->refs;
10274                 list_splice_init(&tmp->backrefs, &good->backrefs);
10275                 remove_cache_extent(extent_cache, &tmp->cache);
10276                 free(tmp);
10277         }
10278         ret = insert_cache_extent(extent_cache, &good->cache);
10279         BUG_ON(ret);
10280         free(rec);
10281         return good->num_duplicates ? 0 : 1;
10282 }
10283
10284 static int delete_duplicate_records(struct btrfs_root *root,
10285                                     struct extent_record *rec)
10286 {
10287         struct btrfs_trans_handle *trans;
10288         LIST_HEAD(delete_list);
10289         struct btrfs_path path;
10290         struct extent_record *tmp, *good, *n;
10291         int nr_del = 0;
10292         int ret = 0, err;
10293         struct btrfs_key key;
10294
10295         btrfs_init_path(&path);
10296
10297         good = rec;
10298         /* Find the record that covers all of the duplicates. */
10299         list_for_each_entry(tmp, &rec->dups, list) {
10300                 if (good->start < tmp->start)
10301                         continue;
10302                 if (good->nr > tmp->nr)
10303                         continue;
10304
10305                 if (tmp->start + tmp->nr < good->start + good->nr) {
10306                         fprintf(stderr, "Ok we have overlapping extents that "
10307                                 "aren't completely covered by each other, this "
10308                                 "is going to require more careful thought.  "
10309                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10310                                 tmp->start, tmp->nr, good->start, good->nr);
10311                         abort();
10312                 }
10313                 good = tmp;
10314         }
10315
10316         if (good != rec)
10317                 list_add_tail(&rec->list, &delete_list);
10318
10319         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10320                 if (tmp == good)
10321                         continue;
10322                 list_move_tail(&tmp->list, &delete_list);
10323         }
10324
10325         root = root->fs_info->extent_root;
10326         trans = btrfs_start_transaction(root, 1);
10327         if (IS_ERR(trans)) {
10328                 ret = PTR_ERR(trans);
10329                 goto out;
10330         }
10331
10332         list_for_each_entry(tmp, &delete_list, list) {
10333                 if (tmp->found_rec == 0)
10334                         continue;
10335                 key.objectid = tmp->start;
10336                 key.type = BTRFS_EXTENT_ITEM_KEY;
10337                 key.offset = tmp->nr;
10338
10339                 /* Shouldn't happen but just in case */
10340                 if (tmp->metadata) {
10341                         fprintf(stderr, "Well this shouldn't happen, extent "
10342                                 "record overlaps but is metadata? "
10343                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10344                         abort();
10345                 }
10346
10347                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10348                 if (ret) {
10349                         if (ret > 0)
10350                                 ret = -EINVAL;
10351                         break;
10352                 }
10353                 ret = btrfs_del_item(trans, root, &path);
10354                 if (ret)
10355                         break;
10356                 btrfs_release_path(&path);
10357                 nr_del++;
10358         }
10359         err = btrfs_commit_transaction(trans, root);
10360         if (err && !ret)
10361                 ret = err;
10362 out:
10363         while (!list_empty(&delete_list)) {
10364                 tmp = to_extent_record(delete_list.next);
10365                 list_del_init(&tmp->list);
10366                 if (tmp == rec)
10367                         continue;
10368                 free(tmp);
10369         }
10370
10371         while (!list_empty(&rec->dups)) {
10372                 tmp = to_extent_record(rec->dups.next);
10373                 list_del_init(&tmp->list);
10374                 free(tmp);
10375         }
10376
10377         btrfs_release_path(&path);
10378
10379         if (!ret && !nr_del)
10380                 rec->num_duplicates = 0;
10381
10382         return ret ? ret : nr_del;
10383 }
10384
10385 static int find_possible_backrefs(struct btrfs_fs_info *info,
10386                                   struct btrfs_path *path,
10387                                   struct cache_tree *extent_cache,
10388                                   struct extent_record *rec)
10389 {
10390         struct btrfs_root *root;
10391         struct extent_backref *back, *tmp;
10392         struct data_backref *dback;
10393         struct cache_extent *cache;
10394         struct btrfs_file_extent_item *fi;
10395         struct btrfs_key key;
10396         u64 bytenr, bytes;
10397         int ret;
10398
10399         rbtree_postorder_for_each_entry_safe(back, tmp,
10400                                              &rec->backref_tree, node) {
10401                 /* Don't care about full backrefs (poor unloved backrefs) */
10402                 if (back->full_backref || !back->is_data)
10403                         continue;
10404
10405                 dback = to_data_backref(back);
10406
10407                 /* We found this one, we don't need to do a lookup */
10408                 if (dback->found_ref)
10409                         continue;
10410
10411                 key.objectid = dback->root;
10412                 key.type = BTRFS_ROOT_ITEM_KEY;
10413                 key.offset = (u64)-1;
10414
10415                 root = btrfs_read_fs_root(info, &key);
10416
10417                 /* No root, definitely a bad ref, skip */
10418                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10419                         continue;
10420                 /* Other err, exit */
10421                 if (IS_ERR(root))
10422                         return PTR_ERR(root);
10423
10424                 key.objectid = dback->owner;
10425                 key.type = BTRFS_EXTENT_DATA_KEY;
10426                 key.offset = dback->offset;
10427                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10428                 if (ret) {
10429                         btrfs_release_path(path);
10430                         if (ret < 0)
10431                                 return ret;
10432                         /* Didn't find it, we can carry on */
10433                         ret = 0;
10434                         continue;
10435                 }
10436
10437                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10438                                     struct btrfs_file_extent_item);
10439                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10440                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10441                 btrfs_release_path(path);
10442                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10443                 if (cache) {
10444                         struct extent_record *tmp;
10445                         tmp = container_of(cache, struct extent_record, cache);
10446
10447                         /*
10448                          * If we found an extent record for the bytenr for this
10449                          * particular backref then we can't add it to our
10450                          * current extent record.  We only want to add backrefs
10451                          * that don't have a corresponding extent item in the
10452                          * extent tree since they likely belong to this record
10453                          * and we need to fix it if it doesn't match bytenrs.
10454                          */
10455                         if  (tmp->found_rec)
10456                                 continue;
10457                 }
10458
10459                 dback->found_ref += 1;
10460                 dback->disk_bytenr = bytenr;
10461                 dback->bytes = bytes;
10462
10463                 /*
10464                  * Set this so the verify backref code knows not to trust the
10465                  * values in this backref.
10466                  */
10467                 back->broken = 1;
10468         }
10469
10470         return 0;
10471 }
10472
10473 /*
10474  * Record orphan data ref into corresponding root.
10475  *
10476  * Return 0 if the extent item contains data ref and recorded.
10477  * Return 1 if the extent item contains no useful data ref
10478  *   On that case, it may contains only shared_dataref or metadata backref
10479  *   or the file extent exists(this should be handled by the extent bytenr
10480  *   recovery routine)
10481  * Return <0 if something goes wrong.
10482  */
10483 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10484                                       struct extent_record *rec)
10485 {
10486         struct btrfs_key key;
10487         struct btrfs_root *dest_root;
10488         struct extent_backref *back, *tmp;
10489         struct data_backref *dback;
10490         struct orphan_data_extent *orphan;
10491         struct btrfs_path path;
10492         int recorded_data_ref = 0;
10493         int ret = 0;
10494
10495         if (rec->metadata)
10496                 return 1;
10497         btrfs_init_path(&path);
10498         rbtree_postorder_for_each_entry_safe(back, tmp,
10499                                              &rec->backref_tree, node) {
10500                 if (back->full_backref || !back->is_data ||
10501                     !back->found_extent_tree)
10502                         continue;
10503                 dback = to_data_backref(back);
10504                 if (dback->found_ref)
10505                         continue;
10506                 key.objectid = dback->root;
10507                 key.type = BTRFS_ROOT_ITEM_KEY;
10508                 key.offset = (u64)-1;
10509
10510                 dest_root = btrfs_read_fs_root(fs_info, &key);
10511
10512                 /* For non-exist root we just skip it */
10513                 if (IS_ERR(dest_root) || !dest_root)
10514                         continue;
10515
10516                 key.objectid = dback->owner;
10517                 key.type = BTRFS_EXTENT_DATA_KEY;
10518                 key.offset = dback->offset;
10519
10520                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10521                 btrfs_release_path(&path);
10522                 /*
10523                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10524                  * we need to record it for inode/file extent rebuild.
10525                  * For ret > 0, we record it only for file extent rebuild.
10526                  * For ret == 0, the file extent exists but only bytenr
10527                  * mismatch, let the original bytenr fix routine to handle,
10528                  * don't record it.
10529                  */
10530                 if (ret == 0)
10531                         continue;
10532                 ret = 0;
10533                 orphan = malloc(sizeof(*orphan));
10534                 if (!orphan) {
10535                         ret = -ENOMEM;
10536                         goto out;
10537                 }
10538                 INIT_LIST_HEAD(&orphan->list);
10539                 orphan->root = dback->root;
10540                 orphan->objectid = dback->owner;
10541                 orphan->offset = dback->offset;
10542                 orphan->disk_bytenr = rec->cache.start;
10543                 orphan->disk_len = rec->cache.size;
10544                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10545                 recorded_data_ref = 1;
10546         }
10547 out:
10548         btrfs_release_path(&path);
10549         if (!ret)
10550                 return !recorded_data_ref;
10551         else
10552                 return ret;
10553 }
10554
10555 /*
10556  * when an incorrect extent item is found, this will delete
10557  * all of the existing entries for it and recreate them
10558  * based on what the tree scan found.
10559  */
10560 static int fixup_extent_refs(struct btrfs_fs_info *info,
10561                              struct cache_tree *extent_cache,
10562                              struct extent_record *rec)
10563 {
10564         struct btrfs_trans_handle *trans = NULL;
10565         int ret;
10566         struct btrfs_path path;
10567         struct cache_extent *cache;
10568         struct extent_backref *back, *tmp;
10569         int allocated = 0;
10570         u64 flags = 0;
10571
10572         if (rec->flag_block_full_backref)
10573                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10574
10575         btrfs_init_path(&path);
10576         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10577                 /*
10578                  * Sometimes the backrefs themselves are so broken they don't
10579                  * get attached to any meaningful rec, so first go back and
10580                  * check any of our backrefs that we couldn't find and throw
10581                  * them into the list if we find the backref so that
10582                  * verify_backrefs can figure out what to do.
10583                  */
10584                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10585                 if (ret < 0)
10586                         goto out;
10587         }
10588
10589         /* step one, make sure all of the backrefs agree */
10590         ret = verify_backrefs(info, &path, rec);
10591         if (ret < 0)
10592                 goto out;
10593
10594         trans = btrfs_start_transaction(info->extent_root, 1);
10595         if (IS_ERR(trans)) {
10596                 ret = PTR_ERR(trans);
10597                 goto out;
10598         }
10599
10600         /* step two, delete all the existing records */
10601         ret = delete_extent_records(trans, info->extent_root, &path,
10602                                     rec->start);
10603
10604         if (ret < 0)
10605                 goto out;
10606
10607         /* was this block corrupt?  If so, don't add references to it */
10608         cache = lookup_cache_extent(info->corrupt_blocks,
10609                                     rec->start, rec->max_size);
10610         if (cache) {
10611                 ret = 0;
10612                 goto out;
10613         }
10614
10615         /* step three, recreate all the refs we did find */
10616         rbtree_postorder_for_each_entry_safe(back, tmp,
10617                                              &rec->backref_tree, node) {
10618                 /*
10619                  * if we didn't find any references, don't create a
10620                  * new extent record
10621                  */
10622                 if (!back->found_ref)
10623                         continue;
10624
10625                 rec->bad_full_backref = 0;
10626                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10627                 allocated = 1;
10628
10629                 if (ret)
10630                         goto out;
10631         }
10632 out:
10633         if (trans) {
10634                 int err = btrfs_commit_transaction(trans, info->extent_root);
10635                 if (!ret)
10636                         ret = err;
10637         }
10638
10639         if (!ret)
10640                 fprintf(stderr, "Repaired extent references for %llu\n",
10641                                 (unsigned long long)rec->start);
10642
10643         btrfs_release_path(&path);
10644         return ret;
10645 }
10646
10647 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10648                               struct extent_record *rec)
10649 {
10650         struct btrfs_trans_handle *trans;
10651         struct btrfs_root *root = fs_info->extent_root;
10652         struct btrfs_path path;
10653         struct btrfs_extent_item *ei;
10654         struct btrfs_key key;
10655         u64 flags;
10656         int ret = 0;
10657
10658         key.objectid = rec->start;
10659         if (rec->metadata) {
10660                 key.type = BTRFS_METADATA_ITEM_KEY;
10661                 key.offset = rec->info_level;
10662         } else {
10663                 key.type = BTRFS_EXTENT_ITEM_KEY;
10664                 key.offset = rec->max_size;
10665         }
10666
10667         trans = btrfs_start_transaction(root, 0);
10668         if (IS_ERR(trans))
10669                 return PTR_ERR(trans);
10670
10671         btrfs_init_path(&path);
10672         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10673         if (ret < 0) {
10674                 btrfs_release_path(&path);
10675                 btrfs_commit_transaction(trans, root);
10676                 return ret;
10677         } else if (ret) {
10678                 fprintf(stderr, "Didn't find extent for %llu\n",
10679                         (unsigned long long)rec->start);
10680                 btrfs_release_path(&path);
10681                 btrfs_commit_transaction(trans, root);
10682                 return -ENOENT;
10683         }
10684
10685         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10686                             struct btrfs_extent_item);
10687         flags = btrfs_extent_flags(path.nodes[0], ei);
10688         if (rec->flag_block_full_backref) {
10689                 fprintf(stderr, "setting full backref on %llu\n",
10690                         (unsigned long long)key.objectid);
10691                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10692         } else {
10693                 fprintf(stderr, "clearing full backref on %llu\n",
10694                         (unsigned long long)key.objectid);
10695                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10696         }
10697         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10698         btrfs_mark_buffer_dirty(path.nodes[0]);
10699         btrfs_release_path(&path);
10700         ret = btrfs_commit_transaction(trans, root);
10701         if (!ret)
10702                 fprintf(stderr, "Repaired extent flags for %llu\n",
10703                                 (unsigned long long)rec->start);
10704
10705         return ret;
10706 }
10707
10708 /* right now we only prune from the extent allocation tree */
10709 static int prune_one_block(struct btrfs_trans_handle *trans,
10710                            struct btrfs_fs_info *info,
10711                            struct btrfs_corrupt_block *corrupt)
10712 {
10713         int ret;
10714         struct btrfs_path path;
10715         struct extent_buffer *eb;
10716         u64 found;
10717         int slot;
10718         int nritems;
10719         int level = corrupt->level + 1;
10720
10721         btrfs_init_path(&path);
10722 again:
10723         /* we want to stop at the parent to our busted block */
10724         path.lowest_level = level;
10725
10726         ret = btrfs_search_slot(trans, info->extent_root,
10727                                 &corrupt->key, &path, -1, 1);
10728
10729         if (ret < 0)
10730                 goto out;
10731
10732         eb = path.nodes[level];
10733         if (!eb) {
10734                 ret = -ENOENT;
10735                 goto out;
10736         }
10737
10738         /*
10739          * hopefully the search gave us the block we want to prune,
10740          * lets try that first
10741          */
10742         slot = path.slots[level];
10743         found =  btrfs_node_blockptr(eb, slot);
10744         if (found == corrupt->cache.start)
10745                 goto del_ptr;
10746
10747         nritems = btrfs_header_nritems(eb);
10748
10749         /* the search failed, lets scan this node and hope we find it */
10750         for (slot = 0; slot < nritems; slot++) {
10751                 found =  btrfs_node_blockptr(eb, slot);
10752                 if (found == corrupt->cache.start)
10753                         goto del_ptr;
10754         }
10755         /*
10756          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10757          * to this block
10758          */
10759         if (eb == info->extent_root->node) {
10760                 ret = -ENOENT;
10761                 goto out;
10762         } else {
10763                 level++;
10764                 btrfs_release_path(&path);
10765                 goto again;
10766         }
10767
10768 del_ptr:
10769         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10770         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10771
10772 out:
10773         btrfs_release_path(&path);
10774         return ret;
10775 }
10776
10777 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10778 {
10779         struct btrfs_trans_handle *trans = NULL;
10780         struct cache_extent *cache;
10781         struct btrfs_corrupt_block *corrupt;
10782
10783         while (1) {
10784                 cache = search_cache_extent(info->corrupt_blocks, 0);
10785                 if (!cache)
10786                         break;
10787                 if (!trans) {
10788                         trans = btrfs_start_transaction(info->extent_root, 1);
10789                         if (IS_ERR(trans))
10790                                 return PTR_ERR(trans);
10791                 }
10792                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10793                 prune_one_block(trans, info, corrupt);
10794                 remove_cache_extent(info->corrupt_blocks, cache);
10795         }
10796         if (trans)
10797                 return btrfs_commit_transaction(trans, info->extent_root);
10798         return 0;
10799 }
10800
10801 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10802 {
10803         struct btrfs_block_group_cache *cache;
10804         u64 start, end;
10805         int ret;
10806
10807         while (1) {
10808                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10809                                             &start, &end, EXTENT_DIRTY);
10810                 if (ret)
10811                         break;
10812                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10813         }
10814
10815         start = 0;
10816         while (1) {
10817                 cache = btrfs_lookup_first_block_group(fs_info, start);
10818                 if (!cache)
10819                         break;
10820                 if (cache->cached)
10821                         cache->cached = 0;
10822                 start = cache->key.objectid + cache->key.offset;
10823         }
10824 }
10825
10826 static int check_extent_refs(struct btrfs_root *root,
10827                              struct cache_tree *extent_cache)
10828 {
10829         struct extent_record *rec;
10830         struct cache_extent *cache;
10831         int ret = 0;
10832         int had_dups = 0;
10833         int err = 0;
10834
10835         if (repair) {
10836                 /*
10837                  * if we're doing a repair, we have to make sure
10838                  * we don't allocate from the problem extents.
10839                  * In the worst case, this will be all the
10840                  * extents in the FS
10841                  */
10842                 cache = search_cache_extent(extent_cache, 0);
10843                 while(cache) {
10844                         rec = container_of(cache, struct extent_record, cache);
10845                         set_extent_dirty(root->fs_info->excluded_extents,
10846                                          rec->start,
10847                                          rec->start + rec->max_size - 1);
10848                         cache = next_cache_extent(cache);
10849                 }
10850
10851                 /* pin down all the corrupted blocks too */
10852                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10853                 while(cache) {
10854                         set_extent_dirty(root->fs_info->excluded_extents,
10855                                          cache->start,
10856                                          cache->start + cache->size - 1);
10857                         cache = next_cache_extent(cache);
10858                 }
10859                 prune_corrupt_blocks(root->fs_info);
10860                 reset_cached_block_groups(root->fs_info);
10861         }
10862
10863         reset_cached_block_groups(root->fs_info);
10864
10865         /*
10866          * We need to delete any duplicate entries we find first otherwise we
10867          * could mess up the extent tree when we have backrefs that actually
10868          * belong to a different extent item and not the weird duplicate one.
10869          */
10870         while (repair && !list_empty(&duplicate_extents)) {
10871                 rec = to_extent_record(duplicate_extents.next);
10872                 list_del_init(&rec->list);
10873
10874                 /* Sometimes we can find a backref before we find an actual
10875                  * extent, so we need to process it a little bit to see if there
10876                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10877                  * if this is a backref screwup.  If we need to delete stuff
10878                  * process_duplicates() will return 0, otherwise it will return
10879                  * 1 and we
10880                  */
10881                 if (process_duplicates(extent_cache, rec))
10882                         continue;
10883                 ret = delete_duplicate_records(root, rec);
10884                 if (ret < 0)
10885                         return ret;
10886                 /*
10887                  * delete_duplicate_records will return the number of entries
10888                  * deleted, so if it's greater than 0 then we know we actually
10889                  * did something and we need to remove.
10890                  */
10891                 if (ret)
10892                         had_dups = 1;
10893         }
10894
10895         if (had_dups)
10896                 return -EAGAIN;
10897
10898         while(1) {
10899                 int cur_err = 0;
10900                 int fix = 0;
10901
10902                 cache = search_cache_extent(extent_cache, 0);
10903                 if (!cache)
10904                         break;
10905                 rec = container_of(cache, struct extent_record, cache);
10906                 if (rec->num_duplicates) {
10907                         fprintf(stderr, "extent item %llu has multiple extent "
10908                                 "items\n", (unsigned long long)rec->start);
10909                         cur_err = 1;
10910                 }
10911
10912                 if (rec->refs != rec->extent_item_refs) {
10913                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10914                                 (unsigned long long)rec->start,
10915                                 (unsigned long long)rec->nr);
10916                         fprintf(stderr, "extent item %llu, found %llu\n",
10917                                 (unsigned long long)rec->extent_item_refs,
10918                                 (unsigned long long)rec->refs);
10919                         ret = record_orphan_data_extents(root->fs_info, rec);
10920                         if (ret < 0)
10921                                 goto repair_abort;
10922                         fix = ret;
10923                         cur_err = 1;
10924                 }
10925                 if (all_backpointers_checked(rec, 1)) {
10926                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10927                                 (unsigned long long)rec->start,
10928                                 (unsigned long long)rec->nr);
10929                         fix = 1;
10930                         cur_err = 1;
10931                 }
10932                 if (!rec->owner_ref_checked) {
10933                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10934                                 (unsigned long long)rec->start,
10935                                 (unsigned long long)rec->nr);
10936                         fix = 1;
10937                         cur_err = 1;
10938                 }
10939
10940                 if (repair && fix) {
10941                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10942                         if (ret)
10943                                 goto repair_abort;
10944                 }
10945
10946
10947                 if (rec->bad_full_backref) {
10948                         fprintf(stderr, "bad full backref, on [%llu]\n",
10949                                 (unsigned long long)rec->start);
10950                         if (repair) {
10951                                 ret = fixup_extent_flags(root->fs_info, rec);
10952                                 if (ret)
10953                                         goto repair_abort;
10954                                 fix = 1;
10955                         }
10956                         cur_err = 1;
10957                 }
10958                 /*
10959                  * Although it's not a extent ref's problem, we reuse this
10960                  * routine for error reporting.
10961                  * No repair function yet.
10962                  */
10963                 if (rec->crossing_stripes) {
10964                         fprintf(stderr,
10965                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10966                                 rec->start, rec->start + rec->max_size);
10967                         cur_err = 1;
10968                 }
10969
10970                 if (rec->wrong_chunk_type) {
10971                         fprintf(stderr,
10972                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10973                                 rec->start, rec->start + rec->max_size);
10974                         cur_err = 1;
10975                 }
10976
10977                 err = cur_err;
10978                 remove_cache_extent(extent_cache, cache);
10979                 free_all_extent_backrefs(rec);
10980                 if (!init_extent_tree && repair && (!cur_err || fix))
10981                         clear_extent_dirty(root->fs_info->excluded_extents,
10982                                            rec->start,
10983                                            rec->start + rec->max_size - 1);
10984                 free(rec);
10985         }
10986 repair_abort:
10987         if (repair) {
10988                 if (ret && ret != -EAGAIN) {
10989                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10990                         exit(1);
10991                 } else if (!ret) {
10992                         struct btrfs_trans_handle *trans;
10993
10994                         root = root->fs_info->extent_root;
10995                         trans = btrfs_start_transaction(root, 1);
10996                         if (IS_ERR(trans)) {
10997                                 ret = PTR_ERR(trans);
10998                                 goto repair_abort;
10999                         }
11000
11001                         ret = btrfs_fix_block_accounting(trans, root);
11002                         if (ret)
11003                                 goto repair_abort;
11004                         ret = btrfs_commit_transaction(trans, root);
11005                         if (ret)
11006                                 goto repair_abort;
11007                 }
11008                 return ret;
11009         }
11010
11011         if (err)
11012                 err = -EIO;
11013         return err;
11014 }
11015
11016 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11017 {
11018         u64 stripe_size;
11019
11020         if (type & BTRFS_BLOCK_GROUP_RAID0) {
11021                 stripe_size = length;
11022                 stripe_size /= num_stripes;
11023         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11024                 stripe_size = length * 2;
11025                 stripe_size /= num_stripes;
11026         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11027                 stripe_size = length;
11028                 stripe_size /= (num_stripes - 1);
11029         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11030                 stripe_size = length;
11031                 stripe_size /= (num_stripes - 2);
11032         } else {
11033                 stripe_size = length;
11034         }
11035         return stripe_size;
11036 }
11037
11038 /*
11039  * Check the chunk with its block group/dev list ref:
11040  * Return 0 if all refs seems valid.
11041  * Return 1 if part of refs seems valid, need later check for rebuild ref
11042  * like missing block group and needs to search extent tree to rebuild them.
11043  * Return -1 if essential refs are missing and unable to rebuild.
11044  */
11045 static int check_chunk_refs(struct chunk_record *chunk_rec,
11046                             struct block_group_tree *block_group_cache,
11047                             struct device_extent_tree *dev_extent_cache,
11048                             int silent)
11049 {
11050         struct cache_extent *block_group_item;
11051         struct block_group_record *block_group_rec;
11052         struct cache_extent *dev_extent_item;
11053         struct device_extent_record *dev_extent_rec;
11054         u64 devid;
11055         u64 offset;
11056         u64 length;
11057         int metadump_v2 = 0;
11058         int i;
11059         int ret = 0;
11060
11061         block_group_item = lookup_cache_extent(&block_group_cache->tree,
11062                                                chunk_rec->offset,
11063                                                chunk_rec->length);
11064         if (block_group_item) {
11065                 block_group_rec = container_of(block_group_item,
11066                                                struct block_group_record,
11067                                                cache);
11068                 if (chunk_rec->length != block_group_rec->offset ||
11069                     chunk_rec->offset != block_group_rec->objectid ||
11070                     (!metadump_v2 &&
11071                      chunk_rec->type_flags != block_group_rec->flags)) {
11072                         if (!silent)
11073                                 fprintf(stderr,
11074                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11075                                         chunk_rec->objectid,
11076                                         chunk_rec->type,
11077                                         chunk_rec->offset,
11078                                         chunk_rec->length,
11079                                         chunk_rec->offset,
11080                                         chunk_rec->type_flags,
11081                                         block_group_rec->objectid,
11082                                         block_group_rec->type,
11083                                         block_group_rec->offset,
11084                                         block_group_rec->offset,
11085                                         block_group_rec->objectid,
11086                                         block_group_rec->flags);
11087                         ret = -1;
11088                 } else {
11089                         list_del_init(&block_group_rec->list);
11090                         chunk_rec->bg_rec = block_group_rec;
11091                 }
11092         } else {
11093                 if (!silent)
11094                         fprintf(stderr,
11095                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11096                                 chunk_rec->objectid,
11097                                 chunk_rec->type,
11098                                 chunk_rec->offset,
11099                                 chunk_rec->length,
11100                                 chunk_rec->offset,
11101                                 chunk_rec->type_flags);
11102                 ret = 1;
11103         }
11104
11105         if (metadump_v2)
11106                 return ret;
11107
11108         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11109                                     chunk_rec->num_stripes);
11110         for (i = 0; i < chunk_rec->num_stripes; ++i) {
11111                 devid = chunk_rec->stripes[i].devid;
11112                 offset = chunk_rec->stripes[i].offset;
11113                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11114                                                        devid, offset, length);
11115                 if (dev_extent_item) {
11116                         dev_extent_rec = container_of(dev_extent_item,
11117                                                 struct device_extent_record,
11118                                                 cache);
11119                         if (dev_extent_rec->objectid != devid ||
11120                             dev_extent_rec->offset != offset ||
11121                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
11122                             dev_extent_rec->length != length) {
11123                                 if (!silent)
11124                                         fprintf(stderr,
11125                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11126                                                 chunk_rec->objectid,
11127                                                 chunk_rec->type,
11128                                                 chunk_rec->offset,
11129                                                 chunk_rec->stripes[i].devid,
11130                                                 chunk_rec->stripes[i].offset,
11131                                                 dev_extent_rec->objectid,
11132                                                 dev_extent_rec->offset,
11133                                                 dev_extent_rec->length);
11134                                 ret = -1;
11135                         } else {
11136                                 list_move(&dev_extent_rec->chunk_list,
11137                                           &chunk_rec->dextents);
11138                         }
11139                 } else {
11140                         if (!silent)
11141                                 fprintf(stderr,
11142                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11143                                         chunk_rec->objectid,
11144                                         chunk_rec->type,
11145                                         chunk_rec->offset,
11146                                         chunk_rec->stripes[i].devid,
11147                                         chunk_rec->stripes[i].offset);
11148                         ret = -1;
11149                 }
11150         }
11151         return ret;
11152 }
11153
11154 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
11155 int check_chunks(struct cache_tree *chunk_cache,
11156                  struct block_group_tree *block_group_cache,
11157                  struct device_extent_tree *dev_extent_cache,
11158                  struct list_head *good, struct list_head *bad,
11159                  struct list_head *rebuild, int silent)
11160 {
11161         struct cache_extent *chunk_item;
11162         struct chunk_record *chunk_rec;
11163         struct block_group_record *bg_rec;
11164         struct device_extent_record *dext_rec;
11165         int err;
11166         int ret = 0;
11167
11168         chunk_item = first_cache_extent(chunk_cache);
11169         while (chunk_item) {
11170                 chunk_rec = container_of(chunk_item, struct chunk_record,
11171                                          cache);
11172                 err = check_chunk_refs(chunk_rec, block_group_cache,
11173                                        dev_extent_cache, silent);
11174                 if (err < 0)
11175                         ret = err;
11176                 if (err == 0 && good)
11177                         list_add_tail(&chunk_rec->list, good);
11178                 if (err > 0 && rebuild)
11179                         list_add_tail(&chunk_rec->list, rebuild);
11180                 if (err < 0 && bad)
11181                         list_add_tail(&chunk_rec->list, bad);
11182                 chunk_item = next_cache_extent(chunk_item);
11183         }
11184
11185         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11186                 if (!silent)
11187                         fprintf(stderr,
11188                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11189                                 bg_rec->objectid,
11190                                 bg_rec->offset,
11191                                 bg_rec->flags);
11192                 if (!ret)
11193                         ret = 1;
11194         }
11195
11196         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11197                             chunk_list) {
11198                 if (!silent)
11199                         fprintf(stderr,
11200                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11201                                 dext_rec->objectid,
11202                                 dext_rec->offset,
11203                                 dext_rec->length);
11204                 if (!ret)
11205                         ret = 1;
11206         }
11207         return ret;
11208 }
11209
11210
11211 static int check_device_used(struct device_record *dev_rec,
11212                              struct device_extent_tree *dext_cache)
11213 {
11214         struct cache_extent *cache;
11215         struct device_extent_record *dev_extent_rec;
11216         u64 total_byte = 0;
11217
11218         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11219         while (cache) {
11220                 dev_extent_rec = container_of(cache,
11221                                               struct device_extent_record,
11222                                               cache);
11223                 if (dev_extent_rec->objectid != dev_rec->devid)
11224                         break;
11225
11226                 list_del_init(&dev_extent_rec->device_list);
11227                 total_byte += dev_extent_rec->length;
11228                 cache = next_cache_extent(cache);
11229         }
11230
11231         if (total_byte != dev_rec->byte_used) {
11232                 fprintf(stderr,
11233                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11234                         total_byte, dev_rec->byte_used, dev_rec->objectid,
11235                         dev_rec->type, dev_rec->offset);
11236                 return -1;
11237         } else {
11238                 return 0;
11239         }
11240 }
11241
11242 /*
11243  * Extra (optional) check for dev_item size to report possbile problem on a new
11244  * kernel.
11245  */
11246 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
11247 {
11248         if (!IS_ALIGNED(total_bytes, sectorsize)) {
11249                 warning(
11250 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
11251                         devid, total_bytes, sectorsize);
11252                 warning(
11253 "this is OK for older kernel, but may cause kernel warning for newer kernels");
11254                 warning("this can be fixed by 'btrfs rescue fix-device-size'");
11255         }
11256 }
11257
11258 /*
11259  * Unlike device size alignment check above, some super total_bytes check
11260  * failure can lead to mount failure for newer kernel.
11261  *
11262  * So this function will return the error for a fatal super total_bytes problem.
11263  */
11264 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
11265 {
11266         struct btrfs_device *dev;
11267         struct list_head *dev_list = &fs_info->fs_devices->devices;
11268         u64 total_bytes = 0;
11269         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
11270
11271         list_for_each_entry(dev, dev_list, dev_list)
11272                 total_bytes += dev->total_bytes;
11273
11274         /* Important check, which can cause unmountable fs */
11275         if (super_bytes < total_bytes) {
11276                 error("super total bytes %llu smaller than real device(s) size %llu",
11277                         super_bytes, total_bytes);
11278                 error("mounting this fs may fail for newer kernels");
11279                 error("this can be fixed by 'btrfs rescue fix-device-size'");
11280                 return false;
11281         }
11282
11283         /*
11284          * Optional check, just to make everything aligned and match with each
11285          * other.
11286          *
11287          * For a btrfs-image restored fs, we don't need to check it anyway.
11288          */
11289         if (btrfs_super_flags(fs_info->super_copy) &
11290             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
11291                 return true;
11292         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
11293             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
11294             super_bytes != total_bytes) {
11295                 warning("minor unaligned/mismatch device size detected");
11296                 warning(
11297                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
11298         }
11299         return true;
11300 }
11301
11302 /* check btrfs_dev_item -> btrfs_dev_extent */
11303 static int check_devices(struct rb_root *dev_cache,
11304                          struct device_extent_tree *dev_extent_cache)
11305 {
11306         struct rb_node *dev_node;
11307         struct device_record *dev_rec;
11308         struct device_extent_record *dext_rec;
11309         int err;
11310         int ret = 0;
11311
11312         dev_node = rb_first(dev_cache);
11313         while (dev_node) {
11314                 dev_rec = container_of(dev_node, struct device_record, node);
11315                 err = check_device_used(dev_rec, dev_extent_cache);
11316                 if (err)
11317                         ret = err;
11318
11319                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11320                                          global_info->sectorsize);
11321                 dev_node = rb_next(dev_node);
11322         }
11323         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11324                             device_list) {
11325                 fprintf(stderr,
11326                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11327                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
11328                 if (!ret)
11329                         ret = 1;
11330         }
11331         return ret;
11332 }
11333
11334 static int add_root_item_to_list(struct list_head *head,
11335                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11336                                   u8 level, u8 drop_level,
11337                                   struct btrfs_key *drop_key)
11338 {
11339
11340         struct root_item_record *ri_rec;
11341         ri_rec = malloc(sizeof(*ri_rec));
11342         if (!ri_rec)
11343                 return -ENOMEM;
11344         ri_rec->bytenr = bytenr;
11345         ri_rec->objectid = objectid;
11346         ri_rec->level = level;
11347         ri_rec->drop_level = drop_level;
11348         ri_rec->last_snapshot = last_snapshot;
11349         if (drop_key)
11350                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11351         list_add_tail(&ri_rec->list, head);
11352
11353         return 0;
11354 }
11355
11356 static void free_root_item_list(struct list_head *list)
11357 {
11358         struct root_item_record *ri_rec;
11359
11360         while (!list_empty(list)) {
11361                 ri_rec = list_first_entry(list, struct root_item_record,
11362                                           list);
11363                 list_del_init(&ri_rec->list);
11364                 free(ri_rec);
11365         }
11366 }
11367
11368 static int deal_root_from_list(struct list_head *list,
11369                                struct btrfs_root *root,
11370                                struct block_info *bits,
11371                                int bits_nr,
11372                                struct cache_tree *pending,
11373                                struct cache_tree *seen,
11374                                struct cache_tree *reada,
11375                                struct cache_tree *nodes,
11376                                struct cache_tree *extent_cache,
11377                                struct cache_tree *chunk_cache,
11378                                struct rb_root *dev_cache,
11379                                struct block_group_tree *block_group_cache,
11380                                struct device_extent_tree *dev_extent_cache)
11381 {
11382         int ret = 0;
11383         u64 last;
11384
11385         while (!list_empty(list)) {
11386                 struct root_item_record *rec;
11387                 struct extent_buffer *buf;
11388                 rec = list_entry(list->next,
11389                                  struct root_item_record, list);
11390                 last = 0;
11391                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11392                 if (!extent_buffer_uptodate(buf)) {
11393                         free_extent_buffer(buf);
11394                         ret = -EIO;
11395                         break;
11396                 }
11397                 ret = add_root_to_pending(buf, extent_cache, pending,
11398                                     seen, nodes, rec->objectid);
11399                 if (ret < 0)
11400                         break;
11401                 /*
11402                  * To rebuild extent tree, we need deal with snapshot
11403                  * one by one, otherwise we deal with node firstly which
11404                  * can maximize readahead.
11405                  */
11406                 while (1) {
11407                         ret = run_next_block(root, bits, bits_nr, &last,
11408                                              pending, seen, reada, nodes,
11409                                              extent_cache, chunk_cache,
11410                                              dev_cache, block_group_cache,
11411                                              dev_extent_cache, rec);
11412                         if (ret != 0)
11413                                 break;
11414                 }
11415                 free_extent_buffer(buf);
11416                 list_del(&rec->list);
11417                 free(rec);
11418                 if (ret < 0)
11419                         break;
11420         }
11421         while (ret >= 0) {
11422                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11423                                      reada, nodes, extent_cache, chunk_cache,
11424                                      dev_cache, block_group_cache,
11425                                      dev_extent_cache, NULL);
11426                 if (ret != 0) {
11427                         if (ret > 0)
11428                                 ret = 0;
11429                         break;
11430                 }
11431         }
11432         return ret;
11433 }
11434
11435 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11436 {
11437         struct rb_root dev_cache;
11438         struct cache_tree chunk_cache;
11439         struct block_group_tree block_group_cache;
11440         struct device_extent_tree dev_extent_cache;
11441         struct cache_tree extent_cache;
11442         struct cache_tree seen;
11443         struct cache_tree pending;
11444         struct cache_tree reada;
11445         struct cache_tree nodes;
11446         struct extent_io_tree excluded_extents;
11447         struct cache_tree corrupt_blocks;
11448         struct btrfs_path path;
11449         struct btrfs_key key;
11450         struct btrfs_key found_key;
11451         int ret, err = 0;
11452         struct block_info *bits;
11453         int bits_nr;
11454         struct extent_buffer *leaf;
11455         int slot;
11456         struct btrfs_root_item ri;
11457         struct list_head dropping_trees;
11458         struct list_head normal_trees;
11459         struct btrfs_root *root1;
11460         struct btrfs_root *root;
11461         u64 objectid;
11462         u8 level;
11463
11464         root = fs_info->fs_root;
11465         dev_cache = RB_ROOT;
11466         cache_tree_init(&chunk_cache);
11467         block_group_tree_init(&block_group_cache);
11468         device_extent_tree_init(&dev_extent_cache);
11469
11470         cache_tree_init(&extent_cache);
11471         cache_tree_init(&seen);
11472         cache_tree_init(&pending);
11473         cache_tree_init(&nodes);
11474         cache_tree_init(&reada);
11475         cache_tree_init(&corrupt_blocks);
11476         extent_io_tree_init(&excluded_extents);
11477         INIT_LIST_HEAD(&dropping_trees);
11478         INIT_LIST_HEAD(&normal_trees);
11479
11480         if (repair) {
11481                 fs_info->excluded_extents = &excluded_extents;
11482                 fs_info->fsck_extent_cache = &extent_cache;
11483                 fs_info->free_extent_hook = free_extent_hook;
11484                 fs_info->corrupt_blocks = &corrupt_blocks;
11485         }
11486
11487         bits_nr = 1024;
11488         bits = malloc(bits_nr * sizeof(struct block_info));
11489         if (!bits) {
11490                 perror("malloc");
11491                 exit(1);
11492         }
11493
11494         if (ctx.progress_enabled) {
11495                 ctx.tp = TASK_EXTENTS;
11496                 task_start(ctx.info);
11497         }
11498
11499 again:
11500         root1 = fs_info->tree_root;
11501         level = btrfs_header_level(root1->node);
11502         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11503                                     root1->node->start, 0, level, 0, NULL);
11504         if (ret < 0)
11505                 goto out;
11506         root1 = fs_info->chunk_root;
11507         level = btrfs_header_level(root1->node);
11508         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11509                                     root1->node->start, 0, level, 0, NULL);
11510         if (ret < 0)
11511                 goto out;
11512         btrfs_init_path(&path);
11513         key.offset = 0;
11514         key.objectid = 0;
11515         key.type = BTRFS_ROOT_ITEM_KEY;
11516         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11517         if (ret < 0)
11518                 goto out;
11519         while(1) {
11520                 leaf = path.nodes[0];
11521                 slot = path.slots[0];
11522                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11523                         ret = btrfs_next_leaf(root, &path);
11524                         if (ret != 0)
11525                                 break;
11526                         leaf = path.nodes[0];
11527                         slot = path.slots[0];
11528                 }
11529                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11530                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11531                         unsigned long offset;
11532                         u64 last_snapshot;
11533
11534                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11535                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11536                         last_snapshot = btrfs_root_last_snapshot(&ri);
11537                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11538                                 level = btrfs_root_level(&ri);
11539                                 ret = add_root_item_to_list(&normal_trees,
11540                                                 found_key.objectid,
11541                                                 btrfs_root_bytenr(&ri),
11542                                                 last_snapshot, level,
11543                                                 0, NULL);
11544                                 if (ret < 0)
11545                                         goto out;
11546                         } else {
11547                                 level = btrfs_root_level(&ri);
11548                                 objectid = found_key.objectid;
11549                                 btrfs_disk_key_to_cpu(&found_key,
11550                                                       &ri.drop_progress);
11551                                 ret = add_root_item_to_list(&dropping_trees,
11552                                                 objectid,
11553                                                 btrfs_root_bytenr(&ri),
11554                                                 last_snapshot, level,
11555                                                 ri.drop_level, &found_key);
11556                                 if (ret < 0)
11557                                         goto out;
11558                         }
11559                 }
11560                 path.slots[0]++;
11561         }
11562         btrfs_release_path(&path);
11563
11564         /*
11565          * check_block can return -EAGAIN if it fixes something, please keep
11566          * this in mind when dealing with return values from these functions, if
11567          * we get -EAGAIN we want to fall through and restart the loop.
11568          */
11569         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11570                                   &seen, &reada, &nodes, &extent_cache,
11571                                   &chunk_cache, &dev_cache, &block_group_cache,
11572                                   &dev_extent_cache);
11573         if (ret < 0) {
11574                 if (ret == -EAGAIN)
11575                         goto loop;
11576                 goto out;
11577         }
11578         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11579                                   &pending, &seen, &reada, &nodes,
11580                                   &extent_cache, &chunk_cache, &dev_cache,
11581                                   &block_group_cache, &dev_extent_cache);
11582         if (ret < 0) {
11583                 if (ret == -EAGAIN)
11584                         goto loop;
11585                 goto out;
11586         }
11587
11588         ret = check_chunks(&chunk_cache, &block_group_cache,
11589                            &dev_extent_cache, NULL, NULL, NULL, 0);
11590         if (ret) {
11591                 if (ret == -EAGAIN)
11592                         goto loop;
11593                 err = ret;
11594         }
11595
11596         ret = check_extent_refs(root, &extent_cache);
11597         if (ret < 0) {
11598                 if (ret == -EAGAIN)
11599                         goto loop;
11600                 goto out;
11601         }
11602
11603         ret = check_devices(&dev_cache, &dev_extent_cache);
11604         if (ret && err)
11605                 ret = err;
11606
11607 out:
11608         task_stop(ctx.info);
11609         if (repair) {
11610                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11611                 extent_io_tree_cleanup(&excluded_extents);
11612                 fs_info->fsck_extent_cache = NULL;
11613                 fs_info->free_extent_hook = NULL;
11614                 fs_info->corrupt_blocks = NULL;
11615                 fs_info->excluded_extents = NULL;
11616         }
11617         free(bits);
11618         free_chunk_cache_tree(&chunk_cache);
11619         free_device_cache_tree(&dev_cache);
11620         free_block_group_tree(&block_group_cache);
11621         free_device_extent_tree(&dev_extent_cache);
11622         free_extent_cache_tree(&seen);
11623         free_extent_cache_tree(&pending);
11624         free_extent_cache_tree(&reada);
11625         free_extent_cache_tree(&nodes);
11626         free_root_item_list(&normal_trees);
11627         free_root_item_list(&dropping_trees);
11628         return ret;
11629 loop:
11630         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11631         free_extent_cache_tree(&seen);
11632         free_extent_cache_tree(&pending);
11633         free_extent_cache_tree(&reada);
11634         free_extent_cache_tree(&nodes);
11635         free_chunk_cache_tree(&chunk_cache);
11636         free_block_group_tree(&block_group_cache);
11637         free_device_cache_tree(&dev_cache);
11638         free_device_extent_tree(&dev_extent_cache);
11639         free_extent_record_cache(&extent_cache);
11640         free_root_item_list(&normal_trees);
11641         free_root_item_list(&dropping_trees);
11642         extent_io_tree_cleanup(&excluded_extents);
11643         goto again;
11644 }
11645
11646 static int check_extent_inline_ref(struct extent_buffer *eb,
11647                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11648 {
11649         int ret;
11650         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11651
11652         switch (type) {
11653         case BTRFS_TREE_BLOCK_REF_KEY:
11654         case BTRFS_EXTENT_DATA_REF_KEY:
11655         case BTRFS_SHARED_BLOCK_REF_KEY:
11656         case BTRFS_SHARED_DATA_REF_KEY:
11657                 ret = 0;
11658                 break;
11659         default:
11660                 error("extent[%llu %u %llu] has unknown ref type: %d",
11661                       key->objectid, key->type, key->offset, type);
11662                 ret = UNKNOWN_TYPE;
11663                 break;
11664         }
11665
11666         return ret;
11667 }
11668
11669 /*
11670  * Check backrefs of a tree block given by @bytenr or @eb.
11671  *
11672  * @root:       the root containing the @bytenr or @eb
11673  * @eb:         tree block extent buffer, can be NULL
11674  * @bytenr:     bytenr of the tree block to search
11675  * @level:      tree level of the tree block
11676  * @owner:      owner of the tree block
11677  *
11678  * Return >0 for any error found and output error message
11679  * Return 0 for no error found
11680  */
11681 static int check_tree_block_ref(struct btrfs_root *root,
11682                                 struct extent_buffer *eb, u64 bytenr,
11683                                 int level, u64 owner, struct node_refs *nrefs)
11684 {
11685         struct btrfs_key key;
11686         struct btrfs_root *extent_root = root->fs_info->extent_root;
11687         struct btrfs_path path;
11688         struct btrfs_extent_item *ei;
11689         struct btrfs_extent_inline_ref *iref;
11690         struct extent_buffer *leaf;
11691         unsigned long end;
11692         unsigned long ptr;
11693         int slot;
11694         int skinny_level;
11695         int root_level = btrfs_header_level(root->node);
11696         int type;
11697         u32 nodesize = root->fs_info->nodesize;
11698         u32 item_size;
11699         u64 offset;
11700         int tree_reloc_root = 0;
11701         int found_ref = 0;
11702         int err = 0;
11703         int ret;
11704         int strict = 1;
11705         int parent = 0;
11706
11707         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11708             btrfs_header_bytenr(root->node) == bytenr)
11709                 tree_reloc_root = 1;
11710         btrfs_init_path(&path);
11711         key.objectid = bytenr;
11712         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11713                 key.type = BTRFS_METADATA_ITEM_KEY;
11714         else
11715                 key.type = BTRFS_EXTENT_ITEM_KEY;
11716         key.offset = (u64)-1;
11717
11718         /* Search for the backref in extent tree */
11719         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11720         if (ret < 0) {
11721                 err |= BACKREF_MISSING;
11722                 goto out;
11723         }
11724         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11725         if (ret) {
11726                 err |= BACKREF_MISSING;
11727                 goto out;
11728         }
11729
11730         leaf = path.nodes[0];
11731         slot = path.slots[0];
11732         btrfs_item_key_to_cpu(leaf, &key, slot);
11733
11734         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11735
11736         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11737                 skinny_level = (int)key.offset;
11738                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11739         } else {
11740                 struct btrfs_tree_block_info *info;
11741
11742                 info = (struct btrfs_tree_block_info *)(ei + 1);
11743                 skinny_level = btrfs_tree_block_level(leaf, info);
11744                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11745         }
11746
11747
11748         if (eb) {
11749                 u64 header_gen;
11750                 u64 extent_gen;
11751
11752                 /*
11753                  * Due to the feature of shared tree blocks, if the upper node
11754                  * is a fs root or shared node, the extent of checked node may
11755                  * not be updated until the next CoW.
11756                  */
11757                 if (nrefs)
11758                         strict = should_check_extent_strictly(root, nrefs,
11759                                         level);
11760                 if (!(btrfs_extent_flags(leaf, ei) &
11761                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11762                         error(
11763                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11764                                 key.objectid, nodesize,
11765                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11766                         err = BACKREF_MISMATCH;
11767                 }
11768                 header_gen = btrfs_header_generation(eb);
11769                 extent_gen = btrfs_extent_generation(leaf, ei);
11770                 if (header_gen != extent_gen) {
11771                         error(
11772         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11773                                 key.objectid, nodesize, header_gen,
11774                                 extent_gen);
11775                         err = BACKREF_MISMATCH;
11776                 }
11777                 if (level != skinny_level) {
11778                         error(
11779                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11780                                 key.objectid, nodesize, level, skinny_level);
11781                         err = BACKREF_MISMATCH;
11782                 }
11783                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11784                         error(
11785                         "extent[%llu %u] is referred by other roots than %llu",
11786                                 key.objectid, nodesize, root->objectid);
11787                         err = BACKREF_MISMATCH;
11788                 }
11789         }
11790
11791         /*
11792          * Iterate the extent/metadata item to find the exact backref
11793          */
11794         item_size = btrfs_item_size_nr(leaf, slot);
11795         ptr = (unsigned long)iref;
11796         end = (unsigned long)ei + item_size;
11797
11798         while (ptr < end) {
11799                 iref = (struct btrfs_extent_inline_ref *)ptr;
11800                 type = btrfs_extent_inline_ref_type(leaf, iref);
11801                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11802
11803                 ret = check_extent_inline_ref(leaf, &key, iref);
11804                 if (ret) {
11805                         err |= ret;
11806                         break;
11807                 }
11808                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11809                         if (offset == root->objectid)
11810                                 found_ref = 1;
11811                         if (!strict && owner == offset)
11812                                 found_ref = 1;
11813                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11814                         /*
11815                          * Backref of tree reloc root points to itself, no need
11816                          * to check backref any more.
11817                          */
11818                         if (tree_reloc_root) {
11819                                 found_ref = 1;
11820                         } else {
11821                                 /*
11822                                  * Check if the backref points to valid
11823                                  * referencer
11824                                  */
11825                                 found_ref = !check_tree_block_ref( root, NULL,
11826                                                 offset, level + 1, owner,
11827                                                 NULL);
11828                         }
11829                 }
11830
11831                 if (found_ref)
11832                         break;
11833                 ptr += btrfs_extent_inline_ref_size(type);
11834         }
11835
11836         /*
11837          * Inlined extent item doesn't have what we need, check
11838          * TREE_BLOCK_REF_KEY
11839          */
11840         if (!found_ref) {
11841                 btrfs_release_path(&path);
11842                 key.objectid = bytenr;
11843                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11844                 key.offset = root->objectid;
11845
11846                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11847                 if (!ret)
11848                         found_ref = 1;
11849         }
11850         if (!found_ref)
11851                 err |= BACKREF_MISSING;
11852 out:
11853         btrfs_release_path(&path);
11854         if (nrefs && strict &&
11855             level < root_level && nrefs->full_backref[level + 1])
11856                 parent = nrefs->bytenr[level + 1];
11857         if (eb && (err & BACKREF_MISSING))
11858                 error(
11859         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11860                       bytenr, nodesize, owner, level,
11861                       parent ? "parent" : "root",
11862                       parent ? parent : root->objectid);
11863         return err;
11864 }
11865
11866 /*
11867  * If @err contains BACKREF_MISSING then add extent of the
11868  * file_extent_data_item.
11869  *
11870  * Returns error bits after reapir.
11871  */
11872 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11873                                    struct btrfs_root *root,
11874                                    struct btrfs_path *pathp,
11875                                    struct node_refs *nrefs,
11876                                    int err)
11877 {
11878         struct btrfs_file_extent_item *fi;
11879         struct btrfs_key fi_key;
11880         struct btrfs_key key;
11881         struct btrfs_extent_item *ei;
11882         struct btrfs_path path;
11883         struct btrfs_root *extent_root = root->fs_info->extent_root;
11884         struct extent_buffer *eb;
11885         u64 size;
11886         u64 disk_bytenr;
11887         u64 num_bytes;
11888         u64 parent;
11889         u64 offset;
11890         u64 extent_offset;
11891         u64 file_offset;
11892         int generation;
11893         int slot;
11894         int ret = 0;
11895
11896         eb = pathp->nodes[0];
11897         slot = pathp->slots[0];
11898         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11899         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11900
11901         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11902             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11903                 return err;
11904
11905         file_offset = fi_key.offset;
11906         generation = btrfs_file_extent_generation(eb, fi);
11907         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11908         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11909         extent_offset = btrfs_file_extent_offset(eb, fi);
11910         offset = file_offset - extent_offset;
11911
11912         /* now repair only adds backref */
11913         if ((err & BACKREF_MISSING) == 0)
11914                 return err;
11915
11916         /* search extent item */
11917         key.objectid = disk_bytenr;
11918         key.type = BTRFS_EXTENT_ITEM_KEY;
11919         key.offset = num_bytes;
11920
11921         btrfs_init_path(&path);
11922         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11923         if (ret < 0) {
11924                 ret = -EIO;
11925                 goto out;
11926         }
11927
11928         /* insert an extent item */
11929         if (ret > 0) {
11930                 key.objectid = disk_bytenr;
11931                 key.type = BTRFS_EXTENT_ITEM_KEY;
11932                 key.offset = num_bytes;
11933                 size = sizeof(*ei);
11934
11935                 btrfs_release_path(&path);
11936                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11937                                               size);
11938                 if (ret)
11939                         goto out;
11940                 eb = path.nodes[0];
11941                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11942
11943                 btrfs_set_extent_refs(eb, ei, 0);
11944                 btrfs_set_extent_generation(eb, ei, generation);
11945                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11946
11947                 btrfs_mark_buffer_dirty(eb);
11948                 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11949                                                num_bytes, 1, 0);
11950                 btrfs_release_path(&path);
11951         }
11952
11953         if (nrefs->full_backref[0])
11954                 parent = btrfs_header_bytenr(eb);
11955         else
11956                 parent = 0;
11957
11958         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11959                                    root->objectid,
11960                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11961                                    offset);
11962         if (ret) {
11963                 error(
11964                 "failed to increase extent data backref[%llu %llu] root %llu",
11965                       disk_bytenr, num_bytes, root->objectid);
11966                 goto out;
11967         } else {
11968                 printf("Add one extent data backref [%llu %llu]\n",
11969                        disk_bytenr, num_bytes);
11970         }
11971
11972         err &= ~BACKREF_MISSING;
11973 out:
11974         if (ret)
11975                 error("can't repair root %llu extent data item[%llu %llu]",
11976                       root->objectid, disk_bytenr, num_bytes);
11977         return err;
11978 }
11979
11980 /*
11981  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11982  *
11983  * Return >0 any error found and output error message
11984  * Return 0 for no error found
11985  */
11986 static int check_extent_data_item(struct btrfs_root *root,
11987                                   struct btrfs_path *pathp,
11988                                   struct node_refs *nrefs,  int account_bytes)
11989 {
11990         struct btrfs_file_extent_item *fi;
11991         struct extent_buffer *eb = pathp->nodes[0];
11992         struct btrfs_path path;
11993         struct btrfs_root *extent_root = root->fs_info->extent_root;
11994         struct btrfs_key fi_key;
11995         struct btrfs_key dbref_key;
11996         struct extent_buffer *leaf;
11997         struct btrfs_extent_item *ei;
11998         struct btrfs_extent_inline_ref *iref;
11999         struct btrfs_extent_data_ref *dref;
12000         u64 owner;
12001         u64 disk_bytenr;
12002         u64 disk_num_bytes;
12003         u64 extent_num_bytes;
12004         u64 extent_flags;
12005         u32 item_size;
12006         unsigned long end;
12007         unsigned long ptr;
12008         int type;
12009         u64 ref_root;
12010         int found_dbackref = 0;
12011         int slot = pathp->slots[0];
12012         int err = 0;
12013         int ret;
12014         int strict;
12015
12016         btrfs_item_key_to_cpu(eb, &fi_key, slot);
12017         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
12018
12019         /* Nothing to check for hole and inline data extents */
12020         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
12021             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
12022                 return 0;
12023
12024         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
12025         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
12026         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
12027
12028         /* Check unaligned disk_num_bytes and num_bytes */
12029         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
12030                 error(
12031 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
12032                         fi_key.objectid, fi_key.offset, disk_num_bytes,
12033                         root->fs_info->sectorsize);
12034                 err |= BYTES_UNALIGNED;
12035         } else if (account_bytes) {
12036                 data_bytes_allocated += disk_num_bytes;
12037         }
12038         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
12039                 error(
12040 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
12041                         fi_key.objectid, fi_key.offset, extent_num_bytes,
12042                         root->fs_info->sectorsize);
12043                 err |= BYTES_UNALIGNED;
12044         } else if (account_bytes) {
12045                 data_bytes_referenced += extent_num_bytes;
12046         }
12047         owner = btrfs_header_owner(eb);
12048
12049         /* Check the extent item of the file extent in extent tree */
12050         btrfs_init_path(&path);
12051         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12052         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
12053         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
12054
12055         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
12056         if (ret)
12057                 goto out;
12058
12059         leaf = path.nodes[0];
12060         slot = path.slots[0];
12061         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12062
12063         extent_flags = btrfs_extent_flags(leaf, ei);
12064
12065         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12066                 error(
12067                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12068                     disk_bytenr, disk_num_bytes,
12069                     BTRFS_EXTENT_FLAG_DATA);
12070                 err |= BACKREF_MISMATCH;
12071         }
12072
12073         /* Check data backref inside that extent item */
12074         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
12075         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12076         ptr = (unsigned long)iref;
12077         end = (unsigned long)ei + item_size;
12078         strict = should_check_extent_strictly(root, nrefs, -1);
12079
12080         while (ptr < end) {
12081                 iref = (struct btrfs_extent_inline_ref *)ptr;
12082                 type = btrfs_extent_inline_ref_type(leaf, iref);
12083                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12084
12085                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
12086                 if (ret) {
12087                         err |= ret;
12088                         break;
12089                 }
12090                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12091                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
12092                         if (ref_root == root->objectid)
12093                                 found_dbackref = 1;
12094                         else if (!strict && owner == ref_root)
12095                                 found_dbackref = 1;
12096                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
12097                         found_dbackref = !check_tree_block_ref(root, NULL,
12098                                 btrfs_extent_inline_ref_offset(leaf, iref),
12099                                 0, owner, NULL);
12100                 }
12101
12102                 if (found_dbackref)
12103                         break;
12104                 ptr += btrfs_extent_inline_ref_size(type);
12105         }
12106
12107         if (!found_dbackref) {
12108                 btrfs_release_path(&path);
12109
12110                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12111                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12112                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12113                 dbref_key.offset = hash_extent_data_ref(root->objectid,
12114                                 fi_key.objectid, fi_key.offset);
12115
12116                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12117                                         &dbref_key, &path, 0, 0);
12118                 if (!ret) {
12119                         found_dbackref = 1;
12120                         goto out;
12121                 }
12122
12123                 btrfs_release_path(&path);
12124
12125                 /*
12126                  * Neither inlined nor EXTENT_DATA_REF found, try
12127                  * SHARED_DATA_REF as last chance.
12128                  */
12129                 dbref_key.objectid = disk_bytenr;
12130                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12131                 dbref_key.offset = eb->start;
12132
12133                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12134                                         &dbref_key, &path, 0, 0);
12135                 if (!ret) {
12136                         found_dbackref = 1;
12137                         goto out;
12138                 }
12139         }
12140
12141 out:
12142         if (!found_dbackref)
12143                 err |= BACKREF_MISSING;
12144         btrfs_release_path(&path);
12145         if (err & BACKREF_MISSING) {
12146                 error("data extent[%llu %llu] backref lost",
12147                       disk_bytenr, disk_num_bytes);
12148         }
12149         return err;
12150 }
12151
12152 /*
12153  * Get real tree block level for the case like shared block
12154  * Return >= 0 as tree level
12155  * Return <0 for error
12156  */
12157 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12158 {
12159         struct extent_buffer *eb;
12160         struct btrfs_path path;
12161         struct btrfs_key key;
12162         struct btrfs_extent_item *ei;
12163         u64 flags;
12164         u64 transid;
12165         u8 backref_level;
12166         u8 header_level;
12167         int ret;
12168
12169         /* Search extent tree for extent generation and level */
12170         key.objectid = bytenr;
12171         key.type = BTRFS_METADATA_ITEM_KEY;
12172         key.offset = (u64)-1;
12173
12174         btrfs_init_path(&path);
12175         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12176         if (ret < 0)
12177                 goto release_out;
12178         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12179         if (ret < 0)
12180                 goto release_out;
12181         if (ret > 0) {
12182                 ret = -ENOENT;
12183                 goto release_out;
12184         }
12185
12186         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12187         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12188                             struct btrfs_extent_item);
12189         flags = btrfs_extent_flags(path.nodes[0], ei);
12190         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12191                 ret = -ENOENT;
12192                 goto release_out;
12193         }
12194
12195         /* Get transid for later read_tree_block() check */
12196         transid = btrfs_extent_generation(path.nodes[0], ei);
12197
12198         /* Get backref level as one source */
12199         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12200                 backref_level = key.offset;
12201         } else {
12202                 struct btrfs_tree_block_info *info;
12203
12204                 info = (struct btrfs_tree_block_info *)(ei + 1);
12205                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12206         }
12207         btrfs_release_path(&path);
12208
12209         /* Get level from tree block as an alternative source */
12210         eb = read_tree_block(fs_info, bytenr, transid);
12211         if (!extent_buffer_uptodate(eb)) {
12212                 free_extent_buffer(eb);
12213                 return -EIO;
12214         }
12215         header_level = btrfs_header_level(eb);
12216         free_extent_buffer(eb);
12217
12218         if (header_level != backref_level)
12219                 return -EIO;
12220         return header_level;
12221
12222 release_out:
12223         btrfs_release_path(&path);
12224         return ret;
12225 }
12226
12227 /*
12228  * Check if a tree block backref is valid (points to a valid tree block)
12229  * if level == -1, level will be resolved
12230  * Return >0 for any error found and print error message
12231  */
12232 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12233                                     u64 bytenr, int level)
12234 {
12235         struct btrfs_root *root;
12236         struct btrfs_key key;
12237         struct btrfs_path path;
12238         struct extent_buffer *eb;
12239         struct extent_buffer *node;
12240         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12241         int err = 0;
12242         int ret;
12243
12244         /* Query level for level == -1 special case */
12245         if (level == -1)
12246                 level = query_tree_block_level(fs_info, bytenr);
12247         if (level < 0) {
12248                 err |= REFERENCER_MISSING;
12249                 goto out;
12250         }
12251
12252         key.objectid = root_id;
12253         key.type = BTRFS_ROOT_ITEM_KEY;
12254         key.offset = (u64)-1;
12255
12256         root = btrfs_read_fs_root(fs_info, &key);
12257         if (IS_ERR(root)) {
12258                 err |= REFERENCER_MISSING;
12259                 goto out;
12260         }
12261
12262         /* Read out the tree block to get item/node key */
12263         eb = read_tree_block(fs_info, bytenr, 0);
12264         if (!extent_buffer_uptodate(eb)) {
12265                 err |= REFERENCER_MISSING;
12266                 free_extent_buffer(eb);
12267                 goto out;
12268         }
12269
12270         /* Empty tree, no need to check key */
12271         if (!btrfs_header_nritems(eb) && !level) {
12272                 free_extent_buffer(eb);
12273                 goto out;
12274         }
12275
12276         if (level)
12277                 btrfs_node_key_to_cpu(eb, &key, 0);
12278         else
12279                 btrfs_item_key_to_cpu(eb, &key, 0);
12280
12281         free_extent_buffer(eb);
12282
12283         btrfs_init_path(&path);
12284         path.lowest_level = level;
12285         /* Search with the first key, to ensure we can reach it */
12286         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12287         if (ret < 0) {
12288                 err |= REFERENCER_MISSING;
12289                 goto release_out;
12290         }
12291
12292         node = path.nodes[level];
12293         if (btrfs_header_bytenr(node) != bytenr) {
12294                 error(
12295         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12296                         bytenr, nodesize, bytenr,
12297                         btrfs_header_bytenr(node));
12298                 err |= REFERENCER_MISMATCH;
12299         }
12300         if (btrfs_header_level(node) != level) {
12301                 error(
12302         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12303                         bytenr, nodesize, level,
12304                         btrfs_header_level(node));
12305                 err |= REFERENCER_MISMATCH;
12306         }
12307
12308 release_out:
12309         btrfs_release_path(&path);
12310 out:
12311         if (err & REFERENCER_MISSING) {
12312                 if (level < 0)
12313                         error("extent [%llu %d] lost referencer (owner: %llu)",
12314                                 bytenr, nodesize, root_id);
12315                 else
12316                         error(
12317                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12318                                 bytenr, nodesize, root_id, level);
12319         }
12320
12321         return err;
12322 }
12323
12324 /*
12325  * Check if tree block @eb is tree reloc root.
12326  * Return 0 if it's not or any problem happens
12327  * Return 1 if it's a tree reloc root
12328  */
12329 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12330                                  struct extent_buffer *eb)
12331 {
12332         struct btrfs_root *tree_reloc_root;
12333         struct btrfs_key key;
12334         u64 bytenr = btrfs_header_bytenr(eb);
12335         u64 owner = btrfs_header_owner(eb);
12336         int ret = 0;
12337
12338         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12339         key.offset = owner;
12340         key.type = BTRFS_ROOT_ITEM_KEY;
12341
12342         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12343         if (IS_ERR(tree_reloc_root))
12344                 return 0;
12345
12346         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12347                 ret = 1;
12348         btrfs_free_fs_root(tree_reloc_root);
12349         return ret;
12350 }
12351
12352 /*
12353  * Check referencer for shared block backref
12354  * If level == -1, this function will resolve the level.
12355  */
12356 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12357                                      u64 parent, u64 bytenr, int level)
12358 {
12359         struct extent_buffer *eb;
12360         u32 nr;
12361         int found_parent = 0;
12362         int i;
12363
12364         eb = read_tree_block(fs_info, parent, 0);
12365         if (!extent_buffer_uptodate(eb))
12366                 goto out;
12367
12368         if (level == -1)
12369                 level = query_tree_block_level(fs_info, bytenr);
12370         if (level < 0)
12371                 goto out;
12372
12373         /* It's possible it's a tree reloc root */
12374         if (parent == bytenr) {
12375                 if (is_tree_reloc_root(fs_info, eb))
12376                         found_parent = 1;
12377                 goto out;
12378         }
12379
12380         if (level + 1 != btrfs_header_level(eb))
12381                 goto out;
12382
12383         nr = btrfs_header_nritems(eb);
12384         for (i = 0; i < nr; i++) {
12385                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12386                         found_parent = 1;
12387                         break;
12388                 }
12389         }
12390 out:
12391         free_extent_buffer(eb);
12392         if (!found_parent) {
12393                 error(
12394         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12395                         bytenr, fs_info->nodesize, parent, level);
12396                 return REFERENCER_MISSING;
12397         }
12398         return 0;
12399 }
12400
12401 /*
12402  * Check referencer for normal (inlined) data ref
12403  * If len == 0, it will be resolved by searching in extent tree
12404  */
12405 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12406                                      u64 root_id, u64 objectid, u64 offset,
12407                                      u64 bytenr, u64 len, u32 count)
12408 {
12409         struct btrfs_root *root;
12410         struct btrfs_root *extent_root = fs_info->extent_root;
12411         struct btrfs_key key;
12412         struct btrfs_path path;
12413         struct extent_buffer *leaf;
12414         struct btrfs_file_extent_item *fi;
12415         u32 found_count = 0;
12416         int slot;
12417         int ret = 0;
12418
12419         if (!len) {
12420                 key.objectid = bytenr;
12421                 key.type = BTRFS_EXTENT_ITEM_KEY;
12422                 key.offset = (u64)-1;
12423
12424                 btrfs_init_path(&path);
12425                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12426                 if (ret < 0)
12427                         goto out;
12428                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12429                 if (ret)
12430                         goto out;
12431                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12432                 if (key.objectid != bytenr ||
12433                     key.type != BTRFS_EXTENT_ITEM_KEY)
12434                         goto out;
12435                 len = key.offset;
12436                 btrfs_release_path(&path);
12437         }
12438         key.objectid = root_id;
12439         key.type = BTRFS_ROOT_ITEM_KEY;
12440         key.offset = (u64)-1;
12441         btrfs_init_path(&path);
12442
12443         root = btrfs_read_fs_root(fs_info, &key);
12444         if (IS_ERR(root))
12445                 goto out;
12446
12447         key.objectid = objectid;
12448         key.type = BTRFS_EXTENT_DATA_KEY;
12449         /*
12450          * It can be nasty as data backref offset is
12451          * file offset - file extent offset, which is smaller or
12452          * equal to original backref offset.  The only special case is
12453          * overflow.  So we need to special check and do further search.
12454          */
12455         key.offset = offset & (1ULL << 63) ? 0 : offset;
12456
12457         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12458         if (ret < 0)
12459                 goto out;
12460
12461         /*
12462          * Search afterwards to get correct one
12463          * NOTE: As we must do a comprehensive check on the data backref to
12464          * make sure the dref count also matches, we must iterate all file
12465          * extents for that inode.
12466          */
12467         while (1) {
12468                 leaf = path.nodes[0];
12469                 slot = path.slots[0];
12470
12471                 if (slot >= btrfs_header_nritems(leaf) ||
12472                     btrfs_header_owner(leaf) != root_id)
12473                         goto next;
12474                 btrfs_item_key_to_cpu(leaf, &key, slot);
12475                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12476                         break;
12477                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12478                 /*
12479                  * Except normal disk bytenr and disk num bytes, we still
12480                  * need to do extra check on dbackref offset as
12481                  * dbackref offset = file_offset - file_extent_offset
12482                  */
12483                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12484                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12485                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12486                     offset)
12487                         found_count++;
12488
12489 next:
12490                 ret = btrfs_next_item(root, &path);
12491                 if (ret)
12492                         break;
12493         }
12494 out:
12495         btrfs_release_path(&path);
12496         if (found_count != count) {
12497                 error(
12498 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12499                         bytenr, len, root_id, objectid, offset, count, found_count);
12500                 return REFERENCER_MISSING;
12501         }
12502         return 0;
12503 }
12504
12505 /*
12506  * Check if the referencer of a shared data backref exists
12507  */
12508 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12509                                      u64 parent, u64 bytenr)
12510 {
12511         struct extent_buffer *eb;
12512         struct btrfs_key key;
12513         struct btrfs_file_extent_item *fi;
12514         u32 nr;
12515         int found_parent = 0;
12516         int i;
12517
12518         eb = read_tree_block(fs_info, parent, 0);
12519         if (!extent_buffer_uptodate(eb))
12520                 goto out;
12521
12522         nr = btrfs_header_nritems(eb);
12523         for (i = 0; i < nr; i++) {
12524                 btrfs_item_key_to_cpu(eb, &key, i);
12525                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12526                         continue;
12527
12528                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12529                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12530                         continue;
12531
12532                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12533                         found_parent = 1;
12534                         break;
12535                 }
12536         }
12537
12538 out:
12539         free_extent_buffer(eb);
12540         if (!found_parent) {
12541                 error("shared extent %llu referencer lost (parent: %llu)",
12542                         bytenr, parent);
12543                 return REFERENCER_MISSING;
12544         }
12545         return 0;
12546 }
12547
12548 /*
12549  * Only delete backref if REFERENCER_MISSING now
12550  *
12551  * Returns <0   the extent was deleted
12552  * Returns >0   the backref was deleted but extent still exists, returned value
12553  *               means error after repair
12554  * Returns  0   nothing happened
12555  */
12556 static int repair_extent_item(struct btrfs_trans_handle *trans,
12557                       struct btrfs_root *root, struct btrfs_path *path,
12558                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12559                       u64 owner, u64 offset, int err)
12560 {
12561         struct btrfs_key old_key;
12562         int freed = 0;
12563         int ret;
12564
12565         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12566
12567         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12568                 /* delete the backref */
12569                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12570                           num_bytes, parent, root_objectid, owner, offset);
12571                 if (!ret) {
12572                         freed = 1;
12573                         err &= ~REFERENCER_MISSING;
12574                         printf("Delete backref in extent [%llu %llu]\n",
12575                                bytenr, num_bytes);
12576                 } else {
12577                         error("fail to delete backref in extent [%llu %llu]",
12578                                bytenr, num_bytes);
12579                 }
12580         }
12581
12582         /* btrfs_free_extent may delete the extent */
12583         btrfs_release_path(path);
12584         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12585
12586         if (ret)
12587                 ret = -ENOENT;
12588         else if (freed)
12589                 ret = err;
12590         return ret;
12591 }
12592
12593 /*
12594  * This function will check a given extent item, including its backref and
12595  * itself (like crossing stripe boundary and type)
12596  *
12597  * Since we don't use extent_record anymore, introduce new error bit
12598  */
12599 static int check_extent_item(struct btrfs_trans_handle *trans,
12600                              struct btrfs_fs_info *fs_info,
12601                              struct btrfs_path *path)
12602 {
12603         struct btrfs_extent_item *ei;
12604         struct btrfs_extent_inline_ref *iref;
12605         struct btrfs_extent_data_ref *dref;
12606         struct extent_buffer *eb = path->nodes[0];
12607         unsigned long end;
12608         unsigned long ptr;
12609         int slot = path->slots[0];
12610         int type;
12611         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12612         u32 item_size = btrfs_item_size_nr(eb, slot);
12613         u64 flags;
12614         u64 offset;
12615         u64 parent;
12616         u64 num_bytes;
12617         u64 root_objectid;
12618         u64 owner;
12619         u64 owner_offset;
12620         int metadata = 0;
12621         int level;
12622         struct btrfs_key key;
12623         int ret;
12624         int err = 0;
12625
12626         btrfs_item_key_to_cpu(eb, &key, slot);
12627         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12628                 bytes_used += key.offset;
12629                 num_bytes = key.offset;
12630         } else {
12631                 bytes_used += nodesize;
12632                 num_bytes = nodesize;
12633         }
12634
12635         if (item_size < sizeof(*ei)) {
12636                 /*
12637                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12638                  * old thing when on disk format is still un-determined.
12639                  * No need to care about it anymore
12640                  */
12641                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12642                 return -ENOTTY;
12643         }
12644
12645         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12646         flags = btrfs_extent_flags(eb, ei);
12647
12648         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12649                 metadata = 1;
12650         if (metadata && check_crossing_stripes(global_info, key.objectid,
12651                                                eb->len)) {
12652                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12653                       key.objectid, key.objectid + nodesize);
12654                 err |= CROSSING_STRIPE_BOUNDARY;
12655         }
12656
12657         ptr = (unsigned long)(ei + 1);
12658
12659         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12660                 /* Old EXTENT_ITEM metadata */
12661                 struct btrfs_tree_block_info *info;
12662
12663                 info = (struct btrfs_tree_block_info *)ptr;
12664                 level = btrfs_tree_block_level(eb, info);
12665                 ptr += sizeof(struct btrfs_tree_block_info);
12666         } else {
12667                 /* New METADATA_ITEM */
12668                 level = key.offset;
12669         }
12670         end = (unsigned long)ei + item_size;
12671
12672 next:
12673         /* Reached extent item end normally */
12674         if (ptr == end)
12675                 goto out;
12676
12677         /* Beyond extent item end, wrong item size */
12678         if (ptr > end) {
12679                 err |= ITEM_SIZE_MISMATCH;
12680                 error("extent item at bytenr %llu slot %d has wrong size",
12681                         eb->start, slot);
12682                 goto out;
12683         }
12684
12685         parent = 0;
12686         root_objectid = 0;
12687         owner = 0;
12688         owner_offset = 0;
12689         /* Now check every backref in this extent item */
12690         iref = (struct btrfs_extent_inline_ref *)ptr;
12691         type = btrfs_extent_inline_ref_type(eb, iref);
12692         offset = btrfs_extent_inline_ref_offset(eb, iref);
12693         switch (type) {
12694         case BTRFS_TREE_BLOCK_REF_KEY:
12695                 root_objectid = offset;
12696                 owner = level;
12697                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12698                                                level);
12699                 err |= ret;
12700                 break;
12701         case BTRFS_SHARED_BLOCK_REF_KEY:
12702                 parent = offset;
12703                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12704                                                  level);
12705                 err |= ret;
12706                 break;
12707         case BTRFS_EXTENT_DATA_REF_KEY:
12708                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12709                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12710                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12711                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12712                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12713                                         owner_offset, key.objectid, key.offset,
12714                                         btrfs_extent_data_ref_count(eb, dref));
12715                 err |= ret;
12716                 break;
12717         case BTRFS_SHARED_DATA_REF_KEY:
12718                 parent = offset;
12719                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12720                 err |= ret;
12721                 break;
12722         default:
12723                 error("extent[%llu %d %llu] has unknown ref type: %d",
12724                         key.objectid, key.type, key.offset, type);
12725                 ret = UNKNOWN_TYPE;
12726                 err |= ret;
12727                 goto out;
12728         }
12729
12730         if (err && repair) {
12731                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12732                          key.objectid, num_bytes, parent, root_objectid,
12733                          owner, owner_offset, ret);
12734                 if (ret < 0)
12735                         goto out;
12736                 if (ret) {
12737                         goto next;
12738                         err = ret;
12739                 }
12740         }
12741
12742         ptr += btrfs_extent_inline_ref_size(type);
12743         goto next;
12744
12745 out:
12746         return err;
12747 }
12748
12749 /*
12750  * Check if a dev extent item is referred correctly by its chunk
12751  */
12752 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12753                                  struct extent_buffer *eb, int slot)
12754 {
12755         struct btrfs_root *chunk_root = fs_info->chunk_root;
12756         struct btrfs_dev_extent *ptr;
12757         struct btrfs_path path;
12758         struct btrfs_key chunk_key;
12759         struct btrfs_key devext_key;
12760         struct btrfs_chunk *chunk;
12761         struct extent_buffer *l;
12762         int num_stripes;
12763         u64 length;
12764         int i;
12765         int found_chunk = 0;
12766         int ret;
12767
12768         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12769         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12770         length = btrfs_dev_extent_length(eb, ptr);
12771
12772         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12773         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12774         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12775
12776         btrfs_init_path(&path);
12777         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12778         if (ret)
12779                 goto out;
12780
12781         l = path.nodes[0];
12782         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12783         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12784                                       chunk_key.offset);
12785         if (ret < 0)
12786                 goto out;
12787
12788         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12789                 goto out;
12790
12791         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12792         for (i = 0; i < num_stripes; i++) {
12793                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12794                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12795
12796                 if (devid == devext_key.objectid &&
12797                     offset == devext_key.offset) {
12798                         found_chunk = 1;
12799                         break;
12800                 }
12801         }
12802 out:
12803         btrfs_release_path(&path);
12804         if (!found_chunk) {
12805                 error(
12806                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12807                         devext_key.objectid, devext_key.offset, length);
12808                 return REFERENCER_MISSING;
12809         }
12810         return 0;
12811 }
12812
12813 /*
12814  * Check if the used space is correct with the dev item
12815  */
12816 static int check_dev_item(struct btrfs_fs_info *fs_info,
12817                           struct extent_buffer *eb, int slot)
12818 {
12819         struct btrfs_root *dev_root = fs_info->dev_root;
12820         struct btrfs_dev_item *dev_item;
12821         struct btrfs_path path;
12822         struct btrfs_key key;
12823         struct btrfs_dev_extent *ptr;
12824         u64 total_bytes;
12825         u64 dev_id;
12826         u64 used;
12827         u64 total = 0;
12828         int ret;
12829
12830         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12831         dev_id = btrfs_device_id(eb, dev_item);
12832         used = btrfs_device_bytes_used(eb, dev_item);
12833         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12834
12835         key.objectid = dev_id;
12836         key.type = BTRFS_DEV_EXTENT_KEY;
12837         key.offset = 0;
12838
12839         btrfs_init_path(&path);
12840         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12841         if (ret < 0) {
12842                 btrfs_item_key_to_cpu(eb, &key, slot);
12843                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12844                         key.objectid, key.type, key.offset);
12845                 btrfs_release_path(&path);
12846                 return REFERENCER_MISSING;
12847         }
12848
12849         /* Iterate dev_extents to calculate the used space of a device */
12850         while (1) {
12851                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12852                         goto next;
12853
12854                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12855                 if (key.objectid > dev_id)
12856                         break;
12857                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12858                         goto next;
12859
12860                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12861                                      struct btrfs_dev_extent);
12862                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12863 next:
12864                 ret = btrfs_next_item(dev_root, &path);
12865                 if (ret)
12866                         break;
12867         }
12868         btrfs_release_path(&path);
12869
12870         if (used != total) {
12871                 btrfs_item_key_to_cpu(eb, &key, slot);
12872                 error(
12873 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12874                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12875                         BTRFS_DEV_EXTENT_KEY, dev_id);
12876                 return ACCOUNTING_MISMATCH;
12877         }
12878         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12879
12880         return 0;
12881 }
12882
12883 /*
12884  * Check a block group item with its referener (chunk) and its used space
12885  * with extent/metadata item
12886  */
12887 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12888                                   struct extent_buffer *eb, int slot)
12889 {
12890         struct btrfs_root *extent_root = fs_info->extent_root;
12891         struct btrfs_root *chunk_root = fs_info->chunk_root;
12892         struct btrfs_block_group_item *bi;
12893         struct btrfs_block_group_item bg_item;
12894         struct btrfs_path path;
12895         struct btrfs_key bg_key;
12896         struct btrfs_key chunk_key;
12897         struct btrfs_key extent_key;
12898         struct btrfs_chunk *chunk;
12899         struct extent_buffer *leaf;
12900         struct btrfs_extent_item *ei;
12901         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12902         u64 flags;
12903         u64 bg_flags;
12904         u64 used;
12905         u64 total = 0;
12906         int ret;
12907         int err = 0;
12908
12909         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12910         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12911         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12912         used = btrfs_block_group_used(&bg_item);
12913         bg_flags = btrfs_block_group_flags(&bg_item);
12914
12915         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12916         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12917         chunk_key.offset = bg_key.objectid;
12918
12919         btrfs_init_path(&path);
12920         /* Search for the referencer chunk */
12921         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12922         if (ret) {
12923                 error(
12924                 "block group[%llu %llu] did not find the related chunk item",
12925                         bg_key.objectid, bg_key.offset);
12926                 err |= REFERENCER_MISSING;
12927         } else {
12928                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12929                                         struct btrfs_chunk);
12930                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12931                                                 bg_key.offset) {
12932                         error(
12933         "block group[%llu %llu] related chunk item length does not match",
12934                                 bg_key.objectid, bg_key.offset);
12935                         err |= REFERENCER_MISMATCH;
12936                 }
12937         }
12938         btrfs_release_path(&path);
12939
12940         /* Search from the block group bytenr */
12941         extent_key.objectid = bg_key.objectid;
12942         extent_key.type = 0;
12943         extent_key.offset = 0;
12944
12945         btrfs_init_path(&path);
12946         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12947         if (ret < 0)
12948                 goto out;
12949
12950         /* Iterate extent tree to account used space */
12951         while (1) {
12952                 leaf = path.nodes[0];
12953
12954                 /* Search slot can point to the last item beyond leaf nritems */
12955                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12956                         goto next;
12957
12958                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12959                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12960                         break;
12961
12962                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12963                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12964                         goto next;
12965                 if (extent_key.objectid < bg_key.objectid)
12966                         goto next;
12967
12968                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12969                         total += nodesize;
12970                 else
12971                         total += extent_key.offset;
12972
12973                 ei = btrfs_item_ptr(leaf, path.slots[0],
12974                                     struct btrfs_extent_item);
12975                 flags = btrfs_extent_flags(leaf, ei);
12976                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12977                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12978                                 error(
12979                         "bad extent[%llu, %llu) type mismatch with chunk",
12980                                         extent_key.objectid,
12981                                         extent_key.objectid + extent_key.offset);
12982                                 err |= CHUNK_TYPE_MISMATCH;
12983                         }
12984                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12985                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12986                                     BTRFS_BLOCK_GROUP_METADATA))) {
12987                                 error(
12988                         "bad extent[%llu, %llu) type mismatch with chunk",
12989                                         extent_key.objectid,
12990                                         extent_key.objectid + nodesize);
12991                                 err |= CHUNK_TYPE_MISMATCH;
12992                         }
12993                 }
12994 next:
12995                 ret = btrfs_next_item(extent_root, &path);
12996                 if (ret)
12997                         break;
12998         }
12999
13000 out:
13001         btrfs_release_path(&path);
13002
13003         if (total != used) {
13004                 error(
13005                 "block group[%llu %llu] used %llu but extent items used %llu",
13006                         bg_key.objectid, bg_key.offset, used, total);
13007                 err |= BG_ACCOUNTING_ERROR;
13008         }
13009         return err;
13010 }
13011
13012 /*
13013  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
13014  * FIXME: We still need to repair error of dev_item.
13015  *
13016  * Returns error after repair.
13017  */
13018 static int repair_chunk_item(struct btrfs_trans_handle *trans,
13019                              struct btrfs_root *chunk_root,
13020                              struct btrfs_path *path, int err)
13021 {
13022         struct btrfs_chunk *chunk;
13023         struct btrfs_key chunk_key;
13024         struct extent_buffer *eb = path->nodes[0];
13025         u64 length;
13026         int slot = path->slots[0];
13027         u64 type;
13028         int ret = 0;
13029
13030         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13031         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
13032                 return err;
13033         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13034         type = btrfs_chunk_type(path->nodes[0], chunk);
13035         length = btrfs_chunk_length(eb, chunk);
13036
13037         if (err & REFERENCER_MISSING) {
13038                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
13039                      type, chunk_key.objectid, chunk_key.offset, length);
13040                 if (ret) {
13041                         error("fail to add block group item[%llu %llu]",
13042                               chunk_key.offset, length);
13043                         goto out;
13044                 } else {
13045                         err &= ~REFERENCER_MISSING;
13046                         printf("Added block group item[%llu %llu]\n",
13047                                chunk_key.offset, length);
13048                 }
13049         }
13050
13051 out:
13052         return err;
13053 }
13054
13055 /*
13056  * Check a chunk item.
13057  * Including checking all referred dev_extents and block group
13058  */
13059 static int check_chunk_item(struct btrfs_fs_info *fs_info,
13060                             struct extent_buffer *eb, int slot)
13061 {
13062         struct btrfs_root *extent_root = fs_info->extent_root;
13063         struct btrfs_root *dev_root = fs_info->dev_root;
13064         struct btrfs_path path;
13065         struct btrfs_key chunk_key;
13066         struct btrfs_key bg_key;
13067         struct btrfs_key devext_key;
13068         struct btrfs_chunk *chunk;
13069         struct extent_buffer *leaf;
13070         struct btrfs_block_group_item *bi;
13071         struct btrfs_block_group_item bg_item;
13072         struct btrfs_dev_extent *ptr;
13073         u64 length;
13074         u64 chunk_end;
13075         u64 stripe_len;
13076         u64 type;
13077         int num_stripes;
13078         u64 offset;
13079         u64 objectid;
13080         int i;
13081         int ret;
13082         int err = 0;
13083
13084         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13085         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13086         length = btrfs_chunk_length(eb, chunk);
13087         chunk_end = chunk_key.offset + length;
13088         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13089                                       chunk_key.offset);
13090         if (ret < 0) {
13091                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13092                         chunk_end);
13093                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13094                 goto out;
13095         }
13096         type = btrfs_chunk_type(eb, chunk);
13097
13098         bg_key.objectid = chunk_key.offset;
13099         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13100         bg_key.offset = length;
13101
13102         btrfs_init_path(&path);
13103         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13104         if (ret) {
13105                 error(
13106                 "chunk[%llu %llu) did not find the related block group item",
13107                         chunk_key.offset, chunk_end);
13108                 err |= REFERENCER_MISSING;
13109         } else{
13110                 leaf = path.nodes[0];
13111                 bi = btrfs_item_ptr(leaf, path.slots[0],
13112                                     struct btrfs_block_group_item);
13113                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13114                                    sizeof(bg_item));
13115                 if (btrfs_block_group_flags(&bg_item) != type) {
13116                         error(
13117 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13118                                 chunk_key.offset, chunk_end, type,
13119                                 btrfs_block_group_flags(&bg_item));
13120                         err |= REFERENCER_MISSING;
13121                 }
13122         }
13123
13124         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13125         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13126         for (i = 0; i < num_stripes; i++) {
13127                 btrfs_release_path(&path);
13128                 btrfs_init_path(&path);
13129                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13130                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13131                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13132
13133                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13134                                         0, 0);
13135                 if (ret)
13136                         goto not_match_dev;
13137
13138                 leaf = path.nodes[0];
13139                 ptr = btrfs_item_ptr(leaf, path.slots[0],
13140                                      struct btrfs_dev_extent);
13141                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13142                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13143                 if (objectid != chunk_key.objectid ||
13144                     offset != chunk_key.offset ||
13145                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13146                         goto not_match_dev;
13147                 continue;
13148 not_match_dev:
13149                 err |= BACKREF_MISSING;
13150                 error(
13151                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13152                         chunk_key.objectid, chunk_end, i);
13153                 continue;
13154         }
13155         btrfs_release_path(&path);
13156 out:
13157         return err;
13158 }
13159
13160 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13161                                    struct btrfs_root *root,
13162                                    struct btrfs_path *path)
13163 {
13164         struct btrfs_key key;
13165         int ret = 0;
13166
13167         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13168         btrfs_release_path(path);
13169         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13170         if (ret) {
13171                 ret = -ENOENT;
13172                 goto out;
13173         }
13174
13175         ret = btrfs_del_item(trans, root, path);
13176         if (ret)
13177                 goto out;
13178
13179         if (path->slots[0] == 0)
13180                 btrfs_prev_leaf(root, path);
13181         else
13182                 path->slots[0]--;
13183 out:
13184         if (ret)
13185                 error("failed to delete root %llu item[%llu, %u, %llu]",
13186                       root->objectid, key.objectid, key.type, key.offset);
13187         else
13188                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13189                        root->objectid, key.objectid, key.type, key.offset);
13190         return ret;
13191 }
13192
13193 /*
13194  * Main entry function to check known items and update related accounting info
13195  */
13196 static int check_leaf_items(struct btrfs_trans_handle *trans,
13197                             struct btrfs_root *root, struct btrfs_path *path,
13198                             struct node_refs *nrefs, int account_bytes)
13199 {
13200         struct btrfs_fs_info *fs_info = root->fs_info;
13201         struct btrfs_key key;
13202         struct extent_buffer *eb;
13203         int slot;
13204         int type;
13205         struct btrfs_extent_data_ref *dref;
13206         int ret = 0;
13207         int err = 0;
13208
13209 again:
13210         eb = path->nodes[0];
13211         slot = path->slots[0];
13212         if (slot >= btrfs_header_nritems(eb)) {
13213                 if (slot == 0) {
13214                         error("empty leaf [%llu %u] root %llu", eb->start,
13215                                 root->fs_info->nodesize, root->objectid);
13216                         err |= EIO;
13217                 }
13218                 goto out;
13219         }
13220
13221         btrfs_item_key_to_cpu(eb, &key, slot);
13222         type = key.type;
13223
13224         switch (type) {
13225         case BTRFS_EXTENT_DATA_KEY:
13226                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13227                 if (repair && ret)
13228                         ret = repair_extent_data_item(trans, root, path, nrefs,
13229                                                       ret);
13230                 err |= ret;
13231                 break;
13232         case BTRFS_BLOCK_GROUP_ITEM_KEY:
13233                 ret = check_block_group_item(fs_info, eb, slot);
13234                 if (repair &&
13235                     ret & REFERENCER_MISSING)
13236                         ret = delete_extent_tree_item(trans, root, path);
13237                 err |= ret;
13238                 break;
13239         case BTRFS_DEV_ITEM_KEY:
13240                 ret = check_dev_item(fs_info, eb, slot);
13241                 err |= ret;
13242                 break;
13243         case BTRFS_CHUNK_ITEM_KEY:
13244                 ret = check_chunk_item(fs_info, eb, slot);
13245                 if (repair && ret)
13246                         ret = repair_chunk_item(trans, root, path, ret);
13247                 err |= ret;
13248                 break;
13249         case BTRFS_DEV_EXTENT_KEY:
13250                 ret = check_dev_extent_item(fs_info, eb, slot);
13251                 err |= ret;
13252                 break;
13253         case BTRFS_EXTENT_ITEM_KEY:
13254         case BTRFS_METADATA_ITEM_KEY:
13255                 ret = check_extent_item(trans, fs_info, path);
13256                 err |= ret;
13257                 break;
13258         case BTRFS_EXTENT_CSUM_KEY:
13259                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
13260                 err |= ret;
13261                 break;
13262         case BTRFS_TREE_BLOCK_REF_KEY:
13263                 ret = check_tree_block_backref(fs_info, key.offset,
13264                                                key.objectid, -1);
13265                 if (repair &&
13266                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13267                         ret = delete_extent_tree_item(trans, root, path);
13268                 err |= ret;
13269                 break;
13270         case BTRFS_EXTENT_DATA_REF_KEY:
13271                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13272                 ret = check_extent_data_backref(fs_info,
13273                                 btrfs_extent_data_ref_root(eb, dref),
13274                                 btrfs_extent_data_ref_objectid(eb, dref),
13275                                 btrfs_extent_data_ref_offset(eb, dref),
13276                                 key.objectid, 0,
13277                                 btrfs_extent_data_ref_count(eb, dref));
13278                 if (repair &&
13279                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13280                         ret = delete_extent_tree_item(trans, root, path);
13281                 err |= ret;
13282                 break;
13283         case BTRFS_SHARED_BLOCK_REF_KEY:
13284                 ret = check_shared_block_backref(fs_info, key.offset,
13285                                                  key.objectid, -1);
13286                 if (repair &&
13287                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13288                         ret = delete_extent_tree_item(trans, root, path);
13289                 err |= ret;
13290                 break;
13291         case BTRFS_SHARED_DATA_REF_KEY:
13292                 ret = check_shared_data_backref(fs_info, key.offset,
13293                                                 key.objectid);
13294                 if (repair &&
13295                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13296                         ret = delete_extent_tree_item(trans, root, path);
13297                 err |= ret;
13298                 break;
13299         default:
13300                 break;
13301         }
13302
13303         ++path->slots[0];
13304         goto again;
13305 out:
13306         return err;
13307 }
13308
13309 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
13310
13311 /*
13312  * Low memory usage version check_chunks_and_extents.
13313  */
13314 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13315 {
13316         struct btrfs_trans_handle *trans = NULL;
13317         struct btrfs_path path;
13318         struct btrfs_key old_key;
13319         struct btrfs_key key;
13320         struct btrfs_root *root1;
13321         struct btrfs_root *root;
13322         struct btrfs_root *cur_root;
13323         int err = 0;
13324         int ret;
13325
13326         root = fs_info->fs_root;
13327
13328         if (repair) {
13329                 /* pin every tree block to avoid extent overwrite */
13330                 ret = pin_metadata_blocks(fs_info);
13331                 if (ret) {
13332                         error("failed to pin metadata blocks");
13333                         return ret;
13334                 }
13335                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13336                 if (IS_ERR(trans)) {
13337                         error("failed to start transaction before check");
13338                         return PTR_ERR(trans);
13339                 }
13340         }
13341
13342         root1 = root->fs_info->chunk_root;
13343         ret = check_btrfs_root(trans, root1, 0, 1);
13344         err |= ret;
13345
13346         root1 = root->fs_info->tree_root;
13347         ret = check_btrfs_root(trans, root1, 0, 1);
13348         err |= ret;
13349
13350         btrfs_init_path(&path);
13351         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13352         key.offset = 0;
13353         key.type = BTRFS_ROOT_ITEM_KEY;
13354
13355         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13356         if (ret) {
13357                 error("cannot find extent tree in tree_root");
13358                 goto out;
13359         }
13360
13361         while (1) {
13362                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13363                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13364                         goto next;
13365                 old_key = key;
13366                 key.offset = (u64)-1;
13367
13368                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13369                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13370                                         &key);
13371                 else
13372                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13373                 if (IS_ERR(cur_root) || !cur_root) {
13374                         error("failed to read tree: %lld", key.objectid);
13375                         goto next;
13376                 }
13377
13378                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13379                 err |= ret;
13380
13381                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13382                         btrfs_free_fs_root(cur_root);
13383
13384                 btrfs_release_path(&path);
13385                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13386                                         &old_key, &path, 0, 0);
13387                 if (ret)
13388                         goto out;
13389 next:
13390                 ret = btrfs_next_item(root1, &path);
13391                 if (ret)
13392                         goto out;
13393         }
13394 out:
13395
13396         /* if repair, update block accounting */
13397         if (repair) {
13398                 ret = btrfs_fix_block_accounting(trans, root);
13399                 if (ret)
13400                         err |= ret;
13401                 else
13402                         err &= ~BG_ACCOUNTING_ERROR;
13403         }
13404
13405         if (trans)
13406                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13407
13408         btrfs_release_path(&path);
13409
13410         return err;
13411 }
13412
13413 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13414 {
13415         int ret;
13416
13417         if (!ctx.progress_enabled)
13418                 fprintf(stderr, "checking extents\n");
13419         if (check_mode == CHECK_MODE_LOWMEM)
13420                 ret = check_chunks_and_extents_v2(fs_info);
13421         else
13422                 ret = check_chunks_and_extents(fs_info);
13423
13424         /* Also repair device size related problems */
13425         if (repair && !ret) {
13426                 ret = btrfs_fix_device_and_super_size(fs_info);
13427                 if (ret > 0)
13428                         ret = 0;
13429         }
13430         return ret;
13431 }
13432
13433 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13434                            struct btrfs_root *root, int overwrite)
13435 {
13436         struct extent_buffer *c;
13437         struct extent_buffer *old = root->node;
13438         int level;
13439         int ret;
13440         struct btrfs_disk_key disk_key = {0,0,0};
13441
13442         level = 0;
13443
13444         if (overwrite) {
13445                 c = old;
13446                 extent_buffer_get(c);
13447                 goto init;
13448         }
13449         c = btrfs_alloc_free_block(trans, root,
13450                                    root->fs_info->nodesize,
13451                                    root->root_key.objectid,
13452                                    &disk_key, level, 0, 0);
13453         if (IS_ERR(c)) {
13454                 c = old;
13455                 extent_buffer_get(c);
13456                 overwrite = 1;
13457         }
13458 init:
13459         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13460         btrfs_set_header_level(c, level);
13461         btrfs_set_header_bytenr(c, c->start);
13462         btrfs_set_header_generation(c, trans->transid);
13463         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13464         btrfs_set_header_owner(c, root->root_key.objectid);
13465
13466         write_extent_buffer(c, root->fs_info->fsid,
13467                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13468
13469         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13470                             btrfs_header_chunk_tree_uuid(c),
13471                             BTRFS_UUID_SIZE);
13472
13473         btrfs_mark_buffer_dirty(c);
13474         /*
13475          * this case can happen in the following case:
13476          *
13477          * 1.overwrite previous root.
13478          *
13479          * 2.reinit reloc data root, this is because we skip pin
13480          * down reloc data tree before which means we can allocate
13481          * same block bytenr here.
13482          */
13483         if (old->start == c->start) {
13484                 btrfs_set_root_generation(&root->root_item,
13485                                           trans->transid);
13486                 root->root_item.level = btrfs_header_level(root->node);
13487                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13488                                         &root->root_key, &root->root_item);
13489                 if (ret) {
13490                         free_extent_buffer(c);
13491                         return ret;
13492                 }
13493         }
13494         free_extent_buffer(old);
13495         root->node = c;
13496         add_root_to_dirty_list(root);
13497         return 0;
13498 }
13499
13500 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13501                                 struct extent_buffer *eb, int tree_root)
13502 {
13503         struct extent_buffer *tmp;
13504         struct btrfs_root_item *ri;
13505         struct btrfs_key key;
13506         u64 bytenr;
13507         int level = btrfs_header_level(eb);
13508         int nritems;
13509         int ret;
13510         int i;
13511
13512         /*
13513          * If we have pinned this block before, don't pin it again.
13514          * This can not only avoid forever loop with broken filesystem
13515          * but also give us some speedups.
13516          */
13517         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13518                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13519                 return 0;
13520
13521         btrfs_pin_extent(fs_info, eb->start, eb->len);
13522
13523         nritems = btrfs_header_nritems(eb);
13524         for (i = 0; i < nritems; i++) {
13525                 if (level == 0) {
13526                         btrfs_item_key_to_cpu(eb, &key, i);
13527                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13528                                 continue;
13529                         /* Skip the extent root and reloc roots */
13530                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13531                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13532                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13533                                 continue;
13534                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13535                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13536
13537                         /*
13538                          * If at any point we start needing the real root we
13539                          * will have to build a stump root for the root we are
13540                          * in, but for now this doesn't actually use the root so
13541                          * just pass in extent_root.
13542                          */
13543                         tmp = read_tree_block(fs_info, bytenr, 0);
13544                         if (!extent_buffer_uptodate(tmp)) {
13545                                 fprintf(stderr, "Error reading root block\n");
13546                                 return -EIO;
13547                         }
13548                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13549                         free_extent_buffer(tmp);
13550                         if (ret)
13551                                 return ret;
13552                 } else {
13553                         bytenr = btrfs_node_blockptr(eb, i);
13554
13555                         /* If we aren't the tree root don't read the block */
13556                         if (level == 1 && !tree_root) {
13557                                 btrfs_pin_extent(fs_info, bytenr,
13558                                                 fs_info->nodesize);
13559                                 continue;
13560                         }
13561
13562                         tmp = read_tree_block(fs_info, bytenr, 0);
13563                         if (!extent_buffer_uptodate(tmp)) {
13564                                 fprintf(stderr, "Error reading tree block\n");
13565                                 return -EIO;
13566                         }
13567                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13568                         free_extent_buffer(tmp);
13569                         if (ret)
13570                                 return ret;
13571                 }
13572         }
13573
13574         return 0;
13575 }
13576
13577 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13578 {
13579         int ret;
13580
13581         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13582         if (ret)
13583                 return ret;
13584
13585         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13586 }
13587
13588 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13589 {
13590         struct btrfs_block_group_cache *cache;
13591         struct btrfs_path path;
13592         struct extent_buffer *leaf;
13593         struct btrfs_chunk *chunk;
13594         struct btrfs_key key;
13595         int ret;
13596         u64 start;
13597
13598         btrfs_init_path(&path);
13599         key.objectid = 0;
13600         key.type = BTRFS_CHUNK_ITEM_KEY;
13601         key.offset = 0;
13602         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13603         if (ret < 0) {
13604                 btrfs_release_path(&path);
13605                 return ret;
13606         }
13607
13608         /*
13609          * We do this in case the block groups were screwed up and had alloc
13610          * bits that aren't actually set on the chunks.  This happens with
13611          * restored images every time and could happen in real life I guess.
13612          */
13613         fs_info->avail_data_alloc_bits = 0;
13614         fs_info->avail_metadata_alloc_bits = 0;
13615         fs_info->avail_system_alloc_bits = 0;
13616
13617         /* First we need to create the in-memory block groups */
13618         while (1) {
13619                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13620                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13621                         if (ret < 0) {
13622                                 btrfs_release_path(&path);
13623                                 return ret;
13624                         }
13625                         if (ret) {
13626                                 ret = 0;
13627                                 break;
13628                         }
13629                 }
13630                 leaf = path.nodes[0];
13631                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13632                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13633                         path.slots[0]++;
13634                         continue;
13635                 }
13636
13637                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13638                 btrfs_add_block_group(fs_info, 0,
13639                                       btrfs_chunk_type(leaf, chunk),
13640                                       key.objectid, key.offset,
13641                                       btrfs_chunk_length(leaf, chunk));
13642                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13643                                  key.offset + btrfs_chunk_length(leaf, chunk));
13644                 path.slots[0]++;
13645         }
13646         start = 0;
13647         while (1) {
13648                 cache = btrfs_lookup_first_block_group(fs_info, start);
13649                 if (!cache)
13650                         break;
13651                 cache->cached = 1;
13652                 start = cache->key.objectid + cache->key.offset;
13653         }
13654
13655         btrfs_release_path(&path);
13656         return 0;
13657 }
13658
13659 static int reset_balance(struct btrfs_trans_handle *trans,
13660                          struct btrfs_fs_info *fs_info)
13661 {
13662         struct btrfs_root *root = fs_info->tree_root;
13663         struct btrfs_path path;
13664         struct extent_buffer *leaf;
13665         struct btrfs_key key;
13666         int del_slot, del_nr = 0;
13667         int ret;
13668         int found = 0;
13669
13670         btrfs_init_path(&path);
13671         key.objectid = BTRFS_BALANCE_OBJECTID;
13672         key.type = BTRFS_BALANCE_ITEM_KEY;
13673         key.offset = 0;
13674         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13675         if (ret) {
13676                 if (ret > 0)
13677                         ret = 0;
13678                 if (!ret)
13679                         goto reinit_data_reloc;
13680                 else
13681                         goto out;
13682         }
13683
13684         ret = btrfs_del_item(trans, root, &path);
13685         if (ret)
13686                 goto out;
13687         btrfs_release_path(&path);
13688
13689         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13690         key.type = BTRFS_ROOT_ITEM_KEY;
13691         key.offset = 0;
13692         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13693         if (ret < 0)
13694                 goto out;
13695         while (1) {
13696                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13697                         if (!found)
13698                                 break;
13699
13700                         if (del_nr) {
13701                                 ret = btrfs_del_items(trans, root, &path,
13702                                                       del_slot, del_nr);
13703                                 del_nr = 0;
13704                                 if (ret)
13705                                         goto out;
13706                         }
13707                         key.offset++;
13708                         btrfs_release_path(&path);
13709
13710                         found = 0;
13711                         ret = btrfs_search_slot(trans, root, &key, &path,
13712                                                 -1, 1);
13713                         if (ret < 0)
13714                                 goto out;
13715                         continue;
13716                 }
13717                 found = 1;
13718                 leaf = path.nodes[0];
13719                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13720                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13721                         break;
13722                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13723                         path.slots[0]++;
13724                         continue;
13725                 }
13726                 if (!del_nr) {
13727                         del_slot = path.slots[0];
13728                         del_nr = 1;
13729                 } else {
13730                         del_nr++;
13731                 }
13732                 path.slots[0]++;
13733         }
13734
13735         if (del_nr) {
13736                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13737                 if (ret)
13738                         goto out;
13739         }
13740         btrfs_release_path(&path);
13741
13742 reinit_data_reloc:
13743         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13744         key.type = BTRFS_ROOT_ITEM_KEY;
13745         key.offset = (u64)-1;
13746         root = btrfs_read_fs_root(fs_info, &key);
13747         if (IS_ERR(root)) {
13748                 fprintf(stderr, "Error reading data reloc tree\n");
13749                 ret = PTR_ERR(root);
13750                 goto out;
13751         }
13752         record_root_in_trans(trans, root);
13753         ret = btrfs_fsck_reinit_root(trans, root, 0);
13754         if (ret)
13755                 goto out;
13756         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13757 out:
13758         btrfs_release_path(&path);
13759         return ret;
13760 }
13761
13762 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13763                               struct btrfs_fs_info *fs_info)
13764 {
13765         u64 start = 0;
13766         int ret;
13767
13768         /*
13769          * The only reason we don't do this is because right now we're just
13770          * walking the trees we find and pinning down their bytes, we don't look
13771          * at any of the leaves.  In order to do mixed groups we'd have to check
13772          * the leaves of any fs roots and pin down the bytes for any file
13773          * extents we find.  Not hard but why do it if we don't have to?
13774          */
13775         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13776                 fprintf(stderr, "We don't support re-initing the extent tree "
13777                         "for mixed block groups yet, please notify a btrfs "
13778                         "developer you want to do this so they can add this "
13779                         "functionality.\n");
13780                 return -EINVAL;
13781         }
13782
13783         /*
13784          * first we need to walk all of the trees except the extent tree and pin
13785          * down the bytes that are in use so we don't overwrite any existing
13786          * metadata.
13787          */
13788         ret = pin_metadata_blocks(fs_info);
13789         if (ret) {
13790                 fprintf(stderr, "error pinning down used bytes\n");
13791                 return ret;
13792         }
13793
13794         /*
13795          * Need to drop all the block groups since we're going to recreate all
13796          * of them again.
13797          */
13798         btrfs_free_block_groups(fs_info);
13799         ret = reset_block_groups(fs_info);
13800         if (ret) {
13801                 fprintf(stderr, "error resetting the block groups\n");
13802                 return ret;
13803         }
13804
13805         /* Ok we can allocate now, reinit the extent root */
13806         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13807         if (ret) {
13808                 fprintf(stderr, "extent root initialization failed\n");
13809                 /*
13810                  * When the transaction code is updated we should end the
13811                  * transaction, but for now progs only knows about commit so
13812                  * just return an error.
13813                  */
13814                 return ret;
13815         }
13816
13817         /*
13818          * Now we have all the in-memory block groups setup so we can make
13819          * allocations properly, and the metadata we care about is safe since we
13820          * pinned all of it above.
13821          */
13822         while (1) {
13823                 struct btrfs_block_group_cache *cache;
13824
13825                 cache = btrfs_lookup_first_block_group(fs_info, start);
13826                 if (!cache)
13827                         break;
13828                 start = cache->key.objectid + cache->key.offset;
13829                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13830                                         &cache->key, &cache->item,
13831                                         sizeof(cache->item));
13832                 if (ret) {
13833                         fprintf(stderr, "Error adding block group\n");
13834                         return ret;
13835                 }
13836                 btrfs_extent_post_op(trans, fs_info->extent_root);
13837         }
13838
13839         ret = reset_balance(trans, fs_info);
13840         if (ret)
13841                 fprintf(stderr, "error resetting the pending balance\n");
13842
13843         return ret;
13844 }
13845
13846 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13847 {
13848         struct btrfs_path path;
13849         struct btrfs_trans_handle *trans;
13850         struct btrfs_key key;
13851         int ret;
13852
13853         printf("Recowing metadata block %llu\n", eb->start);
13854         key.objectid = btrfs_header_owner(eb);
13855         key.type = BTRFS_ROOT_ITEM_KEY;
13856         key.offset = (u64)-1;
13857
13858         root = btrfs_read_fs_root(root->fs_info, &key);
13859         if (IS_ERR(root)) {
13860                 fprintf(stderr, "Couldn't find owner root %llu\n",
13861                         key.objectid);
13862                 return PTR_ERR(root);
13863         }
13864
13865         trans = btrfs_start_transaction(root, 1);
13866         if (IS_ERR(trans))
13867                 return PTR_ERR(trans);
13868
13869         btrfs_init_path(&path);
13870         path.lowest_level = btrfs_header_level(eb);
13871         if (path.lowest_level)
13872                 btrfs_node_key_to_cpu(eb, &key, 0);
13873         else
13874                 btrfs_item_key_to_cpu(eb, &key, 0);
13875
13876         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13877         btrfs_commit_transaction(trans, root);
13878         btrfs_release_path(&path);
13879         return ret;
13880 }
13881
13882 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13883 {
13884         struct btrfs_path path;
13885         struct btrfs_trans_handle *trans;
13886         struct btrfs_key key;
13887         int ret;
13888
13889         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13890                bad->key.type, bad->key.offset);
13891         key.objectid = bad->root_id;
13892         key.type = BTRFS_ROOT_ITEM_KEY;
13893         key.offset = (u64)-1;
13894
13895         root = btrfs_read_fs_root(root->fs_info, &key);
13896         if (IS_ERR(root)) {
13897                 fprintf(stderr, "Couldn't find owner root %llu\n",
13898                         key.objectid);
13899                 return PTR_ERR(root);
13900         }
13901
13902         trans = btrfs_start_transaction(root, 1);
13903         if (IS_ERR(trans))
13904                 return PTR_ERR(trans);
13905
13906         btrfs_init_path(&path);
13907         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13908         if (ret) {
13909                 if (ret > 0)
13910                         ret = 0;
13911                 goto out;
13912         }
13913         ret = btrfs_del_item(trans, root, &path);
13914 out:
13915         btrfs_commit_transaction(trans, root);
13916         btrfs_release_path(&path);
13917         return ret;
13918 }
13919
13920 static int zero_log_tree(struct btrfs_root *root)
13921 {
13922         struct btrfs_trans_handle *trans;
13923         int ret;
13924
13925         trans = btrfs_start_transaction(root, 1);
13926         if (IS_ERR(trans)) {
13927                 ret = PTR_ERR(trans);
13928                 return ret;
13929         }
13930         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13931         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13932         ret = btrfs_commit_transaction(trans, root);
13933         return ret;
13934 }
13935
13936 static int populate_csum(struct btrfs_trans_handle *trans,
13937                          struct btrfs_root *csum_root, char *buf, u64 start,
13938                          u64 len)
13939 {
13940         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13941         u64 offset = 0;
13942         u64 sectorsize;
13943         int ret = 0;
13944
13945         while (offset < len) {
13946                 sectorsize = fs_info->sectorsize;
13947                 ret = read_extent_data(fs_info, buf, start + offset,
13948                                        &sectorsize, 0);
13949                 if (ret)
13950                         break;
13951                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13952                                             start + offset, buf, sectorsize);
13953                 if (ret)
13954                         break;
13955                 offset += sectorsize;
13956         }
13957         return ret;
13958 }
13959
13960 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13961                                       struct btrfs_root *csum_root,
13962                                       struct btrfs_root *cur_root)
13963 {
13964         struct btrfs_path path;
13965         struct btrfs_key key;
13966         struct extent_buffer *node;
13967         struct btrfs_file_extent_item *fi;
13968         char *buf = NULL;
13969         u64 start = 0;
13970         u64 len = 0;
13971         int slot = 0;
13972         int ret = 0;
13973
13974         buf = malloc(cur_root->fs_info->sectorsize);
13975         if (!buf)
13976                 return -ENOMEM;
13977
13978         btrfs_init_path(&path);
13979         key.objectid = 0;
13980         key.offset = 0;
13981         key.type = 0;
13982         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13983         if (ret < 0)
13984                 goto out;
13985         /* Iterate all regular file extents and fill its csum */
13986         while (1) {
13987                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13988
13989                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13990                         goto next;
13991                 node = path.nodes[0];
13992                 slot = path.slots[0];
13993                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13994                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13995                         goto next;
13996                 start = btrfs_file_extent_disk_bytenr(node, fi);
13997                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13998
13999                 ret = populate_csum(trans, csum_root, buf, start, len);
14000                 if (ret == -EEXIST)
14001                         ret = 0;
14002                 if (ret < 0)
14003                         goto out;
14004 next:
14005                 /*
14006                  * TODO: if next leaf is corrupted, jump to nearest next valid
14007                  * leaf.
14008                  */
14009                 ret = btrfs_next_item(cur_root, &path);
14010                 if (ret < 0)
14011                         goto out;
14012                 if (ret > 0) {
14013                         ret = 0;
14014                         goto out;
14015                 }
14016         }
14017
14018 out:
14019         btrfs_release_path(&path);
14020         free(buf);
14021         return ret;
14022 }
14023
14024 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
14025                                   struct btrfs_root *csum_root)
14026 {
14027         struct btrfs_fs_info *fs_info = csum_root->fs_info;
14028         struct btrfs_path path;
14029         struct btrfs_root *tree_root = fs_info->tree_root;
14030         struct btrfs_root *cur_root;
14031         struct extent_buffer *node;
14032         struct btrfs_key key;
14033         int slot = 0;
14034         int ret = 0;
14035
14036         btrfs_init_path(&path);
14037         key.objectid = BTRFS_FS_TREE_OBJECTID;
14038         key.offset = 0;
14039         key.type = BTRFS_ROOT_ITEM_KEY;
14040         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
14041         if (ret < 0)
14042                 goto out;
14043         if (ret > 0) {
14044                 ret = -ENOENT;
14045                 goto out;
14046         }
14047
14048         while (1) {
14049                 node = path.nodes[0];
14050                 slot = path.slots[0];
14051                 btrfs_item_key_to_cpu(node, &key, slot);
14052                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
14053                         goto out;
14054                 if (key.type != BTRFS_ROOT_ITEM_KEY)
14055                         goto next;
14056                 if (!is_fstree(key.objectid))
14057                         goto next;
14058                 key.offset = (u64)-1;
14059
14060                 cur_root = btrfs_read_fs_root(fs_info, &key);
14061                 if (IS_ERR(cur_root) || !cur_root) {
14062                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
14063                                 key.objectid);
14064                         goto out;
14065                 }
14066                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
14067                                 cur_root);
14068                 if (ret < 0)
14069                         goto out;
14070 next:
14071                 ret = btrfs_next_item(tree_root, &path);
14072                 if (ret > 0) {
14073                         ret = 0;
14074                         goto out;
14075                 }
14076                 if (ret < 0)
14077                         goto out;
14078         }
14079
14080 out:
14081         btrfs_release_path(&path);
14082         return ret;
14083 }
14084
14085 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14086                                       struct btrfs_root *csum_root)
14087 {
14088         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14089         struct btrfs_path path;
14090         struct btrfs_extent_item *ei;
14091         struct extent_buffer *leaf;
14092         char *buf;
14093         struct btrfs_key key;
14094         int ret;
14095
14096         btrfs_init_path(&path);
14097         key.objectid = 0;
14098         key.type = BTRFS_EXTENT_ITEM_KEY;
14099         key.offset = 0;
14100         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14101         if (ret < 0) {
14102                 btrfs_release_path(&path);
14103                 return ret;
14104         }
14105
14106         buf = malloc(csum_root->fs_info->sectorsize);
14107         if (!buf) {
14108                 btrfs_release_path(&path);
14109                 return -ENOMEM;
14110         }
14111
14112         while (1) {
14113                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14114                         ret = btrfs_next_leaf(extent_root, &path);
14115                         if (ret < 0)
14116                                 break;
14117                         if (ret) {
14118                                 ret = 0;
14119                                 break;
14120                         }
14121                 }
14122                 leaf = path.nodes[0];
14123
14124                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14125                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14126                         path.slots[0]++;
14127                         continue;
14128                 }
14129
14130                 ei = btrfs_item_ptr(leaf, path.slots[0],
14131                                     struct btrfs_extent_item);
14132                 if (!(btrfs_extent_flags(leaf, ei) &
14133                       BTRFS_EXTENT_FLAG_DATA)) {
14134                         path.slots[0]++;
14135                         continue;
14136                 }
14137
14138                 ret = populate_csum(trans, csum_root, buf, key.objectid,
14139                                     key.offset);
14140                 if (ret)
14141                         break;
14142                 path.slots[0]++;
14143         }
14144
14145         btrfs_release_path(&path);
14146         free(buf);
14147         return ret;
14148 }
14149
/*
 * Recompute checksums and store them in the csum tree.
 *
 * When the extent tree has been re-initialized all extent info is gone, so
 * the extent tree cannot be used as the source.  In that case the caller sets
 * search_fs_tree and the fs/subvol trees are walked instead; otherwise the
 * extent tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
                              : fill_csum_tree_from_extent(trans, csum_root);
}
14166
14167 static void free_roots_info_cache(void)
14168 {
14169         if (!roots_info_cache)
14170                 return;
14171
14172         while (!cache_tree_empty(roots_info_cache)) {
14173                 struct cache_extent *entry;
14174                 struct root_item_info *rii;
14175
14176                 entry = first_cache_extent(roots_info_cache);
14177                 if (!entry)
14178                         break;
14179                 remove_cache_extent(roots_info_cache, entry);
14180                 rii = container_of(entry, struct root_item_info, cache_extent);
14181                 free(rii);
14182         }
14183
14184         free(roots_info_cache);
14185         roots_info_cache = NULL;
14186 }
14187
14188 static int build_roots_info_cache(struct btrfs_fs_info *info)
14189 {
14190         int ret = 0;
14191         struct btrfs_key key;
14192         struct extent_buffer *leaf;
14193         struct btrfs_path path;
14194
14195         if (!roots_info_cache) {
14196                 roots_info_cache = malloc(sizeof(*roots_info_cache));
14197                 if (!roots_info_cache)
14198                         return -ENOMEM;
14199                 cache_tree_init(roots_info_cache);
14200         }
14201
14202         btrfs_init_path(&path);
14203         key.objectid = 0;
14204         key.type = BTRFS_EXTENT_ITEM_KEY;
14205         key.offset = 0;
14206         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14207         if (ret < 0)
14208                 goto out;
14209         leaf = path.nodes[0];
14210
14211         while (1) {
14212                 struct btrfs_key found_key;
14213                 struct btrfs_extent_item *ei;
14214                 struct btrfs_extent_inline_ref *iref;
14215                 int slot = path.slots[0];
14216                 int type;
14217                 u64 flags;
14218                 u64 root_id;
14219                 u8 level;
14220                 struct cache_extent *entry;
14221                 struct root_item_info *rii;
14222
14223                 if (slot >= btrfs_header_nritems(leaf)) {
14224                         ret = btrfs_next_leaf(info->extent_root, &path);
14225                         if (ret < 0) {
14226                                 break;
14227                         } else if (ret) {
14228                                 ret = 0;
14229                                 break;
14230                         }
14231                         leaf = path.nodes[0];
14232                         slot = path.slots[0];
14233                 }
14234
14235                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14236
14237                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14238                     found_key.type != BTRFS_METADATA_ITEM_KEY)
14239                         goto next;
14240
14241                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14242                 flags = btrfs_extent_flags(leaf, ei);
14243
14244                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14245                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
14246                         goto next;
14247
14248                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14249                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14250                         level = found_key.offset;
14251                 } else {
14252                         struct btrfs_tree_block_info *binfo;
14253
14254                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
14255                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14256                         level = btrfs_tree_block_level(leaf, binfo);
14257                 }
14258
14259                 /*
14260                  * For a root extent, it must be of the following type and the
14261                  * first (and only one) iref in the item.
14262                  */
14263                 type = btrfs_extent_inline_ref_type(leaf, iref);
14264                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14265                         goto next;
14266
14267                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14268                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14269                 if (!entry) {
14270                         rii = malloc(sizeof(struct root_item_info));
14271                         if (!rii) {
14272                                 ret = -ENOMEM;
14273                                 goto out;
14274                         }
14275                         rii->cache_extent.start = root_id;
14276                         rii->cache_extent.size = 1;
14277                         rii->level = (u8)-1;
14278                         entry = &rii->cache_extent;
14279                         ret = insert_cache_extent(roots_info_cache, entry);
14280                         ASSERT(ret == 0);
14281                 } else {
14282                         rii = container_of(entry, struct root_item_info,
14283                                            cache_extent);
14284                 }
14285
14286                 ASSERT(rii->cache_extent.start == root_id);
14287                 ASSERT(rii->cache_extent.size == 1);
14288
14289                 if (level > rii->level || rii->level == (u8)-1) {
14290                         rii->level = level;
14291                         rii->bytenr = found_key.objectid;
14292                         rii->gen = btrfs_extent_generation(leaf, ei);
14293                         rii->node_count = 1;
14294                 } else if (level == rii->level) {
14295                         rii->node_count++;
14296                 }
14297 next:
14298                 path.slots[0]++;
14299         }
14300
14301 out:
14302         btrfs_release_path(&path);
14303
14304         return ret;
14305 }
14306
14307 static int maybe_repair_root_item(struct btrfs_path *path,
14308                                   const struct btrfs_key *root_key,
14309                                   const int read_only_mode)
14310 {
14311         const u64 root_id = root_key->objectid;
14312         struct cache_extent *entry;
14313         struct root_item_info *rii;
14314         struct btrfs_root_item ri;
14315         unsigned long offset;
14316
14317         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14318         if (!entry) {
14319                 fprintf(stderr,
14320                         "Error: could not find extent items for root %llu\n",
14321                         root_key->objectid);
14322                 return -ENOENT;
14323         }
14324
14325         rii = container_of(entry, struct root_item_info, cache_extent);
14326         ASSERT(rii->cache_extent.start == root_id);
14327         ASSERT(rii->cache_extent.size == 1);
14328
14329         if (rii->node_count != 1) {
14330                 fprintf(stderr,
14331                         "Error: could not find btree root extent for root %llu\n",
14332                         root_id);
14333                 return -ENOENT;
14334         }
14335
14336         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14337         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14338
14339         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14340             btrfs_root_level(&ri) != rii->level ||
14341             btrfs_root_generation(&ri) != rii->gen) {
14342
14343                 /*
14344                  * If we're in repair mode but our caller told us to not update
14345                  * the root item, i.e. just check if it needs to be updated, don't
14346                  * print this message, since the caller will call us again shortly
14347                  * for the same root item without read only mode (the caller will
14348                  * open a transaction first).
14349                  */
14350                 if (!(read_only_mode && repair))
14351                         fprintf(stderr,
14352                                 "%sroot item for root %llu,"
14353                                 " current bytenr %llu, current gen %llu, current level %u,"
14354                                 " new bytenr %llu, new gen %llu, new level %u\n",
14355                                 (read_only_mode ? "" : "fixing "),
14356                                 root_id,
14357                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14358                                 btrfs_root_level(&ri),
14359                                 rii->bytenr, rii->gen, rii->level);
14360
14361                 if (btrfs_root_generation(&ri) > rii->gen) {
14362                         fprintf(stderr,
14363                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14364                                 root_id, btrfs_root_generation(&ri), rii->gen);
14365                         return -EINVAL;
14366                 }
14367
14368                 if (!read_only_mode) {
14369                         btrfs_set_root_bytenr(&ri, rii->bytenr);
14370                         btrfs_set_root_level(&ri, rii->level);
14371                         btrfs_set_root_generation(&ri, rii->gen);
14372                         write_extent_buffer(path->nodes[0], &ri,
14373                                             offset, sizeof(ri));
14374                 }
14375
14376                 return 1;
14377         }
14378
14379         return 0;
14380 }
14381
14382 /*
14383  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14384  * caused read-only snapshots to be corrupted if they were created at a moment
14385  * when the source subvolume/snapshot had orphan items. The issue was that the
14386  * on-disk root items became incorrect, referring to the pre orphan cleanup root
14387  * node instead of the post orphan cleanup root node.
14388  * So this function, and its callees, just detects and fixes those cases. Even
14389  * though the regression was for read-only snapshots, this function applies to
14390  * any snapshot/subvolume root.
14391  * This must be run before any other repair code - not doing it so, makes other
14392  * repair code delete or modify backrefs in the extent tree for example, which
14393  * will result in an inconsistent fs after repairing the root items.
14394  */
14395 static int repair_root_items(struct btrfs_fs_info *info)
14396 {
14397         struct btrfs_path path;
14398         struct btrfs_key key;
14399         struct extent_buffer *leaf;
14400         struct btrfs_trans_handle *trans = NULL;
14401         int ret = 0;
14402         int bad_roots = 0;
14403         int need_trans = 0;
14404
14405         btrfs_init_path(&path);
14406
14407         ret = build_roots_info_cache(info);
14408         if (ret)
14409                 goto out;
14410
14411         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14412         key.type = BTRFS_ROOT_ITEM_KEY;
14413         key.offset = 0;
14414
14415 again:
14416         /*
14417          * Avoid opening and committing transactions if a leaf doesn't have
14418          * any root items that need to be fixed, so that we avoid rotating
14419          * backup roots unnecessarily.
14420          */
14421         if (need_trans) {
14422                 trans = btrfs_start_transaction(info->tree_root, 1);
14423                 if (IS_ERR(trans)) {
14424                         ret = PTR_ERR(trans);
14425                         goto out;
14426                 }
14427         }
14428
14429         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14430                                 0, trans ? 1 : 0);
14431         if (ret < 0)
14432                 goto out;
14433         leaf = path.nodes[0];
14434
14435         while (1) {
14436                 struct btrfs_key found_key;
14437
14438                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14439                         int no_more_keys = find_next_key(&path, &key);
14440
14441                         btrfs_release_path(&path);
14442                         if (trans) {
14443                                 ret = btrfs_commit_transaction(trans,
14444                                                                info->tree_root);
14445                                 trans = NULL;
14446                                 if (ret < 0)
14447                                         goto out;
14448                         }
14449                         need_trans = 0;
14450                         if (no_more_keys)
14451                                 break;
14452                         goto again;
14453                 }
14454
14455                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14456
14457                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14458                         goto next;
14459                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14460                         goto next;
14461
14462                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14463                 if (ret < 0)
14464                         goto out;
14465                 if (ret) {
14466                         if (!trans && repair) {
14467                                 need_trans = 1;
14468                                 key = found_key;
14469                                 btrfs_release_path(&path);
14470                                 goto again;
14471                         }
14472                         bad_roots++;
14473                 }
14474 next:
14475                 path.slots[0]++;
14476         }
14477         ret = 0;
14478 out:
14479         free_roots_info_cache();
14480         btrfs_release_path(&path);
14481         if (trans)
14482                 btrfs_commit_transaction(trans, info->tree_root);
14483         if (ret < 0)
14484                 return ret;
14485
14486         return bad_roots;
14487 }
14488
14489 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14490 {
14491         struct btrfs_trans_handle *trans;
14492         struct btrfs_block_group_cache *bg_cache;
14493         u64 current = 0;
14494         int ret = 0;
14495
14496         /* Clear all free space cache inodes and its extent data */
14497         while (1) {
14498                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14499                 if (!bg_cache)
14500                         break;
14501                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14502                 if (ret < 0)
14503                         return ret;
14504                 current = bg_cache->key.objectid + bg_cache->key.offset;
14505         }
14506
14507         /* Don't forget to set cache_generation to -1 */
14508         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14509         if (IS_ERR(trans)) {
14510                 error("failed to update super block cache generation");
14511                 return PTR_ERR(trans);
14512         }
14513         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14514         btrfs_commit_transaction(trans, fs_info->tree_root);
14515
14516         return ret;
14517 }
14518
14519 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14520                 int clear_version)
14521 {
14522         int ret = 0;
14523
14524         if (clear_version == 1) {
14525                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14526                         error(
14527                 "free space cache v2 detected, use --clear-space-cache v2");
14528                         ret = 1;
14529                         goto close_out;
14530                 }
14531                 printf("Clearing free space cache\n");
14532                 ret = clear_free_space_cache(fs_info);
14533                 if (ret) {
14534                         error("failed to clear free space cache");
14535                         ret = 1;
14536                 } else {
14537                         printf("Free space cache cleared\n");
14538                 }
14539         } else if (clear_version == 2) {
14540                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14541                         printf("no free space cache v2 to clear\n");
14542                         ret = 0;
14543                         goto close_out;
14544                 }
14545                 printf("Clear free space cache v2\n");
14546                 ret = btrfs_clear_free_space_tree(fs_info);
14547                 if (ret) {
14548                         error("failed to clear free space cache v2: %d", ret);
14549                         ret = 1;
14550                 } else {
14551                         printf("free space cache v2 cleared\n");
14552                 }
14553         }
14554 close_out:
14555         return ret;
14556 }
14557
/*
 * Help text of "btrfs check", passed to usage(): a NULL-terminated list of
 * lines holding the synopsis, the short and long description and, after an
 * empty separator line, the option list.
 */
const char * const cmd_check_usage[] = {
	/* synopsis and short description */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",

	/* long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",

	/* selecting what to open and how */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--force                     skip mount checks, repair is not possible",

	/* operation modes */
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",

	/* reports, alternative roots and miscellaneous */
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
14589
14590 int cmd_check(int argc, char **argv)
14591 {
14592         struct cache_tree root_cache;
14593         struct btrfs_root *root;
14594         struct btrfs_fs_info *info;
14595         u64 bytenr = 0;
14596         u64 subvolid = 0;
14597         u64 tree_root_bytenr = 0;
14598         u64 chunk_root_bytenr = 0;
14599         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14600         int ret = 0;
14601         int err = 0;
14602         u64 num;
14603         int init_csum_tree = 0;
14604         int readonly = 0;
14605         int clear_space_cache = 0;
14606         int qgroup_report = 0;
14607         int qgroups_repaired = 0;
14608         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14609         int force = 0;
14610
14611         while(1) {
14612                 int c;
14613                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14614                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14615                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14616                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14617                         GETOPT_VAL_FORCE };
14618                 static const struct option long_options[] = {
14619                         { "super", required_argument, NULL, 's' },
14620                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14621                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14622                         { "init-csum-tree", no_argument, NULL,
14623                                 GETOPT_VAL_INIT_CSUM },
14624                         { "init-extent-tree", no_argument, NULL,
14625                                 GETOPT_VAL_INIT_EXTENT },
14626                         { "check-data-csum", no_argument, NULL,
14627                                 GETOPT_VAL_CHECK_CSUM },
14628                         { "backup", no_argument, NULL, 'b' },
14629                         { "subvol-extents", required_argument, NULL, 'E' },
14630                         { "qgroup-report", no_argument, NULL, 'Q' },
14631                         { "tree-root", required_argument, NULL, 'r' },
14632                         { "chunk-root", required_argument, NULL,
14633                                 GETOPT_VAL_CHUNK_TREE },
14634                         { "progress", no_argument, NULL, 'p' },
14635                         { "mode", required_argument, NULL,
14636                                 GETOPT_VAL_MODE },
14637                         { "clear-space-cache", required_argument, NULL,
14638                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14639                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14640                         { NULL, 0, NULL, 0}
14641                 };
14642
14643                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14644                 if (c < 0)
14645                         break;
14646                 switch(c) {
14647                         case 'a': /* ignored */ break;
14648                         case 'b':
14649                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14650                                 break;
14651                         case 's':
14652                                 num = arg_strtou64(optarg);
14653                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14654                                         error(
14655                                         "super mirror should be less than %d",
14656                                                 BTRFS_SUPER_MIRROR_MAX);
14657                                         exit(1);
14658                                 }
14659                                 bytenr = btrfs_sb_offset(((int)num));
14660                                 printf("using SB copy %llu, bytenr %llu\n", num,
14661                                        (unsigned long long)bytenr);
14662                                 break;
14663                         case 'Q':
14664                                 qgroup_report = 1;
14665                                 break;
14666                         case 'E':
14667                                 subvolid = arg_strtou64(optarg);
14668                                 break;
14669                         case 'r':
14670                                 tree_root_bytenr = arg_strtou64(optarg);
14671                                 break;
14672                         case GETOPT_VAL_CHUNK_TREE:
14673                                 chunk_root_bytenr = arg_strtou64(optarg);
14674                                 break;
14675                         case 'p':
14676                                 ctx.progress_enabled = true;
14677                                 break;
14678                         case '?':
14679                         case 'h':
14680                                 usage(cmd_check_usage);
14681                         case GETOPT_VAL_REPAIR:
14682                                 printf("enabling repair mode\n");
14683                                 repair = 1;
14684                                 ctree_flags |= OPEN_CTREE_WRITES;
14685                                 break;
14686                         case GETOPT_VAL_READONLY:
14687                                 readonly = 1;
14688                                 break;
14689                         case GETOPT_VAL_INIT_CSUM:
14690                                 printf("Creating a new CRC tree\n");
14691                                 init_csum_tree = 1;
14692                                 repair = 1;
14693                                 ctree_flags |= OPEN_CTREE_WRITES;
14694                                 break;
14695                         case GETOPT_VAL_INIT_EXTENT:
14696                                 init_extent_tree = 1;
14697                                 ctree_flags |= (OPEN_CTREE_WRITES |
14698                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14699                                 repair = 1;
14700                                 break;
14701                         case GETOPT_VAL_CHECK_CSUM:
14702                                 check_data_csum = 1;
14703                                 break;
14704                         case GETOPT_VAL_MODE:
14705                                 check_mode = parse_check_mode(optarg);
14706                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14707                                         error("unknown mode: %s", optarg);
14708                                         exit(1);
14709                                 }
14710                                 break;
14711                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14712                                 if (strcmp(optarg, "v1") == 0) {
14713                                         clear_space_cache = 1;
14714                                 } else if (strcmp(optarg, "v2") == 0) {
14715                                         clear_space_cache = 2;
14716                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14717                                 } else {
14718                                         error(
14719                 "invalid argument to --clear-space-cache, must be v1 or v2");
14720                                         exit(1);
14721                                 }
14722                                 ctree_flags |= OPEN_CTREE_WRITES;
14723                                 break;
14724                         case GETOPT_VAL_FORCE:
14725                                 force = 1;
14726                                 break;
14727                 }
14728         }
14729
14730         if (check_argc_exact(argc - optind, 1))
14731                 usage(cmd_check_usage);
14732
14733         if (ctx.progress_enabled) {
14734                 ctx.tp = TASK_NOTHING;
14735                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14736         }
14737
14738         /* This check is the only reason for --readonly to exist */
14739         if (readonly && repair) {
14740                 error("repair options are not compatible with --readonly");
14741                 exit(1);
14742         }
14743
14744         /*
14745          * experimental and dangerous
14746          */
14747         if (repair && check_mode == CHECK_MODE_LOWMEM)
14748                 warning("low-memory mode repair support is only partial");
14749
14750         radix_tree_init();
14751         cache_tree_init(&root_cache);
14752
14753         ret = check_mounted(argv[optind]);
14754         if (!force) {
14755                 if (ret < 0) {
14756                         error("could not check mount status: %s",
14757                                         strerror(-ret));
14758                         err |= !!ret;
14759                         goto err_out;
14760                 } else if (ret) {
14761                         error(
14762 "%s is currently mounted, use --force if you really intend to check the filesystem",
14763                                 argv[optind]);
14764                         ret = -EBUSY;
14765                         err |= !!ret;
14766                         goto err_out;
14767                 }
14768         } else {
14769                 if (repair) {
14770                         error("repair and --force is not yet supported");
14771                         ret = 1;
14772                         err |= !!ret;
14773                         goto err_out;
14774                 }
14775                 if (ret < 0) {
14776                         warning(
14777 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14778                                 argv[optind]);
14779                 } else if (ret) {
14780                         warning(
14781                         "filesystem mounted, continuing because of --force");
14782                 }
14783                 /* A block device is mounted in exclusive mode by kernel */
14784                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14785         }
14786
14787         /* only allow partial opening under repair mode */
14788         if (repair)
14789                 ctree_flags |= OPEN_CTREE_PARTIAL;
14790
14791         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14792                                   chunk_root_bytenr, ctree_flags);
14793         if (!info) {
14794                 error("cannot open file system");
14795                 ret = -EIO;
14796                 err |= !!ret;
14797                 goto err_out;
14798         }
14799
14800         global_info = info;
14801         root = info->fs_root;
14802         uuid_unparse(info->super_copy->fsid, uuidbuf);
14803
14804         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14805
14806         /*
14807          * Check the bare minimum before starting anything else that could rely
14808          * on it, namely the tree roots, any local consistency checks
14809          */
14810         if (!extent_buffer_uptodate(info->tree_root->node) ||
14811             !extent_buffer_uptodate(info->dev_root->node) ||
14812             !extent_buffer_uptodate(info->chunk_root->node)) {
14813                 error("critical roots corrupted, unable to check the filesystem");
14814                 err |= !!ret;
14815                 ret = -EIO;
14816                 goto close_out;
14817         }
14818
14819         if (clear_space_cache) {
14820                 ret = do_clear_free_space_cache(info, clear_space_cache);
14821                 err |= !!ret;
14822                 goto close_out;
14823         }
14824
14825         /*
14826          * repair mode will force us to commit transaction which
14827          * will make us fail to load log tree when mounting.
14828          */
14829         if (repair && btrfs_super_log_root(info->super_copy)) {
14830                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14831                 if (!ret) {
14832                         ret = 1;
14833                         err |= !!ret;
14834                         goto close_out;
14835                 }
14836                 ret = zero_log_tree(root);
14837                 err |= !!ret;
14838                 if (ret) {
14839                         error("failed to zero log tree: %d", ret);
14840                         goto close_out;
14841                 }
14842         }
14843
14844         if (qgroup_report) {
14845                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14846                        uuidbuf);
14847                 ret = qgroup_verify_all(info);
14848                 err |= !!ret;
14849                 if (ret == 0)
14850                         report_qgroups(1);
14851                 goto close_out;
14852         }
14853         if (subvolid) {
14854                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14855                        subvolid, argv[optind], uuidbuf);
14856                 ret = print_extent_state(info, subvolid);
14857                 err |= !!ret;
14858                 goto close_out;
14859         }
14860
14861         if (init_extent_tree || init_csum_tree) {
14862                 struct btrfs_trans_handle *trans;
14863
14864                 trans = btrfs_start_transaction(info->extent_root, 0);
14865                 if (IS_ERR(trans)) {
14866                         error("error starting transaction");
14867                         ret = PTR_ERR(trans);
14868                         err |= !!ret;
14869                         goto close_out;
14870                 }
14871
14872                 if (init_extent_tree) {
14873                         printf("Creating a new extent tree\n");
14874                         ret = reinit_extent_tree(trans, info);
14875                         err |= !!ret;
14876                         if (ret)
14877                                 goto close_out;
14878                 }
14879
14880                 if (init_csum_tree) {
14881                         printf("Reinitialize checksum tree\n");
14882                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14883                         if (ret) {
14884                                 error("checksum tree initialization failed: %d",
14885                                                 ret);
14886                                 ret = -EIO;
14887                                 err |= !!ret;
14888                                 goto close_out;
14889                         }
14890
14891                         ret = fill_csum_tree(trans, info->csum_root,
14892                                              init_extent_tree);
14893                         err |= !!ret;
14894                         if (ret) {
14895                                 error("checksum tree refilling failed: %d", ret);
14896                                 return -EIO;
14897                         }
14898                 }
14899                 /*
14900                  * Ok now we commit and run the normal fsck, which will add
14901                  * extent entries for all of the items it finds.
14902                  */
14903                 ret = btrfs_commit_transaction(trans, info->extent_root);
14904                 err |= !!ret;
14905                 if (ret)
14906                         goto close_out;
14907         }
14908         if (!extent_buffer_uptodate(info->extent_root->node)) {
14909                 error("critical: extent_root, unable to check the filesystem");
14910                 ret = -EIO;
14911                 err |= !!ret;
14912                 goto close_out;
14913         }
14914         if (!extent_buffer_uptodate(info->csum_root->node)) {
14915                 error("critical: csum_root, unable to check the filesystem");
14916                 ret = -EIO;
14917                 err |= !!ret;
14918                 goto close_out;
14919         }
14920
14921         if (!init_extent_tree) {
14922                 ret = repair_root_items(info);
14923                 if (ret < 0) {
14924                         err = !!ret;
14925                         error("failed to repair root items: %s", strerror(-ret));
14926                         goto close_out;
14927                 }
14928                 if (repair) {
14929                         fprintf(stderr, "Fixed %d roots.\n", ret);
14930                         ret = 0;
14931                 } else if (ret > 0) {
14932                         fprintf(stderr,
14933                                 "Found %d roots with an outdated root item.\n",
14934                                 ret);
14935                         fprintf(stderr,
14936         "Please run a filesystem check with the option --repair to fix them.\n");
14937                         ret = 1;
14938                         err |= ret;
14939                         goto close_out;
14940                 }
14941         }
14942
14943         ret = do_check_chunks_and_extents(info);
14944         err |= !!ret;
14945         if (ret)
14946                 error(
14947                 "errors found in extent allocation tree or chunk allocation");
14948
14949         /* Only re-check super size after we checked and repaired the fs */
14950         err |= !is_super_size_valid(info);
14951
14952         if (!ctx.progress_enabled) {
14953                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14954                         fprintf(stderr, "checking free space tree\n");
14955                 else
14956                         fprintf(stderr, "checking free space cache\n");
14957         }
14958         ret = check_space_cache(root);
14959         err |= !!ret;
14960         if (ret) {
14961                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14962                         error("errors found in free space tree");
14963                 else
14964                         error("errors found in free space cache");
14965                 goto out;
14966         }
14967
14968         /*
14969          * We used to have to have these hole extents in between our real
14970          * extents so if we don't have this flag set we need to make sure there
14971          * are no gaps in the file extents for inodes, otherwise we can just
14972          * ignore it when this happens.
14973          */
14974         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14975         ret = do_check_fs_roots(info, &root_cache);
14976         err |= !!ret;
14977         if (ret) {
14978                 error("errors found in fs roots");
14979                 goto out;
14980         }
14981
14982         fprintf(stderr, "checking csums\n");
14983         ret = check_csums(root);
14984         err |= !!ret;
14985         if (ret) {
14986                 error("errors found in csum tree");
14987                 goto out;
14988         }
14989
14990         fprintf(stderr, "checking root refs\n");
14991         /* For low memory mode, check_fs_roots_v2 handles root refs */
14992         if (check_mode != CHECK_MODE_LOWMEM) {
14993                 ret = check_root_refs(root, &root_cache);
14994                 err |= !!ret;
14995                 if (ret) {
14996                         error("errors found in root refs");
14997                         goto out;
14998                 }
14999         }
15000
15001         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
15002                 struct extent_buffer *eb;
15003
15004                 eb = list_first_entry(&root->fs_info->recow_ebs,
15005                                       struct extent_buffer, recow);
15006                 list_del_init(&eb->recow);
15007                 ret = recow_extent_buffer(root, eb);
15008                 err |= !!ret;
15009                 if (ret) {
15010                         error("fails to fix transid errors");
15011                         break;
15012                 }
15013         }
15014
15015         while (!list_empty(&delete_items)) {
15016                 struct bad_item *bad;
15017
15018                 bad = list_first_entry(&delete_items, struct bad_item, list);
15019                 list_del_init(&bad->list);
15020                 if (repair) {
15021                         ret = delete_bad_item(root, bad);
15022                         err |= !!ret;
15023                 }
15024                 free(bad);
15025         }
15026
15027         if (info->quota_enabled) {
15028                 fprintf(stderr, "checking quota groups\n");
15029                 ret = qgroup_verify_all(info);
15030                 err |= !!ret;
15031                 if (ret) {
15032                         error("failed to check quota groups");
15033                         goto out;
15034                 }
15035                 report_qgroups(0);
15036                 ret = repair_qgroups(info, &qgroups_repaired);
15037                 err |= !!ret;
15038                 if (err) {
15039                         error("failed to repair quota groups");
15040                         goto out;
15041                 }
15042                 ret = 0;
15043         }
15044
15045         if (!list_empty(&root->fs_info->recow_ebs)) {
15046                 error("transid errors in file system");
15047                 ret = 1;
15048                 err |= !!ret;
15049         }
15050 out:
15051         printf("found %llu bytes used, ",
15052                (unsigned long long)bytes_used);
15053         if (err)
15054                 printf("error(s) found\n");
15055         else
15056                 printf("no error found\n");
15057         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
15058         printf("total tree bytes: %llu\n",
15059                (unsigned long long)total_btree_bytes);
15060         printf("total fs tree bytes: %llu\n",
15061                (unsigned long long)total_fs_tree_bytes);
15062         printf("total extent tree bytes: %llu\n",
15063                (unsigned long long)total_extent_tree_bytes);
15064         printf("btree space waste bytes: %llu\n",
15065                (unsigned long long)btree_space_waste);
15066         printf("file data blocks allocated: %llu\n referenced %llu\n",
15067                 (unsigned long long)data_bytes_allocated,
15068                 (unsigned long long)data_bytes_referenced);
15069
15070         free_qgroup_counts();
15071         free_root_recs_tree(&root_cache);
15072 close_out:
15073         close_ctree(root);
15074 err_out:
15075         if (ctx.progress_enabled)
15076                 task_deinit(ctx.info);
15077
15078         return err;
15079 }