btrfs-progs: image: move metadump definitions to own header
[platform/upstream/btrfs-progs.git] / cmds-check.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46
/*
 * Phase reported by the progress indicator.  The first three values are
 * used as indexes into task_position_string[] in print_status_check(), so
 * their order has to match that table.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
53
/* State handed to the progress callbacks (see print_status_check()). */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output is wanted; set outside this view */
        int progress_enabled;
        /* phase whose message is printed; TASK_NOTHING prints nothing */
        enum task_position tp;

        /* handle passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
60
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
77
/*
 * Available check modes.  parse_check_mode() maps "orig"/"original" and
 * "lowmem" to the first two values.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,     /* what parse_check_mode() returns for any other string */
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts out as the default (original) mode. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86
/*
 * Common part of a backref record.  It is embedded as the member `node` of
 * both struct data_backref and struct tree_backref, and carries the rb_node
 * the comparators below receive.
 */
struct extent_backref {
        struct rb_node node;
        /* selects compare_data_backref() vs compare_tree_backref() */
        unsigned int is_data:1;
        unsigned int found_extent_tree:1;
        /* compared before the type-specific fields in compare_extent_backref() */
        unsigned int full_backref:1;
        unsigned int found_ref:1;
        unsigned int broken:1;
};
95
96 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 {
98         return rb_entry(node, struct extent_backref, node);
99 }
100
/*
 * Backref record for a data extent.  The embedded extent_backref must stay
 * reachable through the member name `node` (see to_data_backref()).
 */
struct data_backref {
        struct extent_backref node;
        /* parent and root share storage; the comparators read `parent` for both */
        union {
                u64 parent;
                u64 root;
        };
        u64 owner;
        u64 offset;
        /* disk_bytenr and bytes are only compared when both records have found_ref set */
        u64 disk_bytenr;
        u64 bytes;
        u64 ram_bytes;
        u32 num_refs;
        /* distinct from the one-bit node.found_ref flag */
        u32 found_ref;
};
115
/* Error bit flags; bit 0 is not assigned in this list. */
#define ROOT_DIR_ERROR          (1<<1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING        (1<<2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH       (1<<3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING       (1<<4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING      (1<<5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH     (1<<6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR       (1<<7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM           (1<<8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING       (1<<9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR        (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR            (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR             (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM             (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM           (1<<14) /* no inode_item */
#define LAST_ITEM               (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING        (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH       (1<<17) /* ROOT_REF found but not match */
#define DIR_INDEX_MISSING       (1<<18) /* DIR_INDEX not found */
#define DIR_INDEX_MISMATCH      (1<<19) /* DIR_INDEX found but not match */
#define DIR_COUNT_AGAIN         (1<<20) /* DIR isize should be recalculated */
#define BG_ACCOUNTING_ERROR     (1<<21) /* Block group accounting error */
137
138 static inline struct data_backref* to_data_backref(struct extent_backref *back)
139 {
140         return container_of(back, struct data_backref, node);
141 }
142
143 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
144 {
145         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
146         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
147         struct data_backref *back1 = to_data_backref(ext1);
148         struct data_backref *back2 = to_data_backref(ext2);
149
150         WARN_ON(!ext1->is_data);
151         WARN_ON(!ext2->is_data);
152
153         /* parent and root are a union, so this covers both */
154         if (back1->parent > back2->parent)
155                 return 1;
156         if (back1->parent < back2->parent)
157                 return -1;
158
159         /* This is a full backref and the parents match. */
160         if (back1->node.full_backref)
161                 return 0;
162
163         if (back1->owner > back2->owner)
164                 return 1;
165         if (back1->owner < back2->owner)
166                 return -1;
167
168         if (back1->offset > back2->offset)
169                 return 1;
170         if (back1->offset < back2->offset)
171                 return -1;
172
173         if (back1->found_ref && back2->found_ref) {
174                 if (back1->disk_bytenr > back2->disk_bytenr)
175                         return 1;
176                 if (back1->disk_bytenr < back2->disk_bytenr)
177                         return -1;
178
179                 if (back1->bytes > back2->bytes)
180                         return 1;
181                 if (back1->bytes < back2->bytes)
182                         return -1;
183         }
184
185         return 0;
186 }
187
/*
 * Much like data_backref, just removed the undetermined members
 * and change it to use list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
 */
struct orphan_data_extent {
        struct list_head list;
        u64 root;
        u64 objectid;           /* printed as "inode" by print_orphan_data_extents() */
        u64 offset;
        u64 disk_bytenr;
        u64 disk_len;
};
202
/*
 * Backref record for a tree block.  The embedded extent_backref must stay
 * reachable through the member name `node` (see to_tree_backref()).
 */
struct tree_backref {
        struct extent_backref node;
        /* parent and root share storage; compare_tree_backref() reads `parent` for both */
        union {
                u64 parent;
                u64 root;
        };
};
210
211 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
212 {
213         return container_of(back, struct tree_backref, node);
214 }
215
216 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
217 {
218         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
219         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220         struct tree_backref *back1 = to_tree_backref(ext1);
221         struct tree_backref *back2 = to_tree_backref(ext2);
222
223         WARN_ON(ext1->is_data);
224         WARN_ON(ext2->is_data);
225
226         /* parent and root are a union, so this covers both */
227         if (back1->parent > back2->parent)
228                 return 1;
229         if (back1->parent < back2->parent)
230                 return -1;
231
232         return 0;
233 }
234
235 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
238         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
239
240         if (ext1->is_data > ext2->is_data)
241                 return 1;
242
243         if (ext1->is_data < ext2->is_data)
244                 return -1;
245
246         if (ext1->full_backref > ext2->full_backref)
247                 return 1;
248         if (ext1->full_backref < ext2->full_backref)
249                 return -1;
250
251         if (ext1->is_data)
252                 return compare_data_backref(node1, node2);
253         else
254                 return compare_tree_backref(node1, node2);
255 }
256
/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };

/*
 * Record describing one extent.  to_extent_record() recovers the record
 * from its `list` member.
 */
struct extent_record {
        struct list_head backrefs;
        struct list_head dups;
        struct rb_root backref_tree;
        struct list_head list;
        struct cache_extent cache;
        struct btrfs_disk_key parent_key;
        u64 start;
        u64 max_size;
        u64 nr;
        u64 refs;
        u64 extent_item_refs;
        u64 generation;
        u64 parent_generation;
        u64 info_objectid;
        u32 num_duplicates;
        u8 info_level;
        /* two bits wide so that it can hold 0, 1 and FLAG_UNSET (2) */
        unsigned int flag_block_full_backref:2;
        unsigned int found_rec:1;
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
        unsigned int metadata:1;
        unsigned int bad_full_backref:1;
        unsigned int crossing_stripes:1;
        unsigned int wrong_chunk_type:1;
};
287
288 static inline struct extent_record* to_extent_record(struct list_head *entry)
289 {
290         return container_of(entry, struct extent_record, list);
291 }
292
293 struct inode_backref {
294         struct list_head list;
295         unsigned int found_dir_item:1;
296         unsigned int found_dir_index:1;
297         unsigned int found_inode_ref:1;
298         u8 filetype;
299         u8 ref_type;
300         int errors;
301         u64 dir;
302         u64 index;
303         u16 namelen;
304         char name[0];
305 };
306
307 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
308 {
309         return list_entry(entry, struct inode_backref, list);
310 }
311
/*
 * List-linked record holding per-root values.
 * NOTE(review): presumably copied from the on-disk root item; the code
 * that fills it in is outside this view.
 */
struct root_item_record {
        struct list_head list;
        u64 objectid;
        u64 bytenr;
        u64 last_snapshot;
        u8 level;
        u8 drop_level;
        struct btrfs_key drop_key;
};
321
/* Error bits for a backref's `errors` field; decoded by print_ref_error(). */
#define REF_ERR_NO_DIR_ITEM             (1 << 0)
#define REF_ERR_NO_DIR_INDEX            (1 << 1)
#define REF_ERR_NO_INODE_REF            (1 << 2)
#define REF_ERR_DUP_DIR_ITEM            (1 << 3)
#define REF_ERR_DUP_DIR_INDEX           (1 << 4)
#define REF_ERR_DUP_INODE_REF           (1 << 5)
#define REF_ERR_INDEX_UNMATCH           (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH        (1 << 7)
#define REF_ERR_NAME_TOO_LONG           (1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF             (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF         (1 << 10)
#define REF_ERR_DUP_ROOT_REF            (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF        (1 << 12)
335
/*
 * One hole range, kept in an rb-tree ordered by compare_hole().
 * The range covered is [start, start + len).
 */
struct file_extent_hole {
        struct rb_node node;
        u64 start;
        u64 len;
};
341
/*
 * Everything collected about one inode.  clone_inode_rec() duplicates a
 * record including its backrefs, orphan extents and holes.
 */
struct inode_record {
        struct list_head backrefs;      /* list of struct inode_backref */
        unsigned int checked:1;
        unsigned int merging:1;
        unsigned int found_inode_item:1;
        unsigned int found_dir_item:1;
        unsigned int found_file_extent:1;
        unsigned int found_csum_item:1;
        unsigned int some_csum_missing:1;
        unsigned int nodatasum:1;
        int errors;                     /* I_ERR_* bits, decoded by print_inode_error() */

        u64 ino;
        u32 nlink;
        u32 imode;
        u64 isize;
        u64 nbytes;

        u32 found_link;
        u64 found_size;
        u64 extent_start;
        u64 extent_end;
        struct rb_root holes;           /* tree of struct file_extent_hole */
        struct list_head orphan_extents; /* list of struct orphan_data_extent */

        u32 refs;                       /* set to 1 on a freshly cloned record */
};
369
/* Error bits for inode_record::errors; decoded by print_inode_error(). */
#define I_ERR_NO_INODE_ITEM             (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM            (1 << 1)
#define I_ERR_DUP_INODE_ITEM            (1 << 2)
#define I_ERR_DUP_DIR_INDEX             (1 << 3)
#define I_ERR_ODD_DIR_ITEM              (1 << 4)
#define I_ERR_ODD_FILE_EXTENT           (1 << 5)
#define I_ERR_BAD_FILE_EXTENT           (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP       (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT      (1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG           (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG         (1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM             (1 << 11)
#define I_ERR_SOME_CSUM_MISSING         (1 << 12)
#define I_ERR_LINK_COUNT_WRONG          (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN        (1 << 14)
385
386 struct root_backref {
387         struct list_head list;
388         unsigned int found_dir_item:1;
389         unsigned int found_dir_index:1;
390         unsigned int found_back_ref:1;
391         unsigned int found_forward_ref:1;
392         unsigned int reachable:1;
393         int errors;
394         u64 ref_root;
395         u64 dir;
396         u64 index;
397         u16 namelen;
398         char name[0];
399 };
400
401 static inline struct root_backref* to_root_backref(struct list_head *entry)
402 {
403         return list_entry(entry, struct root_backref, list);
404 }
405
/* Per-root record; embeds a cache_extent and heads a list of backrefs. */
struct root_record {
        struct list_head backrefs;      /* NOTE(review): presumably struct root_backref entries - confirm */
        struct cache_extent cache;
        unsigned int found_root_item:1;
        u64 objectid;
        u32 found_ref;
};
413
/* A cache_extent entry that carries an opaque data pointer. */
struct ptr_node {
        struct cache_extent cache;
        void *data;
};
418
/*
 * Cache entry holding its own root and inode cache trees.
 * NOTE(review): presumably one per shared tree block - confirm with users.
 */
struct shared_node {
        struct cache_extent cache;
        struct cache_tree root_cache;
        struct cache_tree inode_cache;
        struct inode_record *current;
        u32 refs;
};
426
/* Start and size pair describing one block. */
struct block_info {
        u64 start;
        u32 size;
};
431
/* Tree walk state; `nodes` has one slot per possible tree level. */
struct walk_control {
        struct cache_tree shared;
        struct shared_node *nodes[BTRFS_MAX_LEVEL];
        int active_node;
        int root_level;
};
438
/* List entry identifying an item by its key and the id of its root. */
struct bad_item {
        struct btrfs_key key;
        u64 root_id;
        struct list_head list;
};
444
/*
 * List entry describing a (bytenr, bytes) range with two counters.
 * NOTE(review): the meaning of count/broken is set by users outside this view.
 */
struct extent_entry {
        u64 bytenr;
        u64 bytes;
        int count;
        int broken;
        struct list_head list;
};
452
/* Per-root summary stored as a cache entry through `cache_extent`. */
struct root_item_info {
        /* level of the root */
        u8 level;
        /* number of nodes at this level, must be 1 for a root */
        int node_count;
        u64 bytenr;
        u64 gen;
        struct cache_extent cache_extent;
};
462
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns a distinct bit.  CROSSING_STRIPE_BOUNDARY used to share
 * bit 4 with REFERENCER_MISMATCH, which made the two conditions
 * indistinguishable once they were OR-ed into one error value.
 */
#define BACKREF_MISSING         (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH        (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED         (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING      (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH     (1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH      (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE            (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH     (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH     (1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
479
480 static void *print_status_check(void *p)
481 {
482         struct task_ctx *priv = p;
483         const char work_indicator[] = { '.', 'o', 'O', 'o' };
484         uint32_t count = 0;
485         static char *task_position_string[] = {
486                 "checking extents",
487                 "checking free space cache",
488                 "checking fs roots",
489         };
490
491         task_period_start(priv->info, 1000 /* 1s */);
492
493         if (priv->tp == TASK_NOTHING)
494                 return NULL;
495
496         while (1) {
497                 printf("%s [%c]\r", task_position_string[priv->tp],
498                                 work_indicator[count % 4]);
499                 count++;
500                 fflush(stdout);
501                 task_period_wait(priv->info);
502         }
503         return NULL;
504 }
505
/*
 * Progress callback run when the indicator stops: terminate the status
 * line with a newline and flush stdout.  @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);

        return 0;
}
513
514 static enum btrfs_check_mode parse_check_mode(const char *str)
515 {
516         if (strcmp(str, "lowmem") == 0)
517                 return CHECK_MODE_LOWMEM;
518         if (strcmp(str, "orig") == 0)
519                 return CHECK_MODE_ORIGINAL;
520         if (strcmp(str, "original") == 0)
521                 return CHECK_MODE_ORIGINAL;
522
523         return CHECK_MODE_UNKNOWN;
524 }
525
526 /* Compatible function to allow reuse of old codes */
527 static u64 first_extent_gap(struct rb_root *holes)
528 {
529         struct file_extent_hole *hole;
530
531         if (RB_EMPTY_ROOT(holes))
532                 return (u64)-1;
533
534         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
535         return hole->start;
536 }
537
538 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
539 {
540         struct file_extent_hole *hole1;
541         struct file_extent_hole *hole2;
542
543         hole1 = rb_entry(node1, struct file_extent_hole, node);
544         hole2 = rb_entry(node2, struct file_extent_hole, node);
545
546         if (hole1->start > hole2->start)
547                 return -1;
548         if (hole1->start < hole2->start)
549                 return 1;
550         /* Now hole1->start == hole2->start */
551         if (hole1->len >= hole2->len)
552                 /*
553                  * Hole 1 will be merge center
554                  * Same hole will be merged later
555                  */
556                 return -1;
557         /* Hole 2 will be merge center */
558         return 1;
559 }
560
561 /*
562  * Add a hole to the record
563  *
564  * This will do hole merge for copy_file_extent_holes(),
565  * which will ensure there won't be continuous holes.
566  */
567 static int add_file_extent_hole(struct rb_root *holes,
568                                 u64 start, u64 len)
569 {
570         struct file_extent_hole *hole;
571         struct file_extent_hole *prev = NULL;
572         struct file_extent_hole *next = NULL;
573
574         hole = malloc(sizeof(*hole));
575         if (!hole)
576                 return -ENOMEM;
577         hole->start = start;
578         hole->len = len;
579         /* Since compare will not return 0, no -EEXIST will happen */
580         rb_insert(holes, &hole->node, compare_hole);
581
582         /* simple merge with previous hole */
583         if (rb_prev(&hole->node))
584                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
585                                 node);
586         if (prev && prev->start + prev->len >= hole->start) {
587                 hole->len = hole->start + hole->len - prev->start;
588                 hole->start = prev->start;
589                 rb_erase(&prev->node, holes);
590                 free(prev);
591                 prev = NULL;
592         }
593
594         /* iterate merge with next holes */
595         while (1) {
596                 if (!rb_next(&hole->node))
597                         break;
598                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
599                                         node);
600                 if (hole->start + hole->len >= next->start) {
601                         if (hole->start + hole->len <= next->start + next->len)
602                                 hole->len = next->start + next->len -
603                                             hole->start;
604                         rb_erase(&next->node, holes);
605                         free(next);
606                         next = NULL;
607                 } else
608                         break;
609         }
610         return 0;
611 }
612
613 static int compare_hole_range(struct rb_node *node, void *data)
614 {
615         struct file_extent_hole *hole;
616         u64 start;
617
618         hole = (struct file_extent_hole *)data;
619         start = hole->start;
620
621         hole = rb_entry(node, struct file_extent_hole, node);
622         if (start < hole->start)
623                 return -1;
624         if (start >= hole->start && start < hole->start + hole->len)
625                 return 0;
626         return 1;
627 }
628
629 /*
630  * Delete a hole in the record
631  *
632  * This will do the hole split and is much restrict than add.
633  */
634 static int del_file_extent_hole(struct rb_root *holes,
635                                 u64 start, u64 len)
636 {
637         struct file_extent_hole *hole;
638         struct file_extent_hole tmp;
639         u64 prev_start = 0;
640         u64 prev_len = 0;
641         u64 next_start = 0;
642         u64 next_len = 0;
643         struct rb_node *node;
644         int have_prev = 0;
645         int have_next = 0;
646         int ret = 0;
647
648         tmp.start = start;
649         tmp.len = len;
650         node = rb_search(holes, &tmp, compare_hole_range, NULL);
651         if (!node)
652                 return -EEXIST;
653         hole = rb_entry(node, struct file_extent_hole, node);
654         if (start + len > hole->start + hole->len)
655                 return -EEXIST;
656
657         /*
658          * Now there will be no overlap, delete the hole and re-add the
659          * split(s) if they exists.
660          */
661         if (start > hole->start) {
662                 prev_start = hole->start;
663                 prev_len = start - hole->start;
664                 have_prev = 1;
665         }
666         if (hole->start + hole->len > start + len) {
667                 next_start = start + len;
668                 next_len = hole->start + hole->len - start - len;
669                 have_next = 1;
670         }
671         rb_erase(node, holes);
672         free(hole);
673         if (have_prev) {
674                 ret = add_file_extent_hole(holes, prev_start, prev_len);
675                 if (ret < 0)
676                         return ret;
677         }
678         if (have_next) {
679                 ret = add_file_extent_hole(holes, next_start, next_len);
680                 if (ret < 0)
681                         return ret;
682         }
683         return 0;
684 }
685
686 static int copy_file_extent_holes(struct rb_root *dst,
687                                   struct rb_root *src)
688 {
689         struct file_extent_hole *hole;
690         struct rb_node *node;
691         int ret = 0;
692
693         node = rb_first(src);
694         while (node) {
695                 hole = rb_entry(node, struct file_extent_hole, node);
696                 ret = add_file_extent_hole(dst, hole->start, hole->len);
697                 if (ret)
698                         break;
699                 node = rb_next(node);
700         }
701         return ret;
702 }
703
704 static void free_file_extent_holes(struct rb_root *holes)
705 {
706         struct rb_node *node;
707         struct file_extent_hole *hole;
708
709         node = rb_first(holes);
710         while (node) {
711                 hole = rb_entry(node, struct file_extent_hole, node);
712                 rb_erase(node, holes);
713                 free(hole);
714                 node = rb_first(holes);
715         }
716 }
717
718 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
719
720 static void record_root_in_trans(struct btrfs_trans_handle *trans,
721                                  struct btrfs_root *root)
722 {
723         if (root->last_trans != trans->transid) {
724                 root->track_dirty = 1;
725                 root->last_trans = trans->transid;
726                 root->commit_root = root->node;
727                 extent_buffer_get(root->node);
728         }
729 }
730
731 static u8 imode_to_type(u32 imode)
732 {
733 #define S_SHIFT 12
734         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
735                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
736                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
737                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
738                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
739                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
740                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
741                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
742         };
743
744         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
745 #undef S_SHIFT
746 }
747
748 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
749 {
750         struct device_record *rec1;
751         struct device_record *rec2;
752
753         rec1 = rb_entry(node1, struct device_record, node);
754         rec2 = rb_entry(node2, struct device_record, node);
755         if (rec1->devid > rec2->devid)
756                 return -1;
757         else if (rec1->devid < rec2->devid)
758                 return 1;
759         else
760                 return 0;
761 }
762
763 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
764 {
765         struct inode_record *rec;
766         struct inode_backref *backref;
767         struct inode_backref *orig;
768         struct inode_backref *tmp;
769         struct orphan_data_extent *src_orphan;
770         struct orphan_data_extent *dst_orphan;
771         struct rb_node *rb;
772         size_t size;
773         int ret;
774
775         rec = malloc(sizeof(*rec));
776         if (!rec)
777                 return ERR_PTR(-ENOMEM);
778         memcpy(rec, orig_rec, sizeof(*rec));
779         rec->refs = 1;
780         INIT_LIST_HEAD(&rec->backrefs);
781         INIT_LIST_HEAD(&rec->orphan_extents);
782         rec->holes = RB_ROOT;
783
784         list_for_each_entry(orig, &orig_rec->backrefs, list) {
785                 size = sizeof(*orig) + orig->namelen + 1;
786                 backref = malloc(size);
787                 if (!backref) {
788                         ret = -ENOMEM;
789                         goto cleanup;
790                 }
791                 memcpy(backref, orig, size);
792                 list_add_tail(&backref->list, &rec->backrefs);
793         }
794         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
795                 dst_orphan = malloc(sizeof(*dst_orphan));
796                 if (!dst_orphan) {
797                         ret = -ENOMEM;
798                         goto cleanup;
799                 }
800                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
801                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
802         }
803         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
804         if (ret < 0)
805                 goto cleanup_rb;
806
807         return rec;
808
809 cleanup_rb:
810         rb = rb_first(&rec->holes);
811         while (rb) {
812                 struct file_extent_hole *hole;
813
814                 hole = rb_entry(rb, struct file_extent_hole, node);
815                 rb = rb_next(rb);
816                 free(hole);
817         }
818
819 cleanup:
820         if (!list_empty(&rec->backrefs))
821                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
822                         list_del(&orig->list);
823                         free(orig);
824                 }
825
826         if (!list_empty(&rec->orphan_extents))
827                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
828                         list_del(&orig->list);
829                         free(orig);
830                 }
831
832         free(rec);
833
834         return ERR_PTR(ret);
835 }
836
837 static void print_orphan_data_extents(struct list_head *orphan_extents,
838                                       u64 objectid)
839 {
840         struct orphan_data_extent *orphan;
841
842         if (list_empty(orphan_extents))
843                 return;
844         printf("The following data extent is lost in tree %llu:\n",
845                objectid);
846         list_for_each_entry(orphan, orphan_extents, list) {
847                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
848                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
849                        orphan->disk_len);
850         }
851 }
852
853 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
854 {
855         u64 root_objectid = root->root_key.objectid;
856         int errors = rec->errors;
857
858         if (!errors)
859                 return;
860         /* reloc root errors, we print its corresponding fs root objectid*/
861         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
862                 root_objectid = root->root_key.offset;
863                 fprintf(stderr, "reloc");
864         }
865         fprintf(stderr, "root %llu inode %llu errors %x",
866                 (unsigned long long) root_objectid,
867                 (unsigned long long) rec->ino, rec->errors);
868
869         if (errors & I_ERR_NO_INODE_ITEM)
870                 fprintf(stderr, ", no inode item");
871         if (errors & I_ERR_NO_ORPHAN_ITEM)
872                 fprintf(stderr, ", no orphan item");
873         if (errors & I_ERR_DUP_INODE_ITEM)
874                 fprintf(stderr, ", dup inode item");
875         if (errors & I_ERR_DUP_DIR_INDEX)
876                 fprintf(stderr, ", dup dir index");
877         if (errors & I_ERR_ODD_DIR_ITEM)
878                 fprintf(stderr, ", odd dir item");
879         if (errors & I_ERR_ODD_FILE_EXTENT)
880                 fprintf(stderr, ", odd file extent");
881         if (errors & I_ERR_BAD_FILE_EXTENT)
882                 fprintf(stderr, ", bad file extent");
883         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
884                 fprintf(stderr, ", file extent overlap");
885         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
886                 fprintf(stderr, ", file extent discount");
887         if (errors & I_ERR_DIR_ISIZE_WRONG)
888                 fprintf(stderr, ", dir isize wrong");
889         if (errors & I_ERR_FILE_NBYTES_WRONG)
890                 fprintf(stderr, ", nbytes wrong");
891         if (errors & I_ERR_ODD_CSUM_ITEM)
892                 fprintf(stderr, ", odd csum item");
893         if (errors & I_ERR_SOME_CSUM_MISSING)
894                 fprintf(stderr, ", some csum missing");
895         if (errors & I_ERR_LINK_COUNT_WRONG)
896                 fprintf(stderr, ", link count wrong");
897         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
898                 fprintf(stderr, ", orphan file extent");
899         fprintf(stderr, "\n");
900         /* Print the orphan extents if needed */
901         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
902                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
903
904         /* Print the holes if needed */
905         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
906                 struct file_extent_hole *hole;
907                 struct rb_node *node;
908                 int found = 0;
909
910                 node = rb_first(&rec->holes);
911                 fprintf(stderr, "Found file extent holes:\n");
912                 while (node) {
913                         found = 1;
914                         hole = rb_entry(node, struct file_extent_hole, node);
915                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
916                                 hole->start, hole->len);
917                         node = rb_next(node);
918                 }
919                 if (!found)
920                         fprintf(stderr, "\tstart: 0, len: %llu\n",
921                                 round_up(rec->isize,
922                                          root->fs_info->sectorsize));
923         }
924 }
925
926 static void print_ref_error(int errors)
927 {
928         if (errors & REF_ERR_NO_DIR_ITEM)
929                 fprintf(stderr, ", no dir item");
930         if (errors & REF_ERR_NO_DIR_INDEX)
931                 fprintf(stderr, ", no dir index");
932         if (errors & REF_ERR_NO_INODE_REF)
933                 fprintf(stderr, ", no inode ref");
934         if (errors & REF_ERR_DUP_DIR_ITEM)
935                 fprintf(stderr, ", dup dir item");
936         if (errors & REF_ERR_DUP_DIR_INDEX)
937                 fprintf(stderr, ", dup dir index");
938         if (errors & REF_ERR_DUP_INODE_REF)
939                 fprintf(stderr, ", dup inode ref");
940         if (errors & REF_ERR_INDEX_UNMATCH)
941                 fprintf(stderr, ", index mismatch");
942         if (errors & REF_ERR_FILETYPE_UNMATCH)
943                 fprintf(stderr, ", filetype mismatch");
944         if (errors & REF_ERR_NAME_TOO_LONG)
945                 fprintf(stderr, ", name too long");
946         if (errors & REF_ERR_NO_ROOT_REF)
947                 fprintf(stderr, ", no root ref");
948         if (errors & REF_ERR_NO_ROOT_BACKREF)
949                 fprintf(stderr, ", no root backref");
950         if (errors & REF_ERR_DUP_ROOT_REF)
951                 fprintf(stderr, ", dup root ref");
952         if (errors & REF_ERR_DUP_ROOT_BACKREF)
953                 fprintf(stderr, ", dup root backref");
954         fprintf(stderr, "\n");
955 }
956
957 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
958                                           u64 ino, int mod)
959 {
960         struct ptr_node *node;
961         struct cache_extent *cache;
962         struct inode_record *rec = NULL;
963         int ret;
964
965         cache = lookup_cache_extent(inode_cache, ino, 1);
966         if (cache) {
967                 node = container_of(cache, struct ptr_node, cache);
968                 rec = node->data;
969                 if (mod && rec->refs > 1) {
970                         node->data = clone_inode_rec(rec);
971                         if (IS_ERR(node->data))
972                                 return node->data;
973                         rec->refs--;
974                         rec = node->data;
975                 }
976         } else if (mod) {
977                 rec = calloc(1, sizeof(*rec));
978                 if (!rec)
979                         return ERR_PTR(-ENOMEM);
980                 rec->ino = ino;
981                 rec->extent_start = (u64)-1;
982                 rec->refs = 1;
983                 INIT_LIST_HEAD(&rec->backrefs);
984                 INIT_LIST_HEAD(&rec->orphan_extents);
985                 rec->holes = RB_ROOT;
986
987                 node = malloc(sizeof(*node));
988                 if (!node) {
989                         free(rec);
990                         return ERR_PTR(-ENOMEM);
991                 }
992                 node->cache.start = ino;
993                 node->cache.size = 1;
994                 node->data = rec;
995
996                 if (ino == BTRFS_FREE_INO_OBJECTID)
997                         rec->found_link = 1;
998
999                 ret = insert_cache_extent(inode_cache, &node->cache);
1000                 if (ret)
1001                         return ERR_PTR(-EEXIST);
1002         }
1003         return rec;
1004 }
1005
1006 static void free_orphan_data_extents(struct list_head *orphan_extents)
1007 {
1008         struct orphan_data_extent *orphan;
1009
1010         while (!list_empty(orphan_extents)) {
1011                 orphan = list_entry(orphan_extents->next,
1012                                     struct orphan_data_extent, list);
1013                 list_del(&orphan->list);
1014                 free(orphan);
1015         }
1016 }
1017
1018 static void free_inode_rec(struct inode_record *rec)
1019 {
1020         struct inode_backref *backref;
1021
1022         if (--rec->refs > 0)
1023                 return;
1024
1025         while (!list_empty(&rec->backrefs)) {
1026                 backref = to_inode_backref(rec->backrefs.next);
1027                 list_del(&backref->list);
1028                 free(backref);
1029         }
1030         free_orphan_data_extents(&rec->orphan_extents);
1031         free_file_extent_holes(&rec->holes);
1032         free(rec);
1033 }
1034
1035 static int can_free_inode_rec(struct inode_record *rec)
1036 {
1037         if (!rec->errors && rec->checked && rec->found_inode_item &&
1038             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1039                 return 1;
1040         return 0;
1041 }
1042
1043 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1044                                  struct inode_record *rec)
1045 {
1046         struct cache_extent *cache;
1047         struct inode_backref *tmp, *backref;
1048         struct ptr_node *node;
1049         u8 filetype;
1050
1051         if (!rec->found_inode_item)
1052                 return;
1053
1054         filetype = imode_to_type(rec->imode);
1055         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1056                 if (backref->found_dir_item && backref->found_dir_index) {
1057                         if (backref->filetype != filetype)
1058                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1059                         if (!backref->errors && backref->found_inode_ref &&
1060                             rec->nlink == rec->found_link) {
1061                                 list_del(&backref->list);
1062                                 free(backref);
1063                         }
1064                 }
1065         }
1066
1067         if (!rec->checked || rec->merging)
1068                 return;
1069
1070         if (S_ISDIR(rec->imode)) {
1071                 if (rec->found_size != rec->isize)
1072                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1073                 if (rec->found_file_extent)
1074                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
1075         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1076                 if (rec->found_dir_item)
1077                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1078                 if (rec->found_size != rec->nbytes)
1079                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1080                 if (rec->nlink > 0 && !no_holes &&
1081                     (rec->extent_end < rec->isize ||
1082                      first_extent_gap(&rec->holes) < rec->isize))
1083                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1084         }
1085
1086         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1087                 if (rec->found_csum_item && rec->nodatasum)
1088                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
1089                 if (rec->some_csum_missing && !rec->nodatasum)
1090                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
1091         }
1092
1093         BUG_ON(rec->refs != 1);
1094         if (can_free_inode_rec(rec)) {
1095                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1096                 node = container_of(cache, struct ptr_node, cache);
1097                 BUG_ON(node->data != rec);
1098                 remove_cache_extent(inode_cache, &node->cache);
1099                 free(node);
1100                 free_inode_rec(rec);
1101         }
1102 }
1103
1104 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1105 {
1106         struct btrfs_path path;
1107         struct btrfs_key key;
1108         int ret;
1109
1110         key.objectid = BTRFS_ORPHAN_OBJECTID;
1111         key.type = BTRFS_ORPHAN_ITEM_KEY;
1112         key.offset = ino;
1113
1114         btrfs_init_path(&path);
1115         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1116         btrfs_release_path(&path);
1117         if (ret > 0)
1118                 ret = -ENOENT;
1119         return ret;
1120 }
1121
1122 static int process_inode_item(struct extent_buffer *eb,
1123                               int slot, struct btrfs_key *key,
1124                               struct shared_node *active_node)
1125 {
1126         struct inode_record *rec;
1127         struct btrfs_inode_item *item;
1128
1129         rec = active_node->current;
1130         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1131         if (rec->found_inode_item) {
1132                 rec->errors |= I_ERR_DUP_INODE_ITEM;
1133                 return 1;
1134         }
1135         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1136         rec->nlink = btrfs_inode_nlink(eb, item);
1137         rec->isize = btrfs_inode_size(eb, item);
1138         rec->nbytes = btrfs_inode_nbytes(eb, item);
1139         rec->imode = btrfs_inode_mode(eb, item);
1140         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1141                 rec->nodatasum = 1;
1142         rec->found_inode_item = 1;
1143         if (rec->nlink == 0)
1144                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1145         maybe_free_inode_rec(&active_node->inode_cache, rec);
1146         return 0;
1147 }
1148
1149 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1150                                                 const char *name,
1151                                                 int namelen, u64 dir)
1152 {
1153         struct inode_backref *backref;
1154
1155         list_for_each_entry(backref, &rec->backrefs, list) {
1156                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1157                         break;
1158                 if (backref->dir != dir || backref->namelen != namelen)
1159                         continue;
1160                 if (memcmp(name, backref->name, namelen))
1161                         continue;
1162                 return backref;
1163         }
1164
1165         backref = malloc(sizeof(*backref) + namelen + 1);
1166         if (!backref)
1167                 return NULL;
1168         memset(backref, 0, sizeof(*backref));
1169         backref->dir = dir;
1170         backref->namelen = namelen;
1171         memcpy(backref->name, name, namelen);
1172         backref->name[namelen] = '\0';
1173         list_add_tail(&backref->list, &rec->backrefs);
1174         return backref;
1175 }
1176
1177 static int add_inode_backref(struct cache_tree *inode_cache,
1178                              u64 ino, u64 dir, u64 index,
1179                              const char *name, int namelen,
1180                              u8 filetype, u8 itemtype, int errors)
1181 {
1182         struct inode_record *rec;
1183         struct inode_backref *backref;
1184
1185         rec = get_inode_rec(inode_cache, ino, 1);
1186         BUG_ON(IS_ERR(rec));
1187         backref = get_inode_backref(rec, name, namelen, dir);
1188         BUG_ON(!backref);
1189         if (errors)
1190                 backref->errors |= errors;
1191         if (itemtype == BTRFS_DIR_INDEX_KEY) {
1192                 if (backref->found_dir_index)
1193                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
1194                 if (backref->found_inode_ref && backref->index != index)
1195                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1196                 if (backref->found_dir_item && backref->filetype != filetype)
1197                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1198
1199                 backref->index = index;
1200                 backref->filetype = filetype;
1201                 backref->found_dir_index = 1;
1202         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1203                 rec->found_link++;
1204                 if (backref->found_dir_item)
1205                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
1206                 if (backref->found_dir_index && backref->filetype != filetype)
1207                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1208
1209                 backref->filetype = filetype;
1210                 backref->found_dir_item = 1;
1211         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1212                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1213                 if (backref->found_inode_ref)
1214                         backref->errors |= REF_ERR_DUP_INODE_REF;
1215                 if (backref->found_dir_index && backref->index != index)
1216                         backref->errors |= REF_ERR_INDEX_UNMATCH;
1217                 else
1218                         backref->index = index;
1219
1220                 backref->ref_type = itemtype;
1221                 backref->found_inode_ref = 1;
1222         } else {
1223                 BUG_ON(1);
1224         }
1225
1226         maybe_free_inode_rec(inode_cache, rec);
1227         return 0;
1228 }
1229
1230 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1231                             struct cache_tree *dst_cache)
1232 {
1233         struct inode_backref *backref;
1234         u32 dir_count = 0;
1235         int ret = 0;
1236
1237         dst->merging = 1;
1238         list_for_each_entry(backref, &src->backrefs, list) {
1239                 if (backref->found_dir_index) {
1240                         add_inode_backref(dst_cache, dst->ino, backref->dir,
1241                                         backref->index, backref->name,
1242                                         backref->namelen, backref->filetype,
1243                                         BTRFS_DIR_INDEX_KEY, backref->errors);
1244                 }
1245                 if (backref->found_dir_item) {
1246                         dir_count++;
1247                         add_inode_backref(dst_cache, dst->ino,
1248                                         backref->dir, 0, backref->name,
1249                                         backref->namelen, backref->filetype,
1250                                         BTRFS_DIR_ITEM_KEY, backref->errors);
1251                 }
1252                 if (backref->found_inode_ref) {
1253                         add_inode_backref(dst_cache, dst->ino,
1254                                         backref->dir, backref->index,
1255                                         backref->name, backref->namelen, 0,
1256                                         backref->ref_type, backref->errors);
1257                 }
1258         }
1259
1260         if (src->found_dir_item)
1261                 dst->found_dir_item = 1;
1262         if (src->found_file_extent)
1263                 dst->found_file_extent = 1;
1264         if (src->found_csum_item)
1265                 dst->found_csum_item = 1;
1266         if (src->some_csum_missing)
1267                 dst->some_csum_missing = 1;
1268         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1269                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1270                 if (ret < 0)
1271                         return ret;
1272         }
1273
1274         BUG_ON(src->found_link < dir_count);
1275         dst->found_link += src->found_link - dir_count;
1276         dst->found_size += src->found_size;
1277         if (src->extent_start != (u64)-1) {
1278                 if (dst->extent_start == (u64)-1) {
1279                         dst->extent_start = src->extent_start;
1280                         dst->extent_end = src->extent_end;
1281                 } else {
1282                         if (dst->extent_end > src->extent_start)
1283                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1284                         else if (dst->extent_end < src->extent_start) {
1285                                 ret = add_file_extent_hole(&dst->holes,
1286                                         dst->extent_end,
1287                                         src->extent_start - dst->extent_end);
1288                         }
1289                         if (dst->extent_end < src->extent_end)
1290                                 dst->extent_end = src->extent_end;
1291                 }
1292         }
1293
1294         dst->errors |= src->errors;
1295         if (src->found_inode_item) {
1296                 if (!dst->found_inode_item) {
1297                         dst->nlink = src->nlink;
1298                         dst->isize = src->isize;
1299                         dst->nbytes = src->nbytes;
1300                         dst->imode = src->imode;
1301                         dst->nodatasum = src->nodatasum;
1302                         dst->found_inode_item = 1;
1303                 } else {
1304                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1305                 }
1306         }
1307         dst->merging = 0;
1308
1309         return 0;
1310 }
1311
1312 static int splice_shared_node(struct shared_node *src_node,
1313                               struct shared_node *dst_node)
1314 {
1315         struct cache_extent *cache;
1316         struct ptr_node *node, *ins;
1317         struct cache_tree *src, *dst;
1318         struct inode_record *rec, *conflict;
1319         u64 current_ino = 0;
1320         int splice = 0;
1321         int ret;
1322
1323         if (--src_node->refs == 0)
1324                 splice = 1;
1325         if (src_node->current)
1326                 current_ino = src_node->current->ino;
1327
1328         src = &src_node->root_cache;
1329         dst = &dst_node->root_cache;
1330 again:
1331         cache = search_cache_extent(src, 0);
1332         while (cache) {
1333                 node = container_of(cache, struct ptr_node, cache);
1334                 rec = node->data;
1335                 cache = next_cache_extent(cache);
1336
1337                 if (splice) {
1338                         remove_cache_extent(src, &node->cache);
1339                         ins = node;
1340                 } else {
1341                         ins = malloc(sizeof(*ins));
1342                         BUG_ON(!ins);
1343                         ins->cache.start = node->cache.start;
1344                         ins->cache.size = node->cache.size;
1345                         ins->data = rec;
1346                         rec->refs++;
1347                 }
1348                 ret = insert_cache_extent(dst, &ins->cache);
1349                 if (ret == -EEXIST) {
1350                         conflict = get_inode_rec(dst, rec->ino, 1);
1351                         BUG_ON(IS_ERR(conflict));
1352                         merge_inode_recs(rec, conflict, dst);
1353                         if (rec->checked) {
1354                                 conflict->checked = 1;
1355                                 if (dst_node->current == conflict)
1356                                         dst_node->current = NULL;
1357                         }
1358                         maybe_free_inode_rec(dst, conflict);
1359                         free_inode_rec(rec);
1360                         free(ins);
1361                 } else {
1362                         BUG_ON(ret);
1363                 }
1364         }
1365
1366         if (src == &src_node->root_cache) {
1367                 src = &src_node->inode_cache;
1368                 dst = &dst_node->inode_cache;
1369                 goto again;
1370         }
1371
1372         if (current_ino > 0 && (!dst_node->current ||
1373             current_ino > dst_node->current->ino)) {
1374                 if (dst_node->current) {
1375                         dst_node->current->checked = 1;
1376                         maybe_free_inode_rec(dst, dst_node->current);
1377                 }
1378                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1379                 BUG_ON(IS_ERR(dst_node->current));
1380         }
1381         return 0;
1382 }
1383
1384 static void free_inode_ptr(struct cache_extent *cache)
1385 {
1386         struct ptr_node *node;
1387         struct inode_record *rec;
1388
1389         node = container_of(cache, struct ptr_node, cache);
1390         rec = node->data;
1391         free_inode_rec(rec);
1392         free(node);
1393 }
1394
1395 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1396
1397 static struct shared_node *find_shared_node(struct cache_tree *shared,
1398                                             u64 bytenr)
1399 {
1400         struct cache_extent *cache;
1401         struct shared_node *node;
1402
1403         cache = lookup_cache_extent(shared, bytenr, 1);
1404         if (cache) {
1405                 node = container_of(cache, struct shared_node, cache);
1406                 return node;
1407         }
1408         return NULL;
1409 }
1410
1411 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1412 {
1413         int ret;
1414         struct shared_node *node;
1415
1416         node = calloc(1, sizeof(*node));
1417         if (!node)
1418                 return -ENOMEM;
1419         node->cache.start = bytenr;
1420         node->cache.size = 1;
1421         cache_tree_init(&node->root_cache);
1422         cache_tree_init(&node->inode_cache);
1423         node->refs = refs;
1424
1425         ret = insert_cache_extent(shared, &node->cache);
1426
1427         return ret;
1428 }
1429
1430 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1431                              struct walk_control *wc, int level)
1432 {
1433         struct shared_node *node;
1434         struct shared_node *dest;
1435         int ret;
1436
1437         if (level == wc->active_node)
1438                 return 0;
1439
1440         BUG_ON(wc->active_node <= level);
1441         node = find_shared_node(&wc->shared, bytenr);
1442         if (!node) {
1443                 ret = add_shared_node(&wc->shared, bytenr, refs);
1444                 BUG_ON(ret);
1445                 node = find_shared_node(&wc->shared, bytenr);
1446                 wc->nodes[level] = node;
1447                 wc->active_node = level;
1448                 return 0;
1449         }
1450
1451         if (wc->root_level == wc->active_node &&
1452             btrfs_root_refs(&root->root_item) == 0) {
1453                 if (--node->refs == 0) {
1454                         free_inode_recs_tree(&node->root_cache);
1455                         free_inode_recs_tree(&node->inode_cache);
1456                         remove_cache_extent(&wc->shared, &node->cache);
1457                         free(node);
1458                 }
1459                 return 1;
1460         }
1461
1462         dest = wc->nodes[wc->active_node];
1463         splice_shared_node(node, dest);
1464         if (node->refs == 0) {
1465                 remove_cache_extent(&wc->shared, &node->cache);
1466                 free(node);
1467         }
1468         return 1;
1469 }
1470
1471 static int leave_shared_node(struct btrfs_root *root,
1472                              struct walk_control *wc, int level)
1473 {
1474         struct shared_node *node;
1475         struct shared_node *dest;
1476         int i;
1477
1478         if (level == wc->root_level)
1479                 return 0;
1480
1481         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1482                 if (wc->nodes[i])
1483                         break;
1484         }
1485         BUG_ON(i >= BTRFS_MAX_LEVEL);
1486
1487         node = wc->nodes[wc->active_node];
1488         wc->nodes[wc->active_node] = NULL;
1489         wc->active_node = i;
1490
1491         dest = wc->nodes[wc->active_node];
1492         if (wc->active_node < wc->root_level ||
1493             btrfs_root_refs(&root->root_item) > 0) {
1494                 BUG_ON(node->refs <= 1);
1495                 splice_shared_node(node, dest);
1496         } else {
1497                 BUG_ON(node->refs < 2);
1498                 node->refs--;
1499         }
1500         return 0;
1501 }
1502
1503 /*
1504  * Returns:
1505  * < 0 - on error
1506  * 1   - if the root with id child_root_id is a child of root parent_root_id
1507  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1508  *       has other root(s) as parent(s)
1509  * 2   - if the root child_root_id doesn't have any parent roots
1510  */
1511 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1512                          u64 child_root_id)
1513 {
1514         struct btrfs_path path;
1515         struct btrfs_key key;
1516         struct extent_buffer *leaf;
1517         int has_parent = 0;
1518         int ret;
1519
1520         btrfs_init_path(&path);
1521
1522         key.objectid = parent_root_id;
1523         key.type = BTRFS_ROOT_REF_KEY;
1524         key.offset = child_root_id;
1525         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1526                                 0, 0);
1527         if (ret < 0)
1528                 return ret;
1529         btrfs_release_path(&path);
1530         if (!ret)
1531                 return 1;
1532
1533         key.objectid = child_root_id;
1534         key.type = BTRFS_ROOT_BACKREF_KEY;
1535         key.offset = 0;
1536         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1537                                 0, 0);
1538         if (ret < 0)
1539                 goto out;
1540
1541         while (1) {
1542                 leaf = path.nodes[0];
1543                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1544                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1545                         if (ret)
1546                                 break;
1547                         leaf = path.nodes[0];
1548                 }
1549
1550                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1551                 if (key.objectid != child_root_id ||
1552                     key.type != BTRFS_ROOT_BACKREF_KEY)
1553                         break;
1554
1555                 has_parent = 1;
1556
1557                 if (key.offset == parent_root_id) {
1558                         btrfs_release_path(&path);
1559                         return 1;
1560                 }
1561
1562                 path.slots[0]++;
1563         }
1564 out:
1565         btrfs_release_path(&path);
1566         if (ret < 0)
1567                 return ret;
1568         return has_parent ? 0 : 2;
1569 }
1570
1571 static int process_dir_item(struct extent_buffer *eb,
1572                             int slot, struct btrfs_key *key,
1573                             struct shared_node *active_node)
1574 {
1575         u32 total;
1576         u32 cur = 0;
1577         u32 len;
1578         u32 name_len;
1579         u32 data_len;
1580         int error;
1581         int nritems = 0;
1582         u8 filetype;
1583         struct btrfs_dir_item *di;
1584         struct inode_record *rec;
1585         struct cache_tree *root_cache;
1586         struct cache_tree *inode_cache;
1587         struct btrfs_key location;
1588         char namebuf[BTRFS_NAME_LEN];
1589
1590         root_cache = &active_node->root_cache;
1591         inode_cache = &active_node->inode_cache;
1592         rec = active_node->current;
1593         rec->found_dir_item = 1;
1594
1595         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1596         total = btrfs_item_size_nr(eb, slot);
1597         while (cur < total) {
1598                 nritems++;
1599                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1600                 name_len = btrfs_dir_name_len(eb, di);
1601                 data_len = btrfs_dir_data_len(eb, di);
1602                 filetype = btrfs_dir_type(eb, di);
1603
1604                 rec->found_size += name_len;
1605                 if (cur + sizeof(*di) + name_len > total ||
1606                     name_len > BTRFS_NAME_LEN) {
1607                         error = REF_ERR_NAME_TOO_LONG;
1608
1609                         if (cur + sizeof(*di) > total)
1610                                 break;
1611                         len = min_t(u32, total - cur - sizeof(*di),
1612                                     BTRFS_NAME_LEN);
1613                 } else {
1614                         len = name_len;
1615                         error = 0;
1616                 }
1617
1618                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1619
1620                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1621                     key->offset != btrfs_name_hash(namebuf, len)) {
1622                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1623                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1624                         key->objectid, key->offset, namebuf, len, filetype,
1625                         key->offset, btrfs_name_hash(namebuf, len));
1626                 }
1627
1628                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1629                         add_inode_backref(inode_cache, location.objectid,
1630                                           key->objectid, key->offset, namebuf,
1631                                           len, filetype, key->type, error);
1632                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1633                         add_inode_backref(root_cache, location.objectid,
1634                                           key->objectid, key->offset,
1635                                           namebuf, len, filetype,
1636                                           key->type, error);
1637                 } else {
1638                         fprintf(stderr, "invalid location in dir item %u\n",
1639                                 location.type);
1640                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1641                                           key->objectid, key->offset, namebuf,
1642                                           len, filetype, key->type, error);
1643                 }
1644
1645                 len = sizeof(*di) + name_len + data_len;
1646                 di = (struct btrfs_dir_item *)((char *)di + len);
1647                 cur += len;
1648         }
1649         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1650                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1651
1652         return 0;
1653 }
1654
1655 static int process_inode_ref(struct extent_buffer *eb,
1656                              int slot, struct btrfs_key *key,
1657                              struct shared_node *active_node)
1658 {
1659         u32 total;
1660         u32 cur = 0;
1661         u32 len;
1662         u32 name_len;
1663         u64 index;
1664         int error;
1665         struct cache_tree *inode_cache;
1666         struct btrfs_inode_ref *ref;
1667         char namebuf[BTRFS_NAME_LEN];
1668
1669         inode_cache = &active_node->inode_cache;
1670
1671         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1672         total = btrfs_item_size_nr(eb, slot);
1673         while (cur < total) {
1674                 name_len = btrfs_inode_ref_name_len(eb, ref);
1675                 index = btrfs_inode_ref_index(eb, ref);
1676
1677                 /* inode_ref + namelen should not cross item boundary */
1678                 if (cur + sizeof(*ref) + name_len > total ||
1679                     name_len > BTRFS_NAME_LEN) {
1680                         if (total < cur + sizeof(*ref))
1681                                 break;
1682
1683                         /* Still try to read out the remaining part */
1684                         len = min_t(u32, total - cur - sizeof(*ref),
1685                                     BTRFS_NAME_LEN);
1686                         error = REF_ERR_NAME_TOO_LONG;
1687                 } else {
1688                         len = name_len;
1689                         error = 0;
1690                 }
1691
1692                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1693                 add_inode_backref(inode_cache, key->objectid, key->offset,
1694                                   index, namebuf, len, 0, key->type, error);
1695
1696                 len = sizeof(*ref) + name_len;
1697                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1698                 cur += len;
1699         }
1700         return 0;
1701 }
1702
1703 static int process_inode_extref(struct extent_buffer *eb,
1704                                 int slot, struct btrfs_key *key,
1705                                 struct shared_node *active_node)
1706 {
1707         u32 total;
1708         u32 cur = 0;
1709         u32 len;
1710         u32 name_len;
1711         u64 index;
1712         u64 parent;
1713         int error;
1714         struct cache_tree *inode_cache;
1715         struct btrfs_inode_extref *extref;
1716         char namebuf[BTRFS_NAME_LEN];
1717
1718         inode_cache = &active_node->inode_cache;
1719
1720         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1721         total = btrfs_item_size_nr(eb, slot);
1722         while (cur < total) {
1723                 name_len = btrfs_inode_extref_name_len(eb, extref);
1724                 index = btrfs_inode_extref_index(eb, extref);
1725                 parent = btrfs_inode_extref_parent(eb, extref);
1726                 if (name_len <= BTRFS_NAME_LEN) {
1727                         len = name_len;
1728                         error = 0;
1729                 } else {
1730                         len = BTRFS_NAME_LEN;
1731                         error = REF_ERR_NAME_TOO_LONG;
1732                 }
1733                 read_extent_buffer(eb, namebuf,
1734                                    (unsigned long)(extref + 1), len);
1735                 add_inode_backref(inode_cache, key->objectid, parent,
1736                                   index, namebuf, len, 0, key->type, error);
1737
1738                 len = sizeof(*extref) + name_len;
1739                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1740                 cur += len;
1741         }
1742         return 0;
1743
1744 }
1745
1746 static int count_csum_range(struct btrfs_root *root, u64 start,
1747                             u64 len, u64 *found)
1748 {
1749         struct btrfs_key key;
1750         struct btrfs_path path;
1751         struct extent_buffer *leaf;
1752         int ret;
1753         size_t size;
1754         *found = 0;
1755         u64 csum_end;
1756         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1757
1758         btrfs_init_path(&path);
1759
1760         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1761         key.offset = start;
1762         key.type = BTRFS_EXTENT_CSUM_KEY;
1763
1764         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1765                                 &key, &path, 0, 0);
1766         if (ret < 0)
1767                 goto out;
1768         if (ret > 0 && path.slots[0] > 0) {
1769                 leaf = path.nodes[0];
1770                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1771                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1772                     key.type == BTRFS_EXTENT_CSUM_KEY)
1773                         path.slots[0]--;
1774         }
1775
1776         while (len > 0) {
1777                 leaf = path.nodes[0];
1778                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1779                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1780                         if (ret > 0)
1781                                 break;
1782                         else if (ret < 0)
1783                                 goto out;
1784                         leaf = path.nodes[0];
1785                 }
1786
1787                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1788                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1789                     key.type != BTRFS_EXTENT_CSUM_KEY)
1790                         break;
1791
1792                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1793                 if (key.offset >= start + len)
1794                         break;
1795
1796                 if (key.offset > start)
1797                         start = key.offset;
1798
1799                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1800                 csum_end = key.offset + (size / csum_size) *
1801                            root->fs_info->sectorsize;
1802                 if (csum_end > start) {
1803                         size = min(csum_end - start, len);
1804                         len -= size;
1805                         start += size;
1806                         *found += size;
1807                 }
1808
1809                 path.slots[0]++;
1810         }
1811 out:
1812         btrfs_release_path(&path);
1813         if (ret < 0)
1814                 return ret;
1815         return 0;
1816 }
1817
1818 static int process_file_extent(struct btrfs_root *root,
1819                                 struct extent_buffer *eb,
1820                                 int slot, struct btrfs_key *key,
1821                                 struct shared_node *active_node)
1822 {
1823         struct inode_record *rec;
1824         struct btrfs_file_extent_item *fi;
1825         u64 num_bytes = 0;
1826         u64 disk_bytenr = 0;
1827         u64 extent_offset = 0;
1828         u64 mask = root->fs_info->sectorsize - 1;
1829         int extent_type;
1830         int ret;
1831
1832         rec = active_node->current;
1833         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1834         rec->found_file_extent = 1;
1835
1836         if (rec->extent_start == (u64)-1) {
1837                 rec->extent_start = key->offset;
1838                 rec->extent_end = key->offset;
1839         }
1840
1841         if (rec->extent_end > key->offset)
1842                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1843         else if (rec->extent_end < key->offset) {
1844                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1845                                            key->offset - rec->extent_end);
1846                 if (ret < 0)
1847                         return ret;
1848         }
1849
1850         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1851         extent_type = btrfs_file_extent_type(eb, fi);
1852
1853         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1854                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1855                 if (num_bytes == 0)
1856                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1857                 rec->found_size += num_bytes;
1858                 num_bytes = (num_bytes + mask) & ~mask;
1859         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1860                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1861                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1862                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1863                 extent_offset = btrfs_file_extent_offset(eb, fi);
1864                 if (num_bytes == 0 || (num_bytes & mask))
1865                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1866                 if (num_bytes + extent_offset >
1867                     btrfs_file_extent_ram_bytes(eb, fi))
1868                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1869                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1870                     (btrfs_file_extent_compression(eb, fi) ||
1871                      btrfs_file_extent_encryption(eb, fi) ||
1872                      btrfs_file_extent_other_encoding(eb, fi)))
1873                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1874                 if (disk_bytenr > 0)
1875                         rec->found_size += num_bytes;
1876         } else {
1877                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1878         }
1879         rec->extent_end = key->offset + num_bytes;
1880
1881         /*
1882          * The data reloc tree will copy full extents into its inode and then
1883          * copy the corresponding csums.  Because the extent it copied could be
1884          * a preallocated extent that hasn't been written to yet there may be no
1885          * csums to copy, ergo we won't have csums for our file extent.  This is
1886          * ok so just don't bother checking csums if the inode belongs to the
1887          * data reloc tree.
1888          */
1889         if (disk_bytenr > 0 &&
1890             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1891                 u64 found;
1892                 if (btrfs_file_extent_compression(eb, fi))
1893                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1894                 else
1895                         disk_bytenr += extent_offset;
1896
1897                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1898                 if (ret < 0)
1899                         return ret;
1900                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1901                         if (found > 0)
1902                                 rec->found_csum_item = 1;
1903                         if (found < num_bytes)
1904                                 rec->some_csum_missing = 1;
1905                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1906                         if (found > 0)
1907                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1908                 }
1909         }
1910         return 0;
1911 }
1912
1913 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1914                             struct walk_control *wc)
1915 {
1916         struct btrfs_key key;
1917         u32 nritems;
1918         int i;
1919         int ret = 0;
1920         struct cache_tree *inode_cache;
1921         struct shared_node *active_node;
1922
1923         if (wc->root_level == wc->active_node &&
1924             btrfs_root_refs(&root->root_item) == 0)
1925                 return 0;
1926
1927         active_node = wc->nodes[wc->active_node];
1928         inode_cache = &active_node->inode_cache;
1929         nritems = btrfs_header_nritems(eb);
1930         for (i = 0; i < nritems; i++) {
1931                 btrfs_item_key_to_cpu(eb, &key, i);
1932
1933                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1934                         continue;
1935                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1936                         continue;
1937
1938                 if (active_node->current == NULL ||
1939                     active_node->current->ino < key.objectid) {
1940                         if (active_node->current) {
1941                                 active_node->current->checked = 1;
1942                                 maybe_free_inode_rec(inode_cache,
1943                                                      active_node->current);
1944                         }
1945                         active_node->current = get_inode_rec(inode_cache,
1946                                                              key.objectid, 1);
1947                         BUG_ON(IS_ERR(active_node->current));
1948                 }
1949                 switch (key.type) {
1950                 case BTRFS_DIR_ITEM_KEY:
1951                 case BTRFS_DIR_INDEX_KEY:
1952                         ret = process_dir_item(eb, i, &key, active_node);
1953                         break;
1954                 case BTRFS_INODE_REF_KEY:
1955                         ret = process_inode_ref(eb, i, &key, active_node);
1956                         break;
1957                 case BTRFS_INODE_EXTREF_KEY:
1958                         ret = process_inode_extref(eb, i, &key, active_node);
1959                         break;
1960                 case BTRFS_INODE_ITEM_KEY:
1961                         ret = process_inode_item(eb, i, &key, active_node);
1962                         break;
1963                 case BTRFS_EXTENT_DATA_KEY:
1964                         ret = process_file_extent(root, eb, i, &key,
1965                                                   active_node);
1966                         break;
1967                 default:
1968                         break;
1969                 };
1970         }
1971         return ret;
1972 }
1973
1974 struct node_refs {
1975         u64 bytenr[BTRFS_MAX_LEVEL];
1976         u64 refs[BTRFS_MAX_LEVEL];
1977         int need_check[BTRFS_MAX_LEVEL];
1978         /* field for checking all trees */
1979         int checked[BTRFS_MAX_LEVEL];
1980         /* the corresponding extent should be marked as full backref or not */
1981         int full_backref[BTRFS_MAX_LEVEL];
1982 };
1983
1984 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1985                              struct extent_buffer *eb, struct node_refs *nrefs,
1986                              u64 level, int check_all);
1987 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1988                             unsigned int ext_ref);
1989
1990 /*
1991  * Returns >0  Found error, not fatal, should continue
1992  * Returns <0  Fatal error, must exit the whole check
1993  * Returns 0   No errors found
1994  */
1995 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1996                                struct node_refs *nrefs, int *level, int ext_ref)
1997 {
1998         struct extent_buffer *cur = path->nodes[0];
1999         struct btrfs_key key;
2000         u64 cur_bytenr;
2001         u32 nritems;
2002         u64 first_ino = 0;
2003         int root_level = btrfs_header_level(root->node);
2004         int i;
2005         int ret = 0; /* Final return value */
2006         int err = 0; /* Positive error bitmap */
2007
2008         cur_bytenr = cur->start;
2009
2010         /* skip to first inode item or the first inode number change */
2011         nritems = btrfs_header_nritems(cur);
2012         for (i = 0; i < nritems; i++) {
2013                 btrfs_item_key_to_cpu(cur, &key, i);
2014                 if (i == 0)
2015                         first_ino = key.objectid;
2016                 if (key.type == BTRFS_INODE_ITEM_KEY ||
2017                     (first_ino && first_ino != key.objectid))
2018                         break;
2019         }
2020         if (i == nritems) {
2021                 path->slots[0] = nritems;
2022                 return 0;
2023         }
2024         path->slots[0] = i;
2025
2026 again:
2027         err |= check_inode_item(root, path, ext_ref);
2028
2029         /* modify cur since check_inode_item may change path */
2030         cur = path->nodes[0];
2031
2032         if (err & LAST_ITEM)
2033                 goto out;
2034
2035         /* still have inode items in thie leaf */
2036         if (cur->start == cur_bytenr)
2037                 goto again;
2038
2039         /*
2040          * we have switched to another leaf, above nodes may
2041          * have changed, here walk down the path, if a node
2042          * or leaf is shared, check whether we can skip this
2043          * node or leaf.
2044          */
2045         for (i = root_level; i >= 0; i--) {
2046                 if (path->nodes[i]->start == nrefs->bytenr[i])
2047                         continue;
2048
2049                 ret = update_nodes_refs(root, path->nodes[i]->start,
2050                                 path->nodes[i], nrefs, i, 0);
2051                 if (ret)
2052                         goto out;
2053
2054                 if (!nrefs->need_check[i]) {
2055                         *level += 1;
2056                         break;
2057                 }
2058         }
2059
2060         for (i = 0; i < *level; i++) {
2061                 free_extent_buffer(path->nodes[i]);
2062                 path->nodes[i] = NULL;
2063         }
2064 out:
2065         err &= ~LAST_ITEM;
2066         if (err && !ret)
2067                 ret = err;
2068         return ret;
2069 }
2070
2071 static void reada_walk_down(struct btrfs_root *root,
2072                             struct extent_buffer *node, int slot)
2073 {
2074         struct btrfs_fs_info *fs_info = root->fs_info;
2075         u64 bytenr;
2076         u64 ptr_gen;
2077         u32 nritems;
2078         int i;
2079         int level;
2080
2081         level = btrfs_header_level(node);
2082         if (level != 1)
2083                 return;
2084
2085         nritems = btrfs_header_nritems(node);
2086         for (i = slot; i < nritems; i++) {
2087                 bytenr = btrfs_node_blockptr(node, i);
2088                 ptr_gen = btrfs_node_ptr_generation(node, i);
2089                 readahead_tree_block(fs_info, bytenr, ptr_gen);
2090         }
2091 }
2092
2093 /*
2094  * Check the child node/leaf by the following condition:
2095  * 1. the first item key of the node/leaf should be the same with the one
2096  *    in parent.
2097  * 2. block in parent node should match the child node/leaf.
2098  * 3. generation of parent node and child's header should be consistent.
2099  *
2100  * Or the child node/leaf pointed by the key in parent is not valid.
2101  *
2102  * We hope to check leaf owner too, but since subvol may share leaves,
2103  * which makes leaf owner check not so strong, key check should be
2104  * sufficient enough for that case.
2105  */
2106 static int check_child_node(struct extent_buffer *parent, int slot,
2107                             struct extent_buffer *child)
2108 {
2109         struct btrfs_key parent_key;
2110         struct btrfs_key child_key;
2111         int ret = 0;
2112
2113         btrfs_node_key_to_cpu(parent, &parent_key, slot);
2114         if (btrfs_header_level(child) == 0)
2115                 btrfs_item_key_to_cpu(child, &child_key, 0);
2116         else
2117                 btrfs_node_key_to_cpu(child, &child_key, 0);
2118
2119         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2120                 ret = -EINVAL;
2121                 fprintf(stderr,
2122                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2123                         parent_key.objectid, parent_key.type, parent_key.offset,
2124                         child_key.objectid, child_key.type, child_key.offset);
2125         }
2126         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2127                 ret = -EINVAL;
2128                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2129                         btrfs_node_blockptr(parent, slot),
2130                         btrfs_header_bytenr(child));
2131         }
2132         if (btrfs_node_ptr_generation(parent, slot) !=
2133             btrfs_header_generation(child)) {
2134                 ret = -EINVAL;
2135                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2136                         btrfs_header_generation(child),
2137                         btrfs_node_ptr_generation(parent, slot));
2138         }
2139         return ret;
2140 }
2141
2142 /*
2143  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2144  * in every fs or file tree check. Here we find its all root ids, and only check
2145  * it in the fs or file tree which has the smallest root id.
2146  */
2147 static int need_check(struct btrfs_root *root, struct ulist *roots)
2148 {
2149         struct rb_node *node;
2150         struct ulist_node *u;
2151
2152         if (roots->nnodes == 1)
2153                 return 1;
2154
2155         node = rb_first(&roots->root);
2156         u = rb_entry(node, struct ulist_node, rb_node);
2157         /*
2158          * current root id is not smallest, we skip it and let it be checked
2159          * in the fs or file tree who hash the smallest root id.
2160          */
2161         if (root->objectid != u->val)
2162                 return 0;
2163
2164         return 1;
2165 }
2166
2167 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
2168                                u64 *flags_ret)
2169 {
2170         struct btrfs_root *extent_root = root->fs_info->extent_root;
2171         struct btrfs_root_item *ri = &root->root_item;
2172         struct btrfs_extent_inline_ref *iref;
2173         struct btrfs_extent_item *ei;
2174         struct btrfs_key key;
2175         struct btrfs_path *path = NULL;
2176         unsigned long ptr;
2177         unsigned long end;
2178         u64 flags;
2179         u64 owner = 0;
2180         u64 offset;
2181         int slot;
2182         int type;
2183         int ret = 0;
2184
2185         /*
2186          * Except file/reloc tree, we can not have FULL BACKREF MODE
2187          */
2188         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
2189                 goto normal;
2190
2191         /* root node */
2192         if (eb->start == btrfs_root_bytenr(ri))
2193                 goto normal;
2194
2195         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
2196                 goto full_backref;
2197
2198         owner = btrfs_header_owner(eb);
2199         if (owner == root->objectid)
2200                 goto normal;
2201
2202         path = btrfs_alloc_path();
2203         if (!path)
2204                 return -ENOMEM;
2205
2206         key.objectid = btrfs_header_bytenr(eb);
2207         key.type = (u8)-1;
2208         key.offset = (u64)-1;
2209
2210         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2211         if (ret <= 0) {
2212                 ret = -EIO;
2213                 goto out;
2214         }
2215
2216         if (ret > 0) {
2217                 ret = btrfs_previous_extent_item(extent_root, path,
2218                                                  key.objectid);
2219                 if (ret)
2220                         goto full_backref;
2221
2222         }
2223         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2224
2225         eb = path->nodes[0];
2226         slot = path->slots[0];
2227         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
2228
2229         flags = btrfs_extent_flags(eb, ei);
2230         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2231                 goto full_backref;
2232
2233         ptr = (unsigned long)(ei + 1);
2234         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
2235
2236         if (key.type == BTRFS_EXTENT_ITEM_KEY)
2237                 ptr += sizeof(struct btrfs_tree_block_info);
2238
2239 next:
2240         /* Reached extent item ends normally */
2241         if (ptr == end)
2242                 goto full_backref;
2243
2244         /* Beyond extent item end, wrong item size */
2245         if (ptr > end) {
2246                 error("extent item at bytenr %llu slot %d has wrong size",
2247                         eb->start, slot);
2248                 goto full_backref;
2249         }
2250
2251         iref = (struct btrfs_extent_inline_ref *)ptr;
2252         offset = btrfs_extent_inline_ref_offset(eb, iref);
2253         type = btrfs_extent_inline_ref_type(eb, iref);
2254
2255         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
2256                 goto normal;
2257         ptr += btrfs_extent_inline_ref_size(type);
2258         goto next;
2259
2260 normal:
2261         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2262         goto out;
2263
2264 full_backref:
2265         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2266 out:
2267         btrfs_free_path(path);
2268         return ret;
2269 }
2270
2271 /*
2272  * for a tree node or leaf, we record its reference count, so later if we still
2273  * process this node or leaf, don't need to compute its reference count again.
2274  *
2275  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
2276  */
2277 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2278                              struct extent_buffer *eb, struct node_refs *nrefs,
2279                              u64 level, int check_all)
2280 {
2281         struct ulist *roots;
2282         u64 refs = 0;
2283         u64 flags = 0;
2284         int root_level = btrfs_header_level(root->node);
2285         int check;
2286         int ret;
2287
2288         if (nrefs->bytenr[level] == bytenr)
2289                 return 0;
2290
2291         if (bytenr != (u64)-1) {
2292                 /* the return value of this function seems a mistake */
2293                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2294                                        level, 1, &refs, &flags);
2295                 /* temporary fix */
2296                 if (ret < 0 && !check_all)
2297                         return ret;
2298
2299                 nrefs->bytenr[level] = bytenr;
2300                 nrefs->refs[level] = refs;
2301                 nrefs->full_backref[level] = 0;
2302                 nrefs->checked[level] = 0;
2303
2304                 if (refs > 1) {
2305                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2306                                                    0, &roots);
2307                         if (ret)
2308                                 return -EIO;
2309
2310                         check = need_check(root, roots);
2311                         ulist_free(roots);
2312                         nrefs->need_check[level] = check;
2313                 } else {
2314                         if (!check_all) {
2315                                 nrefs->need_check[level] = 1;
2316                         } else {
2317                                 if (level == root_level) {
2318                                         nrefs->need_check[level] = 1;
2319                                 } else {
2320                                         /*
2321                                          * The node refs may have not been
2322                                          * updated if upper needs checking (the
2323                                          * lowest root_objectid) the node can
2324                                          * be checked.
2325                                          */
2326                                         nrefs->need_check[level] =
2327                                                 nrefs->need_check[level + 1];
2328                                 }
2329                         }
2330                 }
2331         }
2332
2333         if (check_all && eb) {
2334                 calc_extent_flag_v2(root, eb, &flags);
2335                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2336                         nrefs->full_backref[level] = 1;
2337         }
2338
2339         return 0;
2340 }
2341
2342 /*
2343  * @level           if @level == -1 means extent data item
2344  *                  else normal treeblocl.
2345  */
2346 static int should_check_extent_strictly(struct btrfs_root *root,
2347                                         struct node_refs *nrefs, int level)
2348 {
2349         int root_level = btrfs_header_level(root->node);
2350
2351         if (level > root_level || level < -1)
2352                 return 1;
2353         if (level == root_level)
2354                 return 1;
2355         /*
2356          * if the upper node is marked full backref, it should contain shared
2357          * backref of the parent (except owner == root->objectid).
2358          */
2359         while (++level <= root_level)
2360                 if (nrefs->refs[level] > 1)
2361                         return 0;
2362
2363         return 1;
2364 }
2365
2366 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2367                           struct walk_control *wc, int *level,
2368                           struct node_refs *nrefs)
2369 {
2370         enum btrfs_tree_block_status status;
2371         u64 bytenr;
2372         u64 ptr_gen;
2373         struct btrfs_fs_info *fs_info = root->fs_info;
2374         struct extent_buffer *next;
2375         struct extent_buffer *cur;
2376         int ret, err = 0;
2377         u64 refs;
2378
2379         WARN_ON(*level < 0);
2380         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2381
2382         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2383                 refs = nrefs->refs[*level];
2384                 ret = 0;
2385         } else {
2386                 ret = btrfs_lookup_extent_info(NULL, root,
2387                                        path->nodes[*level]->start,
2388                                        *level, 1, &refs, NULL);
2389                 if (ret < 0) {
2390                         err = ret;
2391                         goto out;
2392                 }
2393                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2394                 nrefs->refs[*level] = refs;
2395         }
2396
2397         if (refs > 1) {
2398                 ret = enter_shared_node(root, path->nodes[*level]->start,
2399                                         refs, wc, *level);
2400                 if (ret > 0) {
2401                         err = ret;
2402                         goto out;
2403                 }
2404         }
2405
2406         while (*level >= 0) {
2407                 WARN_ON(*level < 0);
2408                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2409                 cur = path->nodes[*level];
2410
2411                 if (btrfs_header_level(cur) != *level)
2412                         WARN_ON(1);
2413
2414                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2415                         break;
2416                 if (*level == 0) {
2417                         ret = process_one_leaf(root, cur, wc);
2418                         if (ret < 0)
2419                                 err = ret;
2420                         break;
2421                 }
2422                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2423                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2424
2425                 if (bytenr == nrefs->bytenr[*level - 1]) {
2426                         refs = nrefs->refs[*level - 1];
2427                 } else {
2428                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2429                                         *level - 1, 1, &refs, NULL);
2430                         if (ret < 0) {
2431                                 refs = 0;
2432                         } else {
2433                                 nrefs->bytenr[*level - 1] = bytenr;
2434                                 nrefs->refs[*level - 1] = refs;
2435                         }
2436                 }
2437
2438                 if (refs > 1) {
2439                         ret = enter_shared_node(root, bytenr, refs,
2440                                                 wc, *level - 1);
2441                         if (ret > 0) {
2442                                 path->slots[*level]++;
2443                                 continue;
2444                         }
2445                 }
2446
2447                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2448                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2449                         free_extent_buffer(next);
2450                         reada_walk_down(root, cur, path->slots[*level]);
2451                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2452                         if (!extent_buffer_uptodate(next)) {
2453                                 struct btrfs_key node_key;
2454
2455                                 btrfs_node_key_to_cpu(path->nodes[*level],
2456                                                       &node_key,
2457                                                       path->slots[*level]);
2458                                 btrfs_add_corrupt_extent_record(root->fs_info,
2459                                                 &node_key,
2460                                                 path->nodes[*level]->start,
2461                                                 root->fs_info->nodesize,
2462                                                 *level);
2463                                 err = -EIO;
2464                                 goto out;
2465                         }
2466                 }
2467
2468                 ret = check_child_node(cur, path->slots[*level], next);
2469                 if (ret) {
2470                         free_extent_buffer(next);
2471                         err = ret;
2472                         goto out;
2473                 }
2474
2475                 if (btrfs_is_leaf(next))
2476                         status = btrfs_check_leaf(root, NULL, next);
2477                 else
2478                         status = btrfs_check_node(root, NULL, next);
2479                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2480                         free_extent_buffer(next);
2481                         err = -EIO;
2482                         goto out;
2483                 }
2484
2485                 *level = *level - 1;
2486                 free_extent_buffer(path->nodes[*level]);
2487                 path->nodes[*level] = next;
2488                 path->slots[*level] = 0;
2489         }
2490 out:
2491         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2492         return err;
2493 }
2494
2495 static int fs_root_objectid(u64 objectid);
2496
2497 /*
2498  * Update global fs information.
2499  */
2500 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2501                          int level)
2502 {
2503         u32 free_nrs;
2504         struct extent_buffer *eb = path->nodes[level];
2505
2506         total_btree_bytes += eb->len;
2507         if (fs_root_objectid(root->objectid))
2508                 total_fs_tree_bytes += eb->len;
2509         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2510                 total_extent_tree_bytes += eb->len;
2511
2512         if (level == 0) {
2513                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2514         } else {
2515                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root) -
2516                             btrfs_header_nritems(eb));
2517                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2518         }
2519 }
2520
2521 /*
2522  * This function only handles BACKREF_MISSING,
2523  * If corresponding extent item exists, increase the ref, else insert an extent
2524  * item and backref.
2525  *
2526  * Returns error bits after repair.
2527  */
2528 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2529                                  struct btrfs_root *root,
2530                                  struct extent_buffer *node,
2531                                  struct node_refs *nrefs, int level, int err)
2532 {
2533         struct btrfs_fs_info *fs_info = root->fs_info;
2534         struct btrfs_root *extent_root = fs_info->extent_root;
2535         struct btrfs_path path;
2536         struct btrfs_extent_item *ei;
2537         struct btrfs_tree_block_info *bi;
2538         struct btrfs_key key;
2539         struct extent_buffer *eb;
2540         u32 size = sizeof(*ei);
2541         u32 node_size = root->fs_info->nodesize;
2542         int insert_extent = 0;
2543         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2544         int root_level = btrfs_header_level(root->node);
2545         int generation;
2546         int ret;
2547         u64 owner;
2548         u64 bytenr;
2549         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2550         u64 parent = 0;
2551
2552         if ((err & BACKREF_MISSING) == 0)
2553                 return err;
2554
2555         WARN_ON(level > BTRFS_MAX_LEVEL);
2556         WARN_ON(level < 0);
2557
2558         btrfs_init_path(&path);
2559         bytenr = btrfs_header_bytenr(node);
2560         owner = btrfs_header_owner(node);
2561         generation = btrfs_header_generation(node);
2562
2563         key.objectid = bytenr;
2564         key.type = (u8)-1;
2565         key.offset = (u64)-1;
2566
2567         /* Search for the extent item */
2568         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2569         if (ret <= 0) {
2570                 ret = -EIO;
2571                 goto out;
2572         }
2573
2574         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2575         if (ret)
2576                 insert_extent = 1;
2577
2578         /* calculate if the extent item flag is full backref or not */
2579         if (nrefs->full_backref[level] != 0)
2580                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2581
2582         /* insert an extent item */
2583         if (insert_extent) {
2584                 struct btrfs_disk_key copy_key;
2585
2586                 generation = btrfs_header_generation(node);
2587
2588                 if (level < root_level && nrefs->full_backref[level + 1] &&
2589                     owner != root->objectid) {
2590                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2591                 }
2592
2593                 key.objectid = bytenr;
2594                 if (!skinny_metadata) {
2595                         key.type = BTRFS_EXTENT_ITEM_KEY;
2596                         key.offset = node_size;
2597                         size += sizeof(*bi);
2598                 } else {
2599                         key.type = BTRFS_METADATA_ITEM_KEY;
2600                         key.offset = level;
2601                 }
2602
2603                 btrfs_release_path(&path);
2604                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2605                                               size);
2606                 if (ret)
2607                         goto out;
2608
2609                 eb = path.nodes[0];
2610                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2611
2612                 btrfs_set_extent_refs(eb, ei, 0);
2613                 btrfs_set_extent_generation(eb, ei, generation);
2614                 btrfs_set_extent_flags(eb, ei, flags);
2615
2616                 if (!skinny_metadata) {
2617                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2618                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2619                                              sizeof(*bi));
2620                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2621                         btrfs_set_disk_key_type(&copy_key, 0);
2622                         btrfs_set_disk_key_offset(&copy_key, 0);
2623
2624                         btrfs_set_tree_block_level(eb, bi, level);
2625                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2626                 }
2627                 btrfs_mark_buffer_dirty(eb);
2628                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2629                 btrfs_update_block_group(trans, extent_root, bytenr, node_size,
2630                                          1, 0);
2631
2632                 nrefs->refs[level] = 0;
2633                 nrefs->full_backref[level] =
2634                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2635                 btrfs_release_path(&path);
2636         }
2637
2638         if (level < root_level && nrefs->full_backref[level + 1] &&
2639             owner != root->objectid)
2640                 parent = nrefs->bytenr[level + 1];
2641
2642         /* increase the ref */
2643         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2644                         parent, root->objectid, level, 0);
2645
2646         nrefs->refs[level]++;
2647 out:
2648         btrfs_release_path(&path);
2649         if (ret) {
2650                 error(
2651         "failed to repair tree block ref start %llu root %llu due to %s",
2652                       bytenr, root->objectid, strerror(-ret));
2653         } else {
2654                 printf("Added one tree block ref start %llu %s %llu\n",
2655                        bytenr, parent ? "parent" : "root",
2656                        parent ? parent : root->objectid);
2657                 err &= ~BACKREF_MISSING;
2658         }
2659
2660         return err;
2661 }
2662
2663 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2664                             unsigned int ext_ref);
2665 static int check_tree_block_ref(struct btrfs_root *root,
2666                                 struct extent_buffer *eb, u64 bytenr,
2667                                 int level, u64 owner, struct node_refs *nrefs);
2668 static int check_leaf_items(struct btrfs_trans_handle *trans,
2669                             struct btrfs_root *root, struct btrfs_path *path,
2670                             struct node_refs *nrefs, int account_bytes);
2671
2672 /*
2673  * @trans      just for lowmem repair mode
2674  * @check all  if not 0 then check all tree block backrefs and items
2675  *             0 then just check relationship of items in fs tree(s)
2676  *
2677  * Returns >0  Found error, should continue
2678  * Returns <0  Fatal error, must exit the whole check
2679  * Returns 0   No errors found
2680  */
2681 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2682                              struct btrfs_root *root, struct btrfs_path *path,
2683                              int *level, struct node_refs *nrefs, int ext_ref,
2684                              int check_all)
2685
2686 {
2687         enum btrfs_tree_block_status status;
2688         u64 bytenr;
2689         u64 ptr_gen;
2690         struct btrfs_fs_info *fs_info = root->fs_info;
2691         struct extent_buffer *next;
2692         struct extent_buffer *cur;
2693         int ret;
2694         int err = 0;
2695         int check;
2696         int account_file_data = 0;
2697
2698         WARN_ON(*level < 0);
2699         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2700
2701         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2702                                 path->nodes[*level], nrefs, *level, check_all);
2703         if (ret < 0)
2704                 return ret;
2705
2706         while (*level >= 0) {
2707                 WARN_ON(*level < 0);
2708                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2709                 cur = path->nodes[*level];
2710                 bytenr = btrfs_header_bytenr(cur);
2711                 check = nrefs->need_check[*level];
2712
2713                 if (btrfs_header_level(cur) != *level)
2714                         WARN_ON(1);
2715                /*
2716                 * Update bytes accounting and check tree block ref
2717                 * NOTE: Doing accounting and check before checking nritems
2718                 * is necessary because of empty node/leaf.
2719                 */
2720                 if ((check_all && !nrefs->checked[*level]) ||
2721                     (!check_all && nrefs->need_check[*level])) {
2722                         ret = check_tree_block_ref(root, cur,
2723                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2724                            btrfs_header_owner(cur), nrefs);
2725
2726                         if (repair && ret)
2727                                 ret = repair_tree_block_ref(trans, root,
2728                                     path->nodes[*level], nrefs, *level, ret);
2729                         err |= ret;
2730
2731                         if (check_all && nrefs->need_check[*level] &&
2732                                 nrefs->refs[*level]) {
2733                                 account_bytes(root, path, *level);
2734                                 account_file_data = 1;
2735                         }
2736                         nrefs->checked[*level] = 1;
2737                 }
2738
2739                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2740                         break;
2741
2742                 /* Don't forgot to check leaf/node validation */
2743                 if (*level == 0) {
2744                         /* skip duplicate check */
2745                         if (check || !check_all) {
2746                                 ret = btrfs_check_leaf(root, NULL, cur);
2747                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2748                                         err |= -EIO;
2749                                         break;
2750                                 }
2751                         }
2752
2753                         ret = 0;
2754                         if (!check_all)
2755                                 ret = process_one_leaf_v2(root, path, nrefs,
2756                                                           level, ext_ref);
2757                         else
2758                                 ret = check_leaf_items(trans, root, path,
2759                                                nrefs, account_file_data);
2760                         err |= ret;
2761                         break;
2762                 } else {
2763                         if (check || !check_all) {
2764                                 ret = btrfs_check_node(root, NULL, cur);
2765                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2766                                         err |= -EIO;
2767                                         break;
2768                                 }
2769                         }
2770                 }
2771
2772                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2773                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2774
2775                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2776                                         check_all);
2777                 if (ret < 0)
2778                         break;
2779                 /*
2780                  * check all trees in check_chunks_and_extent_v2
2781                  * check shared node once in check_fs_roots
2782                  */
2783                 if (!check_all && !nrefs->need_check[*level - 1]) {
2784                         path->slots[*level]++;
2785                         continue;
2786                 }
2787
2788                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2789                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2790                         free_extent_buffer(next);
2791                         reada_walk_down(root, cur, path->slots[*level]);
2792                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2793                         if (!extent_buffer_uptodate(next)) {
2794                                 struct btrfs_key node_key;
2795
2796                                 btrfs_node_key_to_cpu(path->nodes[*level],
2797                                                       &node_key,
2798                                                       path->slots[*level]);
2799                                 btrfs_add_corrupt_extent_record(fs_info,
2800                                         &node_key, path->nodes[*level]->start,
2801                                         fs_info->nodesize, *level);
2802                                 err |= -EIO;
2803                                 break;
2804                         }
2805                 }
2806
2807                 ret = check_child_node(cur, path->slots[*level], next);
2808                 err |= ret;
2809                 if (ret < 0) 
2810                         break;
2811
2812                 if (btrfs_is_leaf(next))
2813                         status = btrfs_check_leaf(root, NULL, next);
2814                 else
2815                         status = btrfs_check_node(root, NULL, next);
2816                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2817                         free_extent_buffer(next);
2818                         err |= -EIO;
2819                         break;
2820                 }
2821
2822                 *level = *level - 1;
2823                 free_extent_buffer(path->nodes[*level]);
2824                 path->nodes[*level] = next;
2825                 path->slots[*level] = 0;
2826                 account_file_data = 0;
2827
2828                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2829         }
2830         return err;
2831 }
2832
2833 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2834                         struct walk_control *wc, int *level)
2835 {
2836         int i;
2837         struct extent_buffer *leaf;
2838
2839         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2840                 leaf = path->nodes[i];
2841                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2842                         path->slots[i]++;
2843                         *level = i;
2844                         return 0;
2845                 } else {
2846                         free_extent_buffer(path->nodes[*level]);
2847                         path->nodes[*level] = NULL;
2848                         BUG_ON(*level > wc->active_node);
2849                         if (*level == wc->active_node)
2850                                 leave_shared_node(root, wc, *level);
2851                         *level = i + 1;
2852                 }
2853         }
2854         return 1;
2855 }
2856
2857 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2858                            int *level)
2859 {
2860         int i;
2861         struct extent_buffer *leaf;
2862
2863         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2864                 leaf = path->nodes[i];
2865                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2866                         path->slots[i]++;
2867                         *level = i;
2868                         return 0;
2869                 } else {
2870                         free_extent_buffer(path->nodes[*level]);
2871                         path->nodes[*level] = NULL;
2872                         *level = i + 1;
2873                 }
2874         }
2875         return 1;
2876 }
2877
2878 static int check_root_dir(struct inode_record *rec)
2879 {
2880         struct inode_backref *backref;
2881         int ret = -1;
2882
2883         if (!rec->found_inode_item || rec->errors)
2884                 goto out;
2885         if (rec->nlink != 1 || rec->found_link != 0)
2886                 goto out;
2887         if (list_empty(&rec->backrefs))
2888                 goto out;
2889         backref = to_inode_backref(rec->backrefs.next);
2890         if (!backref->found_inode_ref)
2891                 goto out;
2892         if (backref->index != 0 || backref->namelen != 2 ||
2893             memcmp(backref->name, "..", 2))
2894                 goto out;
2895         if (backref->found_dir_index || backref->found_dir_item)
2896                 goto out;
2897         ret = 0;
2898 out:
2899         return ret;
2900 }
2901
2902 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2903                               struct btrfs_root *root, struct btrfs_path *path,
2904                               struct inode_record *rec)
2905 {
2906         struct btrfs_inode_item *ei;
2907         struct btrfs_key key;
2908         int ret;
2909
2910         key.objectid = rec->ino;
2911         key.type = BTRFS_INODE_ITEM_KEY;
2912         key.offset = (u64)-1;
2913
2914         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2915         if (ret < 0)
2916                 goto out;
2917         if (ret) {
2918                 if (!path->slots[0]) {
2919                         ret = -ENOENT;
2920                         goto out;
2921                 }
2922                 path->slots[0]--;
2923                 ret = 0;
2924         }
2925         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2926         if (key.objectid != rec->ino) {
2927                 ret = -ENOENT;
2928                 goto out;
2929         }
2930
2931         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2932                             struct btrfs_inode_item);
2933         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2934         btrfs_mark_buffer_dirty(path->nodes[0]);
2935         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2936         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2937                root->root_key.objectid);
2938 out:
2939         btrfs_release_path(path);
2940         return ret;
2941 }
2942
2943 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2944                                     struct btrfs_root *root,
2945                                     struct btrfs_path *path,
2946                                     struct inode_record *rec)
2947 {
2948         int ret;
2949
2950         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2951         btrfs_release_path(path);
2952         if (!ret)
2953                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2954         return ret;
2955 }
2956
2957 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2958                                struct btrfs_root *root,
2959                                struct btrfs_path *path,
2960                                struct inode_record *rec)
2961 {
2962         struct btrfs_inode_item *ei;
2963         struct btrfs_key key;
2964         int ret = 0;
2965
2966         key.objectid = rec->ino;
2967         key.type = BTRFS_INODE_ITEM_KEY;
2968         key.offset = 0;
2969
2970         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2971         if (ret) {
2972                 if (ret > 0)
2973                         ret = -ENOENT;
2974                 goto out;
2975         }
2976
2977         /* Since ret == 0, no need to check anything */
2978         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2979                             struct btrfs_inode_item);
2980         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2981         btrfs_mark_buffer_dirty(path->nodes[0]);
2982         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2983         printf("reset nbytes for ino %llu root %llu\n",
2984                rec->ino, root->root_key.objectid);
2985 out:
2986         btrfs_release_path(path);
2987         return ret;
2988 }
2989
2990 static int add_missing_dir_index(struct btrfs_root *root,
2991                                  struct cache_tree *inode_cache,
2992                                  struct inode_record *rec,
2993                                  struct inode_backref *backref)
2994 {
2995         struct btrfs_path path;
2996         struct btrfs_trans_handle *trans;
2997         struct btrfs_dir_item *dir_item;
2998         struct extent_buffer *leaf;
2999         struct btrfs_key key;
3000         struct btrfs_disk_key disk_key;
3001         struct inode_record *dir_rec;
3002         unsigned long name_ptr;
3003         u32 data_size = sizeof(*dir_item) + backref->namelen;
3004         int ret;
3005
3006         trans = btrfs_start_transaction(root, 1);
3007         if (IS_ERR(trans))
3008                 return PTR_ERR(trans);
3009
3010         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
3011                 (unsigned long long)rec->ino);
3012
3013         btrfs_init_path(&path);
3014         key.objectid = backref->dir;
3015         key.type = BTRFS_DIR_INDEX_KEY;
3016         key.offset = backref->index;
3017         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
3018         BUG_ON(ret);
3019
3020         leaf = path.nodes[0];
3021         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
3022
3023         disk_key.objectid = cpu_to_le64(rec->ino);
3024         disk_key.type = BTRFS_INODE_ITEM_KEY;
3025         disk_key.offset = 0;
3026
3027         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
3028         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
3029         btrfs_set_dir_data_len(leaf, dir_item, 0);
3030         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
3031         name_ptr = (unsigned long)(dir_item + 1);
3032         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
3033         btrfs_mark_buffer_dirty(leaf);
3034         btrfs_release_path(&path);
3035         btrfs_commit_transaction(trans, root);
3036
3037         backref->found_dir_index = 1;
3038         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
3039         BUG_ON(IS_ERR(dir_rec));
3040         if (!dir_rec)
3041                 return 0;
3042         dir_rec->found_size += backref->namelen;
3043         if (dir_rec->found_size == dir_rec->isize &&
3044             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
3045                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
3046         if (dir_rec->found_size != dir_rec->isize)
3047                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
3048
3049         return 0;
3050 }
3051
3052 static int delete_dir_index(struct btrfs_root *root,
3053                             struct inode_backref *backref)
3054 {
3055         struct btrfs_trans_handle *trans;
3056         struct btrfs_dir_item *di;
3057         struct btrfs_path path;
3058         int ret = 0;
3059
3060         trans = btrfs_start_transaction(root, 1);
3061         if (IS_ERR(trans))
3062                 return PTR_ERR(trans);
3063
3064         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
3065                 (unsigned long long)backref->dir,
3066                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
3067                 (unsigned long long)root->objectid);
3068
3069         btrfs_init_path(&path);
3070         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
3071                                     backref->name, backref->namelen,
3072                                     backref->index, -1);
3073         if (IS_ERR(di)) {
3074                 ret = PTR_ERR(di);
3075                 btrfs_release_path(&path);
3076                 btrfs_commit_transaction(trans, root);
3077                 if (ret == -ENOENT)
3078                         return 0;
3079                 return ret;
3080         }
3081
3082         if (!di)
3083                 ret = btrfs_del_item(trans, root, &path);
3084         else
3085                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
3086         BUG_ON(ret);
3087         btrfs_release_path(&path);
3088         btrfs_commit_transaction(trans, root);
3089         return ret;
3090 }
3091
3092 static int __create_inode_item(struct btrfs_trans_handle *trans,
3093                                struct btrfs_root *root, u64 ino, u64 size,
3094                                u64 nbytes, u64 nlink, u32 mode)
3095 {
3096         struct btrfs_inode_item ii;
3097         time_t now = time(NULL);
3098         int ret;
3099
3100         btrfs_set_stack_inode_size(&ii, size);
3101         btrfs_set_stack_inode_nbytes(&ii, nbytes);
3102         btrfs_set_stack_inode_nlink(&ii, nlink);
3103         btrfs_set_stack_inode_mode(&ii, mode);
3104         btrfs_set_stack_inode_generation(&ii, trans->transid);
3105         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
3106         btrfs_set_stack_timespec_sec(&ii.ctime, now);
3107         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
3108         btrfs_set_stack_timespec_sec(&ii.mtime, now);
3109         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
3110         btrfs_set_stack_timespec_sec(&ii.otime, 0);
3111         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
3112
3113         ret = btrfs_insert_inode(trans, root, ino, &ii);
3114         ASSERT(!ret);
3115
3116         warning("root %llu inode %llu recreating inode item, this may "
3117                 "be incomplete, please check permissions and content after "
3118                 "the fsck completes.\n", (unsigned long long)root->objectid,
3119                 (unsigned long long)ino);
3120
3121         return 0;
3122 }
3123
3124 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
3125                                     struct btrfs_root *root, u64 ino,
3126                                     u8 filetype)
3127 {
3128         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
3129
3130         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
3131 }
3132
3133 static int create_inode_item(struct btrfs_root *root,
3134                              struct inode_record *rec, int root_dir)
3135 {
3136         struct btrfs_trans_handle *trans;
3137         u64 nlink = 0;
3138         u32 mode = 0;
3139         u64 size = 0;
3140         int ret;
3141
3142         trans = btrfs_start_transaction(root, 1);
3143         if (IS_ERR(trans)) {
3144                 ret = PTR_ERR(trans);
3145                 return ret;
3146         }
3147
3148         nlink = root_dir ? 1 : rec->found_link;
3149         if (rec->found_dir_item) {
3150                 if (rec->found_file_extent)
3151                         fprintf(stderr, "root %llu inode %llu has both a dir "
3152                                 "item and extents, unsure if it is a dir or a "
3153                                 "regular file so setting it as a directory\n",
3154                                 (unsigned long long)root->objectid,
3155                                 (unsigned long long)rec->ino);
3156                 mode = S_IFDIR | 0755;
3157                 size = rec->found_size;
3158         } else if (!rec->found_dir_item) {
3159                 size = rec->extent_end;
3160                 mode =  S_IFREG | 0755;
3161         }
3162
3163         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
3164                                   nlink, mode);
3165         btrfs_commit_transaction(trans, root);
3166         return 0;
3167 }
3168
3169 static int repair_inode_backrefs(struct btrfs_root *root,
3170                                  struct inode_record *rec,
3171                                  struct cache_tree *inode_cache,
3172                                  int delete)
3173 {
3174         struct inode_backref *tmp, *backref;
3175         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3176         int ret = 0;
3177         int repaired = 0;
3178
3179         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3180                 if (!delete && rec->ino == root_dirid) {
3181                         if (!rec->found_inode_item) {
3182                                 ret = create_inode_item(root, rec, 1);
3183                                 if (ret)
3184                                         break;
3185                                 repaired++;
3186                         }
3187                 }
3188
3189                 /* Index 0 for root dir's are special, don't mess with it */
3190                 if (rec->ino == root_dirid && backref->index == 0)
3191                         continue;
3192
3193                 if (delete &&
3194                     ((backref->found_dir_index && !backref->found_inode_ref) ||
3195                      (backref->found_dir_index && backref->found_inode_ref &&
3196                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
3197                         ret = delete_dir_index(root, backref);
3198                         if (ret)
3199                                 break;
3200                         repaired++;
3201                         list_del(&backref->list);
3202                         free(backref);
3203                         continue;
3204                 }
3205
3206                 if (!delete && !backref->found_dir_index &&
3207                     backref->found_dir_item && backref->found_inode_ref) {
3208                         ret = add_missing_dir_index(root, inode_cache, rec,
3209                                                     backref);
3210                         if (ret)
3211                                 break;
3212                         repaired++;
3213                         if (backref->found_dir_item &&
3214                             backref->found_dir_index) {
3215                                 if (!backref->errors &&
3216                                     backref->found_inode_ref) {
3217                                         list_del(&backref->list);
3218                                         free(backref);
3219                                         continue;
3220                                 }
3221                         }
3222                 }
3223
3224                 if (!delete && (!backref->found_dir_index &&
3225                                 !backref->found_dir_item &&
3226                                 backref->found_inode_ref)) {
3227                         struct btrfs_trans_handle *trans;
3228                         struct btrfs_key location;
3229
3230                         ret = check_dir_conflict(root, backref->name,
3231                                                  backref->namelen,
3232                                                  backref->dir,
3233                                                  backref->index);
3234                         if (ret) {
3235                                 /*
3236                                  * let nlink fixing routine to handle it,
3237                                  * which can do it better.
3238                                  */
3239                                 ret = 0;
3240                                 break;
3241                         }
3242                         location.objectid = rec->ino;
3243                         location.type = BTRFS_INODE_ITEM_KEY;
3244                         location.offset = 0;
3245
3246                         trans = btrfs_start_transaction(root, 1);
3247                         if (IS_ERR(trans)) {
3248                                 ret = PTR_ERR(trans);
3249                                 break;
3250                         }
3251                         fprintf(stderr, "adding missing dir index/item pair "
3252                                 "for inode %llu\n",
3253                                 (unsigned long long)rec->ino);
3254                         ret = btrfs_insert_dir_item(trans, root, backref->name,
3255                                                     backref->namelen,
3256                                                     backref->dir, &location,
3257                                                     imode_to_type(rec->imode),
3258                                                     backref->index);
3259                         BUG_ON(ret);
3260                         btrfs_commit_transaction(trans, root);
3261                         repaired++;
3262                 }
3263
3264                 if (!delete && (backref->found_inode_ref &&
3265                                 backref->found_dir_index &&
3266                                 backref->found_dir_item &&
3267                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3268                                 !rec->found_inode_item)) {
3269                         ret = create_inode_item(root, rec, 0);
3270                         if (ret)
3271                                 break;
3272                         repaired++;
3273                 }
3274
3275         }
3276         return ret ? ret : repaired;
3277 }
3278
3279 /*
3280  * To determine the file type for nlink/inode_item repair
3281  *
3282  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3283  * Return -ENOENT if file type is not found.
3284  */
3285 static int find_file_type(struct inode_record *rec, u8 *type)
3286 {
3287         struct inode_backref *backref;
3288
3289         /* For inode item recovered case */
3290         if (rec->found_inode_item) {
3291                 *type = imode_to_type(rec->imode);
3292                 return 0;
3293         }
3294
3295         list_for_each_entry(backref, &rec->backrefs, list) {
3296                 if (backref->found_dir_index || backref->found_dir_item) {
3297                         *type = backref->filetype;
3298                         return 0;
3299                 }
3300         }
3301         return -ENOENT;
3302 }
3303
3304 /*
3305  * To determine the file name for nlink repair
3306  *
3307  * Return 0 if file name is found, set name and namelen.
3308  * Return -ENOENT if file name is not found.
3309  */
3310 static int find_file_name(struct inode_record *rec,
3311                           char *name, int *namelen)
3312 {
3313         struct inode_backref *backref;
3314
3315         list_for_each_entry(backref, &rec->backrefs, list) {
3316                 if (backref->found_dir_index || backref->found_dir_item ||
3317                     backref->found_inode_ref) {
3318                         memcpy(name, backref->name, backref->namelen);
3319                         *namelen = backref->namelen;
3320                         return 0;
3321                 }
3322         }
3323         return -ENOENT;
3324 }
3325
3326 /* Reset the nlink of the inode to the correct one */
3327 static int reset_nlink(struct btrfs_trans_handle *trans,
3328                        struct btrfs_root *root,
3329                        struct btrfs_path *path,
3330                        struct inode_record *rec)
3331 {
3332         struct inode_backref *backref;
3333         struct inode_backref *tmp;
3334         struct btrfs_key key;
3335         struct btrfs_inode_item *inode_item;
3336         int ret = 0;
3337
3338         /* We don't believe this either, reset it and iterate backref */
3339         rec->found_link = 0;
3340
3341         /* Remove all backref including the valid ones */
3342         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3343                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3344                                    backref->index, backref->name,
3345                                    backref->namelen, 0);
3346                 if (ret < 0)
3347                         goto out;
3348
3349                 /* remove invalid backref, so it won't be added back */
3350                 if (!(backref->found_dir_index &&
3351                       backref->found_dir_item &&
3352                       backref->found_inode_ref)) {
3353                         list_del(&backref->list);
3354                         free(backref);
3355                 } else {
3356                         rec->found_link++;
3357                 }
3358         }
3359
3360         /* Set nlink to 0 */
3361         key.objectid = rec->ino;
3362         key.type = BTRFS_INODE_ITEM_KEY;
3363         key.offset = 0;
3364         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3365         if (ret < 0)
3366                 goto out;
3367         if (ret > 0) {
3368                 ret = -ENOENT;
3369                 goto out;
3370         }
3371         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3372                                     struct btrfs_inode_item);
3373         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3374         btrfs_mark_buffer_dirty(path->nodes[0]);
3375         btrfs_release_path(path);
3376
3377         /*
3378          * Add back valid inode_ref/dir_item/dir_index,
3379          * add_link() will handle the nlink inc, so new nlink must be correct
3380          */
3381         list_for_each_entry(backref, &rec->backrefs, list) {
3382                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3383                                      backref->name, backref->namelen,
3384                                      backref->filetype, &backref->index, 1, 0);
3385                 if (ret < 0)
3386                         goto out;
3387         }
3388 out:
3389         btrfs_release_path(path);
3390         return ret;
3391 }
3392
3393 static int get_highest_inode(struct btrfs_trans_handle *trans,
3394                                 struct btrfs_root *root,
3395                                 struct btrfs_path *path,
3396                                 u64 *highest_ino)
3397 {
3398         struct btrfs_key key, found_key;
3399         int ret;
3400
3401         btrfs_init_path(path);
3402         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3403         key.offset = -1;
3404         key.type = BTRFS_INODE_ITEM_KEY;
3405         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3406         if (ret == 1) {
3407                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3408                                 path->slots[0] - 1);
3409                 *highest_ino = found_key.objectid;
3410                 ret = 0;
3411         }
3412         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3413                 ret = -EOVERFLOW;
3414         btrfs_release_path(path);
3415         return ret;
3416 }
3417
3418 /*
3419  * Link inode to dir 'lost+found'. Increase @ref_count.
3420  *
3421  * Returns 0 means success.
3422  * Returns <0 means failure.
3423  */
3424 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3425                                    struct btrfs_root *root,
3426                                    struct btrfs_path *path,
3427                                    u64 ino, char *namebuf, u32 name_len,
3428                                    u8 filetype, u64 *ref_count)
3429 {
3430         char *dir_name = "lost+found";
3431         u64 lost_found_ino;
3432         int ret;
3433         u32 mode = 0700;
3434
3435         btrfs_release_path(path);
3436         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3437         if (ret < 0)
3438                 goto out;
3439         lost_found_ino++;
3440
3441         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3442                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3443                           mode);
3444         if (ret < 0) {
3445                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3446                 goto out;
3447         }
3448         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3449                              namebuf, name_len, filetype, NULL, 1, 0);
3450         /*
3451          * Add ".INO" suffix several times to handle case where
3452          * "FILENAME.INO" is already taken by another file.
3453          */
3454         while (ret == -EEXIST) {
3455                 /*
3456                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3457                  */
3458                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3459                         ret = -EFBIG;
3460                         goto out;
3461                 }
3462                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3463                          ".%llu", ino);
3464                 name_len += count_digits(ino) + 1;
3465                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3466                                      name_len, filetype, NULL, 1, 0);
3467         }
3468         if (ret < 0) {
3469                 error("failed to link the inode %llu to %s dir: %s",
3470                       ino, dir_name, strerror(-ret));
3471                 goto out;
3472         }
3473
3474         ++*ref_count;
3475         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3476                name_len, namebuf, dir_name);
3477 out:
3478         btrfs_release_path(path);
3479         if (ret)
3480                 error("failed to move file '%.*s' to '%s' dir", name_len,
3481                                 namebuf, dir_name);
3482         return ret;
3483 }
3484
3485 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3486                                struct btrfs_root *root,
3487                                struct btrfs_path *path,
3488                                struct inode_record *rec)
3489 {
3490         char namebuf[BTRFS_NAME_LEN] = {0};
3491         u8 type = 0;
3492         int namelen = 0;
3493         int name_recovered = 0;
3494         int type_recovered = 0;
3495         int ret = 0;
3496
3497         /*
3498          * Get file name and type first before these invalid inode ref
3499          * are deleted by remove_all_invalid_backref()
3500          */
3501         name_recovered = !find_file_name(rec, namebuf, &namelen);
3502         type_recovered = !find_file_type(rec, &type);
3503
3504         if (!name_recovered) {
3505                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3506                        rec->ino, rec->ino);
3507                 namelen = count_digits(rec->ino);
3508                 sprintf(namebuf, "%llu", rec->ino);
3509                 name_recovered = 1;
3510         }
3511         if (!type_recovered) {
3512                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3513                        rec->ino);
3514                 type = BTRFS_FT_REG_FILE;
3515                 type_recovered = 1;
3516         }
3517
3518         ret = reset_nlink(trans, root, path, rec);
3519         if (ret < 0) {
3520                 fprintf(stderr,
3521                         "Failed to reset nlink for inode %llu: %s\n",
3522                         rec->ino, strerror(-ret));
3523                 goto out;
3524         }
3525
3526         if (rec->found_link == 0) {
3527                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3528                                               namebuf, namelen, type,
3529                                               (u64 *)&rec->found_link);
3530                 if (ret)
3531                         goto out;
3532         }
3533         printf("Fixed the nlink of inode %llu\n", rec->ino);
3534 out:
3535         /*
3536          * Clear the flag anyway, or we will loop forever for the same inode
3537          * as it will not be removed from the bad inode list and the dead loop
3538          * happens.
3539          */
3540         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3541         btrfs_release_path(path);
3542         return ret;
3543 }
3544
3545 /*
3546  * Check if there is any normal(reg or prealloc) file extent for given
3547  * ino.
3548  * This is used to determine the file type when neither its dir_index/item or
3549  * inode_item exists.
3550  *
3551  * This will *NOT* report error, if any error happens, just consider it does
3552  * not have any normal file extent.
3553  */
3554 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3555 {
3556         struct btrfs_path path;
3557         struct btrfs_key key;
3558         struct btrfs_key found_key;
3559         struct btrfs_file_extent_item *fi;
3560         u8 type;
3561         int ret = 0;
3562
3563         btrfs_init_path(&path);
3564         key.objectid = ino;
3565         key.type = BTRFS_EXTENT_DATA_KEY;
3566         key.offset = 0;
3567
3568         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3569         if (ret < 0) {
3570                 ret = 0;
3571                 goto out;
3572         }
3573         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3574                 ret = btrfs_next_leaf(root, &path);
3575                 if (ret) {
3576                         ret = 0;
3577                         goto out;
3578                 }
3579         }
3580         while (1) {
3581                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3582                                       path.slots[0]);
3583                 if (found_key.objectid != ino ||
3584                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3585                         break;
3586                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3587                                     struct btrfs_file_extent_item);
3588                 type = btrfs_file_extent_type(path.nodes[0], fi);
3589                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3590                         ret = 1;
3591                         goto out;
3592                 }
3593         }
3594 out:
3595         btrfs_release_path(&path);
3596         return ret;
3597 }
3598
3599 static u32 btrfs_type_to_imode(u8 type)
3600 {
3601         static u32 imode_by_btrfs_type[] = {
3602                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3603                 [BTRFS_FT_DIR]          = S_IFDIR,
3604                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3605                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3606                 [BTRFS_FT_FIFO]         = S_IFIFO,
3607                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3608                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3609         };
3610
3611         return imode_by_btrfs_type[(type)];
3612 }
3613
3614 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3615                                 struct btrfs_root *root,
3616                                 struct btrfs_path *path,
3617                                 struct inode_record *rec)
3618 {
3619         u8 filetype;
3620         u32 mode = 0700;
3621         int type_recovered = 0;
3622         int ret = 0;
3623
3624         printf("Trying to rebuild inode:%llu\n", rec->ino);
3625
3626         type_recovered = !find_file_type(rec, &filetype);
3627
3628         /*
3629          * Try to determine inode type if type not found.
3630          *
3631          * For found regular file extent, it must be FILE.
3632          * For found dir_item/index, it must be DIR.
3633          *
3634          * For undetermined one, use FILE as fallback.
3635          *
3636          * TODO:
3637          * 1. If found backref(inode_index/item is already handled) to it,
3638          *    it must be DIR.
3639          *    Need new inode-inode ref structure to allow search for that.
3640          */
3641         if (!type_recovered) {
3642                 if (rec->found_file_extent &&
3643                     find_normal_file_extent(root, rec->ino)) {
3644                         type_recovered = 1;
3645                         filetype = BTRFS_FT_REG_FILE;
3646                 } else if (rec->found_dir_item) {
3647                         type_recovered = 1;
3648                         filetype = BTRFS_FT_DIR;
3649                 } else if (!list_empty(&rec->orphan_extents)) {
3650                         type_recovered = 1;
3651                         filetype = BTRFS_FT_REG_FILE;
3652                 } else{
3653                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3654                                rec->ino);
3655                         type_recovered = 1;
3656                         filetype = BTRFS_FT_REG_FILE;
3657                 }
3658         }
3659
3660         ret = btrfs_new_inode(trans, root, rec->ino,
3661                               mode | btrfs_type_to_imode(filetype));
3662         if (ret < 0)
3663                 goto out;
3664
3665         /*
3666          * Here inode rebuild is done, we only rebuild the inode item,
3667          * don't repair the nlink(like move to lost+found).
3668          * That is the job of nlink repair.
3669          *
3670          * We just fill the record and return
3671          */
3672         rec->found_dir_item = 1;
3673         rec->imode = mode | btrfs_type_to_imode(filetype);
3674         rec->nlink = 0;
3675         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3676         /* Ensure the inode_nlinks repair function will be called */
3677         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3678 out:
3679         return ret;
3680 }
3681
3682 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3683                                       struct btrfs_root *root,
3684                                       struct btrfs_path *path,
3685                                       struct inode_record *rec)
3686 {
3687         struct orphan_data_extent *orphan;
3688         struct orphan_data_extent *tmp;
3689         int ret = 0;
3690
3691         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3692                 /*
3693                  * Check for conflicting file extents
3694                  *
3695                  * Here we don't know whether the extents is compressed or not,
3696                  * so we can only assume it not compressed nor data offset,
3697                  * and use its disk_len as extent length.
3698                  */
3699                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3700                                        orphan->offset, orphan->disk_len, 0);
3701                 btrfs_release_path(path);
3702                 if (ret < 0)
3703                         goto out;
3704                 if (!ret) {
3705                         fprintf(stderr,
3706                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3707                                 orphan->disk_bytenr, orphan->disk_len);
3708                         ret = btrfs_free_extent(trans,
3709                                         root->fs_info->extent_root,
3710                                         orphan->disk_bytenr, orphan->disk_len,
3711                                         0, root->objectid, orphan->objectid,
3712                                         orphan->offset);
3713                         if (ret < 0)
3714                                 goto out;
3715                 }
3716                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3717                                 orphan->offset, orphan->disk_bytenr,
3718                                 orphan->disk_len, orphan->disk_len);
3719                 if (ret < 0)
3720                         goto out;
3721
3722                 /* Update file size info */
3723                 rec->found_size += orphan->disk_len;
3724                 if (rec->found_size == rec->nbytes)
3725                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3726
3727                 /* Update the file extent hole info too */
3728                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3729                                            orphan->disk_len);
3730                 if (ret < 0)
3731                         goto out;
3732                 if (RB_EMPTY_ROOT(&rec->holes))
3733                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3734
3735                 list_del(&orphan->list);
3736                 free(orphan);
3737         }
3738         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3739 out:
3740         return ret;
3741 }
3742
3743 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3744                                         struct btrfs_root *root,
3745                                         struct btrfs_path *path,
3746                                         struct inode_record *rec)
3747 {
3748         struct rb_node *node;
3749         struct file_extent_hole *hole;
3750         int found = 0;
3751         int ret = 0;
3752
3753         node = rb_first(&rec->holes);
3754
3755         while (node) {
3756                 found = 1;
3757                 hole = rb_entry(node, struct file_extent_hole, node);
3758                 ret = btrfs_punch_hole(trans, root, rec->ino,
3759                                        hole->start, hole->len);
3760                 if (ret < 0)
3761                         goto out;
3762                 ret = del_file_extent_hole(&rec->holes, hole->start,
3763                                            hole->len);
3764                 if (ret < 0)
3765                         goto out;
3766                 if (RB_EMPTY_ROOT(&rec->holes))
3767                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3768                 node = rb_first(&rec->holes);
3769         }
3770         /* special case for a file losing all its file extent */
3771         if (!found) {
3772                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3773                                        round_up(rec->isize,
3774                                                 root->fs_info->sectorsize));
3775                 if (ret < 0)
3776                         goto out;
3777         }
3778         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3779                rec->ino, root->objectid);
3780 out:
3781         return ret;
3782 }
3783
3784 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3785 {
3786         struct btrfs_trans_handle *trans;
3787         struct btrfs_path path;
3788         int ret = 0;
3789
3790         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3791                              I_ERR_NO_ORPHAN_ITEM |
3792                              I_ERR_LINK_COUNT_WRONG |
3793                              I_ERR_NO_INODE_ITEM |
3794                              I_ERR_FILE_EXTENT_ORPHAN |
3795                              I_ERR_FILE_EXTENT_DISCOUNT|
3796                              I_ERR_FILE_NBYTES_WRONG)))
3797                 return rec->errors;
3798
3799         /*
3800          * For nlink repair, it may create a dir and add link, so
3801          * 2 for parent(256)'s dir_index and dir_item
3802          * 2 for lost+found dir's inode_item and inode_ref
3803          * 1 for the new inode_ref of the file
3804          * 2 for lost+found dir's dir_index and dir_item for the file
3805          */
3806         trans = btrfs_start_transaction(root, 7);
3807         if (IS_ERR(trans))
3808                 return PTR_ERR(trans);
3809
3810         btrfs_init_path(&path);
3811         if (rec->errors & I_ERR_NO_INODE_ITEM)
3812                 ret = repair_inode_no_item(trans, root, &path, rec);
3813         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3814                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3815         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3816                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3817         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3818                 ret = repair_inode_isize(trans, root, &path, rec);
3819         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3820                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3821         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3822                 ret = repair_inode_nlinks(trans, root, &path, rec);
3823         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3824                 ret = repair_inode_nbytes(trans, root, &path, rec);
3825         btrfs_commit_transaction(trans, root);
3826         btrfs_release_path(&path);
3827         return ret;
3828 }
3829
3830 static int check_inode_recs(struct btrfs_root *root,
3831                             struct cache_tree *inode_cache)
3832 {
3833         struct cache_extent *cache;
3834         struct ptr_node *node;
3835         struct inode_record *rec;
3836         struct inode_backref *backref;
3837         int stage = 0;
3838         int ret = 0;
3839         int err = 0;
3840         u64 error = 0;
3841         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3842
3843         if (btrfs_root_refs(&root->root_item) == 0) {
3844                 if (!cache_tree_empty(inode_cache))
3845                         fprintf(stderr, "warning line %d\n", __LINE__);
3846                 return 0;
3847         }
3848
3849         /*
3850          * We need to repair backrefs first because we could change some of the
3851          * errors in the inode recs.
3852          *
3853          * We also need to go through and delete invalid backrefs first and then
3854          * add the correct ones second.  We do this because we may get EEXIST
3855          * when adding back the correct index because we hadn't yet deleted the
3856          * invalid index.
3857          *
3858          * For example, if we were missing a dir index then the directories
3859          * isize would be wrong, so if we fixed the isize to what we thought it
3860          * would be and then fixed the backref we'd still have a invalid fs, so
3861          * we need to add back the dir index and then check to see if the isize
3862          * is still wrong.
3863          */
3864         while (stage < 3) {
3865                 stage++;
3866                 if (stage == 3 && !err)
3867                         break;
3868
3869                 cache = search_cache_extent(inode_cache, 0);
3870                 while (repair && cache) {
3871                         node = container_of(cache, struct ptr_node, cache);
3872                         rec = node->data;
3873                         cache = next_cache_extent(cache);
3874
3875                         /* Need to free everything up and rescan */
3876                         if (stage == 3) {
3877                                 remove_cache_extent(inode_cache, &node->cache);
3878                                 free(node);
3879                                 free_inode_rec(rec);
3880                                 continue;
3881                         }
3882
3883                         if (list_empty(&rec->backrefs))
3884                                 continue;
3885
3886                         ret = repair_inode_backrefs(root, rec, inode_cache,
3887                                                     stage == 1);
3888                         if (ret < 0) {
3889                                 err = ret;
3890                                 stage = 2;
3891                                 break;
3892                         } if (ret > 0) {
3893                                 err = -EAGAIN;
3894                         }
3895                 }
3896         }
3897         if (err)
3898                 return err;
3899
3900         rec = get_inode_rec(inode_cache, root_dirid, 0);
3901         BUG_ON(IS_ERR(rec));
3902         if (rec) {
3903                 ret = check_root_dir(rec);
3904                 if (ret) {
3905                         fprintf(stderr, "root %llu root dir %llu error\n",
3906                                 (unsigned long long)root->root_key.objectid,
3907                                 (unsigned long long)root_dirid);
3908                         print_inode_error(root, rec);
3909                         error++;
3910                 }
3911         } else {
3912                 if (repair) {
3913                         struct btrfs_trans_handle *trans;
3914
3915                         trans = btrfs_start_transaction(root, 1);
3916                         if (IS_ERR(trans)) {
3917                                 err = PTR_ERR(trans);
3918                                 return err;
3919                         }
3920
3921                         fprintf(stderr,
3922                                 "root %llu missing its root dir, recreating\n",
3923                                 (unsigned long long)root->objectid);
3924
3925                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3926                         BUG_ON(ret);
3927
3928                         btrfs_commit_transaction(trans, root);
3929                         return -EAGAIN;
3930                 }
3931
3932                 fprintf(stderr, "root %llu root dir %llu not found\n",
3933                         (unsigned long long)root->root_key.objectid,
3934                         (unsigned long long)root_dirid);
3935         }
3936
3937         while (1) {
3938                 cache = search_cache_extent(inode_cache, 0);
3939                 if (!cache)
3940                         break;
3941                 node = container_of(cache, struct ptr_node, cache);
3942                 rec = node->data;
3943                 remove_cache_extent(inode_cache, &node->cache);
3944                 free(node);
3945                 if (rec->ino == root_dirid ||
3946                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3947                         free_inode_rec(rec);
3948                         continue;
3949                 }
3950
3951                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3952                         ret = check_orphan_item(root, rec->ino);
3953                         if (ret == 0)
3954                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3955                         if (can_free_inode_rec(rec)) {
3956                                 free_inode_rec(rec);
3957                                 continue;
3958                         }
3959                 }
3960
3961                 if (!rec->found_inode_item)
3962                         rec->errors |= I_ERR_NO_INODE_ITEM;
3963                 if (rec->found_link != rec->nlink)
3964                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3965                 if (repair) {
3966                         ret = try_repair_inode(root, rec);
3967                         if (ret == 0 && can_free_inode_rec(rec)) {
3968                                 free_inode_rec(rec);
3969                                 continue;
3970                         }
3971                         ret = 0;
3972                 }
3973
3974                 if (!(repair && ret == 0))
3975                         error++;
3976                 print_inode_error(root, rec);
3977                 list_for_each_entry(backref, &rec->backrefs, list) {
3978                         if (!backref->found_dir_item)
3979                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3980                         if (!backref->found_dir_index)
3981                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3982                         if (!backref->found_inode_ref)
3983                                 backref->errors |= REF_ERR_NO_INODE_REF;
3984                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3985                                 " namelen %u name %s filetype %d errors %x",
3986                                 (unsigned long long)backref->dir,
3987                                 (unsigned long long)backref->index,
3988                                 backref->namelen, backref->name,
3989                                 backref->filetype, backref->errors);
3990                         print_ref_error(backref->errors);
3991                 }
3992                 free_inode_rec(rec);
3993         }
3994         return (error > 0) ? -1 : 0;
3995 }
3996
3997 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3998                                         u64 objectid)
3999 {
4000         struct cache_extent *cache;
4001         struct root_record *rec = NULL;
4002         int ret;
4003
4004         cache = lookup_cache_extent(root_cache, objectid, 1);
4005         if (cache) {
4006                 rec = container_of(cache, struct root_record, cache);
4007         } else {
4008                 rec = calloc(1, sizeof(*rec));
4009                 if (!rec)
4010                         return ERR_PTR(-ENOMEM);
4011                 rec->objectid = objectid;
4012                 INIT_LIST_HEAD(&rec->backrefs);
4013                 rec->cache.start = objectid;
4014                 rec->cache.size = 1;
4015
4016                 ret = insert_cache_extent(root_cache, &rec->cache);
4017                 if (ret)
4018                         return ERR_PTR(-EEXIST);
4019         }
4020         return rec;
4021 }
4022
4023 static struct root_backref *get_root_backref(struct root_record *rec,
4024                                              u64 ref_root, u64 dir, u64 index,
4025                                              const char *name, int namelen)
4026 {
4027         struct root_backref *backref;
4028
4029         list_for_each_entry(backref, &rec->backrefs, list) {
4030                 if (backref->ref_root != ref_root || backref->dir != dir ||
4031                     backref->namelen != namelen)
4032                         continue;
4033                 if (memcmp(name, backref->name, namelen))
4034                         continue;
4035                 return backref;
4036         }
4037
4038         backref = calloc(1, sizeof(*backref) + namelen + 1);
4039         if (!backref)
4040                 return NULL;
4041         backref->ref_root = ref_root;
4042         backref->dir = dir;
4043         backref->index = index;
4044         backref->namelen = namelen;
4045         memcpy(backref->name, name, namelen);
4046         backref->name[namelen] = '\0';
4047         list_add_tail(&backref->list, &rec->backrefs);
4048         return backref;
4049 }
4050
4051 static void free_root_record(struct cache_extent *cache)
4052 {
4053         struct root_record *rec;
4054         struct root_backref *backref;
4055
4056         rec = container_of(cache, struct root_record, cache);
4057         while (!list_empty(&rec->backrefs)) {
4058                 backref = to_root_backref(rec->backrefs.next);
4059                 list_del(&backref->list);
4060                 free(backref);
4061         }
4062
4063         free(rec);
4064 }
4065
4066 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
4067
4068 static int add_root_backref(struct cache_tree *root_cache,
4069                             u64 root_id, u64 ref_root, u64 dir, u64 index,
4070                             const char *name, int namelen,
4071                             int item_type, int errors)
4072 {
4073         struct root_record *rec;
4074         struct root_backref *backref;
4075
4076         rec = get_root_rec(root_cache, root_id);
4077         BUG_ON(IS_ERR(rec));
4078         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
4079         BUG_ON(!backref);
4080
4081         backref->errors |= errors;
4082
4083         if (item_type != BTRFS_DIR_ITEM_KEY) {
4084                 if (backref->found_dir_index || backref->found_back_ref ||
4085                     backref->found_forward_ref) {
4086                         if (backref->index != index)
4087                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
4088                 } else {
4089                         backref->index = index;
4090                 }
4091         }
4092
4093         if (item_type == BTRFS_DIR_ITEM_KEY) {
4094                 if (backref->found_forward_ref)
4095                         rec->found_ref++;
4096                 backref->found_dir_item = 1;
4097         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
4098                 backref->found_dir_index = 1;
4099         } else if (item_type == BTRFS_ROOT_REF_KEY) {
4100                 if (backref->found_forward_ref)
4101                         backref->errors |= REF_ERR_DUP_ROOT_REF;
4102                 else if (backref->found_dir_item)
4103                         rec->found_ref++;
4104                 backref->found_forward_ref = 1;
4105         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
4106                 if (backref->found_back_ref)
4107                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
4108                 backref->found_back_ref = 1;
4109         } else {
4110                 BUG_ON(1);
4111         }
4112
4113         if (backref->found_forward_ref && backref->found_dir_item)
4114                 backref->reachable = 1;
4115         return 0;
4116 }
4117
4118 static int merge_root_recs(struct btrfs_root *root,
4119                            struct cache_tree *src_cache,
4120                            struct cache_tree *dst_cache)
4121 {
4122         struct cache_extent *cache;
4123         struct ptr_node *node;
4124         struct inode_record *rec;
4125         struct inode_backref *backref;
4126         int ret = 0;
4127
4128         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4129                 free_inode_recs_tree(src_cache);
4130                 return 0;
4131         }
4132
4133         while (1) {
4134                 cache = search_cache_extent(src_cache, 0);
4135                 if (!cache)
4136                         break;
4137                 node = container_of(cache, struct ptr_node, cache);
4138                 rec = node->data;
4139                 remove_cache_extent(src_cache, &node->cache);
4140                 free(node);
4141
4142                 ret = is_child_root(root, root->objectid, rec->ino);
4143                 if (ret < 0)
4144                         break;
4145                 else if (ret == 0)
4146                         goto skip;
4147
4148                 list_for_each_entry(backref, &rec->backrefs, list) {
4149                         BUG_ON(backref->found_inode_ref);
4150                         if (backref->found_dir_item)
4151                                 add_root_backref(dst_cache, rec->ino,
4152                                         root->root_key.objectid, backref->dir,
4153                                         backref->index, backref->name,
4154                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
4155                                         backref->errors);
4156                         if (backref->found_dir_index)
4157                                 add_root_backref(dst_cache, rec->ino,
4158                                         root->root_key.objectid, backref->dir,
4159                                         backref->index, backref->name,
4160                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
4161                                         backref->errors);
4162                 }
4163 skip:
4164                 free_inode_rec(rec);
4165         }
4166         if (ret < 0)
4167                 return ret;
4168         return 0;
4169 }
4170
4171 static int check_root_refs(struct btrfs_root *root,
4172                            struct cache_tree *root_cache)
4173 {
4174         struct root_record *rec;
4175         struct root_record *ref_root;
4176         struct root_backref *backref;
4177         struct cache_extent *cache;
4178         int loop = 1;
4179         int ret;
4180         int error;
4181         int errors = 0;
4182
4183         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
4184         BUG_ON(IS_ERR(rec));
4185         rec->found_ref = 1;
4186
4187         /* fixme: this can not detect circular references */
4188         while (loop) {
4189                 loop = 0;
4190                 cache = search_cache_extent(root_cache, 0);
4191                 while (1) {
4192                         if (!cache)
4193                                 break;
4194                         rec = container_of(cache, struct root_record, cache);
4195                         cache = next_cache_extent(cache);
4196
4197                         if (rec->found_ref == 0)
4198                                 continue;
4199
4200                         list_for_each_entry(backref, &rec->backrefs, list) {
4201                                 if (!backref->reachable)
4202                                         continue;
4203
4204                                 ref_root = get_root_rec(root_cache,
4205                                                         backref->ref_root);
4206                                 BUG_ON(IS_ERR(ref_root));
4207                                 if (ref_root->found_ref > 0)
4208                                         continue;
4209
4210                                 backref->reachable = 0;
4211                                 rec->found_ref--;
4212                                 if (rec->found_ref == 0)
4213                                         loop = 1;
4214                         }
4215                 }
4216         }
4217
4218         cache = search_cache_extent(root_cache, 0);
4219         while (1) {
4220                 if (!cache)
4221                         break;
4222                 rec = container_of(cache, struct root_record, cache);
4223                 cache = next_cache_extent(cache);
4224
4225                 if (rec->found_ref == 0 &&
4226                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
4227                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
4228                         ret = check_orphan_item(root->fs_info->tree_root,
4229                                                 rec->objectid);
4230                         if (ret == 0)
4231                                 continue;
4232
4233                         /*
4234                          * If we don't have a root item then we likely just have
4235                          * a dir item in a snapshot for this root but no actual
4236                          * ref key or anything so it's meaningless.
4237                          */
4238                         if (!rec->found_root_item)
4239                                 continue;
4240                         errors++;
4241                         fprintf(stderr, "fs tree %llu not referenced\n",
4242                                 (unsigned long long)rec->objectid);
4243                 }
4244
4245                 error = 0;
4246                 if (rec->found_ref > 0 && !rec->found_root_item)
4247                         error = 1;
4248                 list_for_each_entry(backref, &rec->backrefs, list) {
4249                         if (!backref->found_dir_item)
4250                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
4251                         if (!backref->found_dir_index)
4252                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
4253                         if (!backref->found_back_ref)
4254                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
4255                         if (!backref->found_forward_ref)
4256                                 backref->errors |= REF_ERR_NO_ROOT_REF;
4257                         if (backref->reachable && backref->errors)
4258                                 error = 1;
4259                 }
4260                 if (!error)
4261                         continue;
4262
4263                 errors++;
4264                 fprintf(stderr, "fs tree %llu refs %u %s\n",
4265                         (unsigned long long)rec->objectid, rec->found_ref,
4266                          rec->found_root_item ? "" : "not found");
4267
4268                 list_for_each_entry(backref, &rec->backrefs, list) {
4269                         if (!backref->reachable)
4270                                 continue;
4271                         if (!backref->errors && rec->found_root_item)
4272                                 continue;
4273                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4274                                 " index %llu namelen %u name %s errors %x\n",
4275                                 (unsigned long long)backref->ref_root,
4276                                 (unsigned long long)backref->dir,
4277                                 (unsigned long long)backref->index,
4278                                 backref->namelen, backref->name,
4279                                 backref->errors);
4280                         print_ref_error(backref->errors);
4281                 }
4282         }
4283         return errors > 0 ? 1 : 0;
4284 }
4285
4286 static int process_root_ref(struct extent_buffer *eb, int slot,
4287                             struct btrfs_key *key,
4288                             struct cache_tree *root_cache)
4289 {
4290         u64 dirid;
4291         u64 index;
4292         u32 len;
4293         u32 name_len;
4294         struct btrfs_root_ref *ref;
4295         char namebuf[BTRFS_NAME_LEN];
4296         int error;
4297
4298         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4299
4300         dirid = btrfs_root_ref_dirid(eb, ref);
4301         index = btrfs_root_ref_sequence(eb, ref);
4302         name_len = btrfs_root_ref_name_len(eb, ref);
4303
4304         if (name_len <= BTRFS_NAME_LEN) {
4305                 len = name_len;
4306                 error = 0;
4307         } else {
4308                 len = BTRFS_NAME_LEN;
4309                 error = REF_ERR_NAME_TOO_LONG;
4310         }
4311         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4312
4313         if (key->type == BTRFS_ROOT_REF_KEY) {
4314                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4315                                  index, namebuf, len, key->type, error);
4316         } else {
4317                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4318                                  index, namebuf, len, key->type, error);
4319         }
4320         return 0;
4321 }
4322
4323 static void free_corrupt_block(struct cache_extent *cache)
4324 {
4325         struct btrfs_corrupt_block *corrupt;
4326
4327         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4328         free(corrupt);
4329 }
4330
4331 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4332
4333 /*
4334  * Repair the btree of the given root.
4335  *
4336  * The fix is to remove the node key in corrupt_blocks cache_tree.
4337  * and rebalance the tree.
4338  * After the fix, the btree should be writeable.
4339  */
4340 static int repair_btree(struct btrfs_root *root,
4341                         struct cache_tree *corrupt_blocks)
4342 {
4343         struct btrfs_trans_handle *trans;
4344         struct btrfs_path path;
4345         struct btrfs_corrupt_block *corrupt;
4346         struct cache_extent *cache;
4347         struct btrfs_key key;
4348         u64 offset;
4349         int level;
4350         int ret = 0;
4351
4352         if (cache_tree_empty(corrupt_blocks))
4353                 return 0;
4354
4355         trans = btrfs_start_transaction(root, 1);
4356         if (IS_ERR(trans)) {
4357                 ret = PTR_ERR(trans);
4358                 fprintf(stderr, "Error starting transaction: %s\n",
4359                         strerror(-ret));
4360                 return ret;
4361         }
4362         btrfs_init_path(&path);
4363         cache = first_cache_extent(corrupt_blocks);
4364         while (cache) {
4365                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4366                                        cache);
4367                 level = corrupt->level;
4368                 path.lowest_level = level;
4369                 key.objectid = corrupt->key.objectid;
4370                 key.type = corrupt->key.type;
4371                 key.offset = corrupt->key.offset;
4372
4373                 /*
4374                  * Here we don't want to do any tree balance, since it may
4375                  * cause a balance with corrupted brother leaf/node,
4376                  * so ins_len set to 0 here.
4377                  * Balance will be done after all corrupt node/leaf is deleted.
4378                  */
4379                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4380                 if (ret < 0)
4381                         goto out;
4382                 offset = btrfs_node_blockptr(path.nodes[level],
4383                                              path.slots[level]);
4384
4385                 /* Remove the ptr */
4386                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4387                 if (ret < 0)
4388                         goto out;
4389                 /*
4390                  * Remove the corresponding extent
4391                  * return value is not concerned.
4392                  */
4393                 btrfs_release_path(&path);
4394                 ret = btrfs_free_extent(trans, root, offset,
4395                                 root->fs_info->nodesize, 0,
4396                                 root->root_key.objectid, level - 1, 0);
4397                 cache = next_cache_extent(cache);
4398         }
4399
4400         /* Balance the btree using btrfs_search_slot() */
4401         cache = first_cache_extent(corrupt_blocks);
4402         while (cache) {
4403                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4404                                        cache);
4405                 memcpy(&key, &corrupt->key, sizeof(key));
4406                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4407                 if (ret < 0)
4408                         goto out;
4409                 /* return will always >0 since it won't find the item */
4410                 ret = 0;
4411                 btrfs_release_path(&path);
4412                 cache = next_cache_extent(cache);
4413         }
4414 out:
4415         btrfs_commit_transaction(trans, root);
4416         btrfs_release_path(&path);
4417         return ret;
4418 }
4419
4420 static int check_fs_root(struct btrfs_root *root,
4421                          struct cache_tree *root_cache,
4422                          struct walk_control *wc)
4423 {
4424         int ret = 0;
4425         int err = 0;
4426         int wret;
4427         int level;
4428         struct btrfs_path path;
4429         struct shared_node root_node;
4430         struct root_record *rec;
4431         struct btrfs_root_item *root_item = &root->root_item;
4432         struct cache_tree corrupt_blocks;
4433         struct orphan_data_extent *orphan;
4434         struct orphan_data_extent *tmp;
4435         enum btrfs_tree_block_status status;
4436         struct node_refs nrefs;
4437
4438         /*
4439          * Reuse the corrupt_block cache tree to record corrupted tree block
4440          *
4441          * Unlike the usage in extent tree check, here we do it in a per
4442          * fs/subvol tree base.
4443          */
4444         cache_tree_init(&corrupt_blocks);
4445         root->fs_info->corrupt_blocks = &corrupt_blocks;
4446
4447         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4448                 rec = get_root_rec(root_cache, root->root_key.objectid);
4449                 BUG_ON(IS_ERR(rec));
4450                 if (btrfs_root_refs(root_item) > 0)
4451                         rec->found_root_item = 1;
4452         }
4453
4454         btrfs_init_path(&path);
4455         memset(&root_node, 0, sizeof(root_node));
4456         cache_tree_init(&root_node.root_cache);
4457         cache_tree_init(&root_node.inode_cache);
4458         memset(&nrefs, 0, sizeof(nrefs));
4459
4460         /* Move the orphan extent record to corresponding inode_record */
4461         list_for_each_entry_safe(orphan, tmp,
4462                                  &root->orphan_data_extents, list) {
4463                 struct inode_record *inode;
4464
4465                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4466                                       1);
4467                 BUG_ON(IS_ERR(inode));
4468                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4469                 list_move(&orphan->list, &inode->orphan_extents);
4470         }
4471
4472         level = btrfs_header_level(root->node);
4473         memset(wc->nodes, 0, sizeof(wc->nodes));
4474         wc->nodes[level] = &root_node;
4475         wc->active_node = level;
4476         wc->root_level = level;
4477
4478         /* We may not have checked the root block, lets do that now */
4479         if (btrfs_is_leaf(root->node))
4480                 status = btrfs_check_leaf(root, NULL, root->node);
4481         else
4482                 status = btrfs_check_node(root, NULL, root->node);
4483         if (status != BTRFS_TREE_BLOCK_CLEAN)
4484                 return -EIO;
4485
4486         if (btrfs_root_refs(root_item) > 0 ||
4487             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4488                 path.nodes[level] = root->node;
4489                 extent_buffer_get(root->node);
4490                 path.slots[level] = 0;
4491         } else {
4492                 struct btrfs_key key;
4493                 struct btrfs_disk_key found_key;
4494
4495                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4496                 level = root_item->drop_level;
4497                 path.lowest_level = level;
4498                 if (level > btrfs_header_level(root->node) ||
4499                     level >= BTRFS_MAX_LEVEL) {
4500                         error("ignoring invalid drop level: %u", level);
4501                         goto skip_walking;
4502                 }
4503                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4504                 if (wret < 0)
4505                         goto skip_walking;
4506                 btrfs_node_key(path.nodes[level], &found_key,
4507                                 path.slots[level]);
4508                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4509                                         sizeof(found_key)));
4510         }
4511
4512         while (1) {
4513                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4514                 if (wret < 0)
4515                         ret = wret;
4516                 if (wret != 0)
4517                         break;
4518
4519                 wret = walk_up_tree(root, &path, wc, &level);
4520                 if (wret < 0)
4521                         ret = wret;
4522                 if (wret != 0)
4523                         break;
4524         }
4525 skip_walking:
4526         btrfs_release_path(&path);
4527
4528         if (!cache_tree_empty(&corrupt_blocks)) {
4529                 struct cache_extent *cache;
4530                 struct btrfs_corrupt_block *corrupt;
4531
4532                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4533                        root->root_key.objectid);
4534                 cache = first_cache_extent(&corrupt_blocks);
4535                 while (cache) {
4536                         corrupt = container_of(cache,
4537                                                struct btrfs_corrupt_block,
4538                                                cache);
4539                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4540                                cache->start, corrupt->level,
4541                                corrupt->key.objectid, corrupt->key.type,
4542                                corrupt->key.offset);
4543                         cache = next_cache_extent(cache);
4544                 }
4545                 if (repair) {
4546                         printf("Try to repair the btree for root %llu\n",
4547                                root->root_key.objectid);
4548                         ret = repair_btree(root, &corrupt_blocks);
4549                         if (ret < 0)
4550                                 fprintf(stderr, "Failed to repair btree: %s\n",
4551                                         strerror(-ret));
4552                         if (!ret)
4553                                 printf("Btree for root %llu is fixed\n",
4554                                        root->root_key.objectid);
4555                 }
4556         }
4557
4558         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4559         if (err < 0)
4560                 ret = err;
4561
4562         if (root_node.current) {
4563                 root_node.current->checked = 1;
4564                 maybe_free_inode_rec(&root_node.inode_cache,
4565                                 root_node.current);
4566         }
4567
4568         err = check_inode_recs(root, &root_node.inode_cache);
4569         if (!ret)
4570                 ret = err;
4571
4572         free_corrupt_blocks_tree(&corrupt_blocks);
4573         root->fs_info->corrupt_blocks = NULL;
4574         free_orphan_data_extents(&root->orphan_data_extents);
4575         return ret;
4576 }
4577
4578 static int fs_root_objectid(u64 objectid)
4579 {
4580         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4581             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4582                 return 1;
4583         return is_fstree(objectid);
4584 }
4585
4586 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4587                           struct cache_tree *root_cache)
4588 {
4589         struct btrfs_path path;
4590         struct btrfs_key key;
4591         struct walk_control wc;
4592         struct extent_buffer *leaf, *tree_node;
4593         struct btrfs_root *tmp_root;
4594         struct btrfs_root *tree_root = fs_info->tree_root;
4595         int ret;
4596         int err = 0;
4597
4598         if (ctx.progress_enabled) {
4599                 ctx.tp = TASK_FS_ROOTS;
4600                 task_start(ctx.info);
4601         }
4602
4603         /*
4604          * Just in case we made any changes to the extent tree that weren't
4605          * reflected into the free space cache yet.
4606          */
4607         if (repair)
4608                 reset_cached_block_groups(fs_info);
4609         memset(&wc, 0, sizeof(wc));
4610         cache_tree_init(&wc.shared);
4611         btrfs_init_path(&path);
4612
4613 again:
4614         key.offset = 0;
4615         key.objectid = 0;
4616         key.type = BTRFS_ROOT_ITEM_KEY;
4617         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4618         if (ret < 0) {
4619                 err = 1;
4620                 goto out;
4621         }
4622         tree_node = tree_root->node;
4623         while (1) {
4624                 if (tree_node != tree_root->node) {
4625                         free_root_recs_tree(root_cache);
4626                         btrfs_release_path(&path);
4627                         goto again;
4628                 }
4629                 leaf = path.nodes[0];
4630                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4631                         ret = btrfs_next_leaf(tree_root, &path);
4632                         if (ret) {
4633                                 if (ret < 0)
4634                                         err = 1;
4635                                 break;
4636                         }
4637                         leaf = path.nodes[0];
4638                 }
4639                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4640                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4641                     fs_root_objectid(key.objectid)) {
4642                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4643                                 tmp_root = btrfs_read_fs_root_no_cache(
4644                                                 fs_info, &key);
4645                         } else {
4646                                 key.offset = (u64)-1;
4647                                 tmp_root = btrfs_read_fs_root(
4648                                                 fs_info, &key);
4649                         }
4650                         if (IS_ERR(tmp_root)) {
4651                                 err = 1;
4652                                 goto next;
4653                         }
4654                         ret = check_fs_root(tmp_root, root_cache, &wc);
4655                         if (ret == -EAGAIN) {
4656                                 free_root_recs_tree(root_cache);
4657                                 btrfs_release_path(&path);
4658                                 goto again;
4659                         }
4660                         if (ret)
4661                                 err = 1;
4662                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4663                                 btrfs_free_fs_root(tmp_root);
4664                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4665                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4666                         process_root_ref(leaf, path.slots[0], &key,
4667                                          root_cache);
4668                 }
4669 next:
4670                 path.slots[0]++;
4671         }
4672 out:
4673         btrfs_release_path(&path);
4674         if (err)
4675                 free_extent_cache_tree(&wc.shared);
4676         if (!cache_tree_empty(&wc.shared))
4677                 fprintf(stderr, "warning line %d\n", __LINE__);
4678
4679         task_stop(ctx.info);
4680
4681         return err;
4682 }
4683
4684 /*
4685  * Find the @index according by @ino and name.
4686  * Notice:time efficiency is O(N)
4687  *
4688  * @root:       the root of the fs/file tree
4689  * @index_ret:  the index as return value
4690  * @namebuf:    the name to match
4691  * @name_len:   the length of name to match
4692  * @file_type:  the file_type of INODE_ITEM to match
4693  *
4694  * Returns 0 if found and *@index_ret will be modified with right value
4695  * Returns< 0 not found and *@index_ret will be (u64)-1
4696  */
4697 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4698                           u64 *index_ret, char *namebuf, u32 name_len,
4699                           u8 file_type)
4700 {
4701         struct btrfs_path path;
4702         struct extent_buffer *node;
4703         struct btrfs_dir_item *di;
4704         struct btrfs_key key;
4705         struct btrfs_key location;
4706         char name[BTRFS_NAME_LEN] = {0};
4707
4708         u32 total;
4709         u32 cur = 0;
4710         u32 len;
4711         u32 data_len;
4712         u8 filetype;
4713         int slot;
4714         int ret;
4715
4716         ASSERT(index_ret);
4717
4718         /* search from the last index */
4719         key.objectid = dirid;
4720         key.offset = (u64)-1;
4721         key.type = BTRFS_DIR_INDEX_KEY;
4722
4723         btrfs_init_path(&path);
4724         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4725         if (ret < 0)
4726                 return ret;
4727
4728 loop:
4729         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4730         if (ret) {
4731                 ret = -ENOENT;
4732                 *index_ret = (64)-1;
4733                 goto out;
4734         }
4735         /* Check whether inode_id/filetype/name match */
4736         node = path.nodes[0];
4737         slot = path.slots[0];
4738         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4739         total = btrfs_item_size_nr(node, slot);
4740         while (cur < total) {
4741                 ret = -ENOENT;
4742                 len = btrfs_dir_name_len(node, di);
4743                 data_len = btrfs_dir_data_len(node, di);
4744
4745                 btrfs_dir_item_key_to_cpu(node, di, &location);
4746                 if (location.objectid != location_id ||
4747                     location.type != BTRFS_INODE_ITEM_KEY ||
4748                     location.offset != 0)
4749                         goto next;
4750
4751                 filetype = btrfs_dir_type(node, di);
4752                 if (file_type != filetype)
4753                         goto next;
4754
4755                 if (len > BTRFS_NAME_LEN)
4756                         len = BTRFS_NAME_LEN;
4757
4758                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4759                 if (len != name_len || strncmp(namebuf, name, len))
4760                         goto next;
4761
4762                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4763                 *index_ret = key.offset;
4764                 ret = 0;
4765                 goto out;
4766 next:
4767                 len += sizeof(*di) + data_len;
4768                 di = (struct btrfs_dir_item *)((char *)di + len);
4769                 cur += len;
4770         }
4771         goto loop;
4772
4773 out:
4774         btrfs_release_path(&path);
4775         return ret;
4776 }
4777
4778 /*
4779  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4780  * INODE_REF/INODE_EXTREF match.
4781  *
4782  * @root:       the root of the fs/file tree
4783  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4784  *              value while find index
4785  * @location_key: location key of the struct btrfs_dir_item to match
4786  * @name:       the name to match
4787  * @namelen:    the length of name
4788  * @file_type:  the type of file to math
4789  *
4790  * Return 0 if no error occurred.
4791  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4792  * DIR_ITEM/DIR_INDEX
4793  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4794  * and DIR_ITEM/DIR_INDEX mismatch
4795  */
4796 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4797                          struct btrfs_key *location_key, char *name,
4798                          u32 namelen, u8 file_type)
4799 {
4800         struct btrfs_path path;
4801         struct extent_buffer *node;
4802         struct btrfs_dir_item *di;
4803         struct btrfs_key location;
4804         char namebuf[BTRFS_NAME_LEN] = {0};
4805         u32 total;
4806         u32 cur = 0;
4807         u32 len;
4808         u32 data_len;
4809         u8 filetype;
4810         int slot;
4811         int ret;
4812
4813         /* get the index by traversing all index */
4814         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4815                 ret = find_dir_index(root, key->objectid,
4816                                      location_key->objectid, &key->offset,
4817                                      name, namelen, file_type);
4818                 if (ret)
4819                         ret = DIR_INDEX_MISSING;
4820                 return ret;
4821         }
4822
4823         btrfs_init_path(&path);
4824         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4825         if (ret) {
4826                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4827                         DIR_INDEX_MISSING;
4828                 goto out;
4829         }
4830
4831         /* Check whether inode_id/filetype/name match */
4832         node = path.nodes[0];
4833         slot = path.slots[0];
4834         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4835         total = btrfs_item_size_nr(node, slot);
4836         while (cur < total) {
4837                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4838                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4839
4840                 len = btrfs_dir_name_len(node, di);
4841                 data_len = btrfs_dir_data_len(node, di);
4842
4843                 btrfs_dir_item_key_to_cpu(node, di, &location);
4844                 if (location.objectid != location_key->objectid ||
4845                     location.type != location_key->type ||
4846                     location.offset != location_key->offset)
4847                         goto next;
4848
4849                 filetype = btrfs_dir_type(node, di);
4850                 if (file_type != filetype)
4851                         goto next;
4852
4853                 if (len > BTRFS_NAME_LEN) {
4854                         len = BTRFS_NAME_LEN;
4855                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4856                         root->objectid,
4857                         key->type == BTRFS_DIR_ITEM_KEY ?
4858                         "DIR_ITEM" : "DIR_INDEX",
4859                         key->objectid, key->offset, len);
4860                 }
4861                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4862                                    len);
4863                 if (len != namelen || strncmp(namebuf, name, len))
4864                         goto next;
4865
4866                 ret = 0;
4867                 goto out;
4868 next:
4869                 len += sizeof(*di) + data_len;
4870                 di = (struct btrfs_dir_item *)((char *)di + len);
4871                 cur += len;
4872         }
4873
4874 out:
4875         btrfs_release_path(&path);
4876         return ret;
4877 }
4878
4879 /*
4880  * Prints inode ref error message
4881  */
4882 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4883                                 u64 index, const char *namebuf, int name_len,
4884                                 u8 filetype, int err)
4885 {
4886         if (!err)
4887                 return;
4888
4889         /* root dir error */
4890         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4891                 error(
4892         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4893                       root->objectid, key->objectid, key->offset, namebuf);
4894                 return;
4895         }
4896
4897         /* normal error */
4898         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4899                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4900                       root->objectid, key->offset,
4901                       btrfs_name_hash(namebuf, name_len),
4902                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4903                       namebuf, filetype);
4904         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4905                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4906                       root->objectid, key->offset, index,
4907                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4908                       namebuf, filetype);
4909 }
4910
4911 /*
4912  * Insert the missing inode item.
4913  *
4914  * Returns 0 means success.
4915  * Returns <0 means error.
4916  */
4917 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4918                                      u8 filetype)
4919 {
4920         struct btrfs_key key;
4921         struct btrfs_trans_handle *trans;
4922         struct btrfs_path path;
4923         int ret;
4924
4925         key.objectid = ino;
4926         key.type = BTRFS_INODE_ITEM_KEY;
4927         key.offset = 0;
4928
4929         btrfs_init_path(&path);
4930         trans = btrfs_start_transaction(root, 1);
4931         if (IS_ERR(trans)) {
4932                 ret = -EIO;
4933                 goto out;
4934         }
4935
4936         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4937         if (ret < 0 || !ret)
4938                 goto fail;
4939
4940         /* insert inode item */
4941         create_inode_item_lowmem(trans, root, ino, filetype);
4942         ret = 0;
4943 fail:
4944         btrfs_commit_transaction(trans, root);
4945 out:
4946         if (ret)
4947                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4948                       root->objectid, ino);
4949         btrfs_release_path(&path);
4950         return ret;
4951 }
4952
4953 /*
4954  * The ternary means dir item, dir index and relative inode ref.
4955  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4956  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4957  * strategy:
4958  * If two of three is missing or mismatched, delete the existing one.
4959  * If one of three is missing or mismatched, add the missing one.
4960  *
4961  * returns 0 means success.
4962  * returns not 0 means on error;
4963  */
4964 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4965                           u64 index, char *name, int name_len, u8 filetype,
4966                           int err)
4967 {
4968         struct btrfs_trans_handle *trans;
4969         int stage = 0;
4970         int ret = 0;
4971
4972         /*
4973          * stage shall be one of following valild values:
4974          *      0: Fine, nothing to do.
4975          *      1: One of three is wrong, so add missing one.
4976          *      2: Two of three is wrong, so delete existed one.
4977          */
4978         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4979                 stage++;
4980         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4981                 stage++;
4982         if (err & (INODE_REF_MISSING))
4983                 stage++;
4984
4985         /* stage must be smllarer than 3 */
4986         ASSERT(stage < 3);
4987
4988         trans = btrfs_start_transaction(root, 1);
4989         if (stage == 2) {
4990                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4991                                    name_len, 0);
4992                 goto out;
4993         }
4994         if (stage == 1) {
4995                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4996                                filetype, &index, 1, 1);
4997                 goto out;
4998         }
4999 out:
5000         btrfs_commit_transaction(trans, root);
5001
5002         if (ret)
5003                 error("fail to repair inode %llu name %s filetype %u",
5004                       ino, name, filetype);
5005         else
5006                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
5007                        stage == 2 ? "Delete" : "Add",
5008                        ino, name, filetype);
5009
5010         return ret;
5011 }
5012
5013 /*
5014  * Traverse the given INODE_REF and call find_dir_item() to find related
5015  * DIR_ITEM/DIR_INDEX.
5016  *
5017  * @root:       the root of the fs/file tree
5018  * @ref_key:    the key of the INODE_REF
5019  * @path        the path provides node and slot
5020  * @refs:       the count of INODE_REF
5021  * @mode:       the st_mode of INODE_ITEM
5022  * @name_ret:   returns with the first ref's name
5023  * @name_len_ret:    len of the name_ret
5024  *
5025  * Return 0 if no error occurred.
5026  */
5027 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5028                            struct btrfs_path *path, char *name_ret,
5029                            u32 *namelen_ret, u64 *refs_ret, int mode)
5030 {
5031         struct btrfs_key key;
5032         struct btrfs_key location;
5033         struct btrfs_inode_ref *ref;
5034         struct extent_buffer *node;
5035         char namebuf[BTRFS_NAME_LEN] = {0};
5036         u32 total;
5037         u32 cur = 0;
5038         u32 len;
5039         u32 name_len;
5040         u64 index;
5041         int ret;
5042         int err = 0;
5043         int tmp_err;
5044         int slot;
5045         int need_research = 0;
5046         u64 refs;
5047
5048 begin:
5049         err = 0;
5050         cur = 0;
5051         refs = *refs_ret;
5052
5053         /* since after repair, path and the dir item may be changed */
5054         if (need_research) {
5055                 need_research = 0;
5056                 btrfs_release_path(path);
5057                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
5058                 /* the item was deleted, let path point to the last checked item */
5059                 if (ret > 0) {
5060                         if (path->slots[0] == 0)
5061                                 btrfs_prev_leaf(root, path);
5062                         else
5063                                 path->slots[0]--;
5064                 }
5065                 if (ret)
5066                         goto out;
5067         }
5068
5069         location.objectid = ref_key->objectid;
5070         location.type = BTRFS_INODE_ITEM_KEY;
5071         location.offset = 0;
5072         node = path->nodes[0];
5073         slot = path->slots[0];
5074
5075         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5076         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5077         total = btrfs_item_size_nr(node, slot);
5078
5079 next:
5080         /* Update inode ref count */
5081         refs++;
5082         tmp_err = 0;
5083         index = btrfs_inode_ref_index(node, ref);
5084         name_len = btrfs_inode_ref_name_len(node, ref);
5085
5086         if (name_len <= BTRFS_NAME_LEN) {
5087                 len = name_len;
5088         } else {
5089                 len = BTRFS_NAME_LEN;
5090                 warning("root %llu INODE_REF[%llu %llu] name too long",
5091                         root->objectid, ref_key->objectid, ref_key->offset);
5092         }
5093
5094         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
5095
5096         /* copy the first name found to name_ret */
5097         if (refs == 1 && name_ret) {
5098                 memcpy(name_ret, namebuf, len);
5099                 *namelen_ret = len;
5100         }
5101
5102         /* Check root dir ref */
5103         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
5104                 if (index != 0 || len != strlen("..") ||
5105                     strncmp("..", namebuf, len) ||
5106                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
5107                         /* set err bits then repair will delete the ref */
5108                         err |= DIR_INDEX_MISSING;
5109                         err |= DIR_ITEM_MISSING;
5110                 }
5111                 goto end;
5112         }
5113
5114         /* Find related DIR_INDEX */
5115         key.objectid = ref_key->offset;
5116         key.type = BTRFS_DIR_INDEX_KEY;
5117         key.offset = index;
5118         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5119                             imode_to_type(mode));
5120
5121         /* Find related dir_item */
5122         key.objectid = ref_key->offset;
5123         key.type = BTRFS_DIR_ITEM_KEY;
5124         key.offset = btrfs_name_hash(namebuf, len);
5125         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
5126                             imode_to_type(mode));
5127 end:
5128         if (tmp_err && repair) {
5129                 ret = repair_ternary_lowmem(root, ref_key->offset,
5130                                             ref_key->objectid, index, namebuf,
5131                                             name_len, imode_to_type(mode),
5132                                             tmp_err);
5133                 if (!ret) {
5134                         need_research = 1;
5135                         goto begin;
5136                 }
5137         }
5138         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
5139                             imode_to_type(mode), tmp_err);
5140         err |= tmp_err;
5141         len = sizeof(*ref) + name_len;
5142         ref = (struct btrfs_inode_ref *)((char *)ref + len);
5143         cur += len;
5144         if (cur < total)
5145                 goto next;
5146
5147 out:
5148         *refs_ret = refs;
5149         return err;
5150 }
5151
5152 /*
5153  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
5154  * DIR_ITEM/DIR_INDEX.
5155  *
5156  * @root:       the root of the fs/file tree
5157  * @ref_key:    the key of the INODE_EXTREF
5158  * @refs:       the count of INODE_EXTREF
5159  * @mode:       the st_mode of INODE_ITEM
5160  *
5161  * Return 0 if no error occurred.
5162  */
5163 static int check_inode_extref(struct btrfs_root *root,
5164                               struct btrfs_key *ref_key,
5165                               struct extent_buffer *node, int slot, u64 *refs,
5166                               int mode)
5167 {
5168         struct btrfs_key key;
5169         struct btrfs_key location;
5170         struct btrfs_inode_extref *extref;
5171         char namebuf[BTRFS_NAME_LEN] = {0};
5172         u32 total;
5173         u32 cur = 0;
5174         u32 len;
5175         u32 name_len;
5176         u64 index;
5177         u64 parent;
5178         int ret;
5179         int err = 0;
5180
5181         location.objectid = ref_key->objectid;
5182         location.type = BTRFS_INODE_ITEM_KEY;
5183         location.offset = 0;
5184
5185         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5186         total = btrfs_item_size_nr(node, slot);
5187
5188 next:
5189         /* update inode ref count */
5190         (*refs)++;
5191         name_len = btrfs_inode_extref_name_len(node, extref);
5192         index = btrfs_inode_extref_index(node, extref);
5193         parent = btrfs_inode_extref_parent(node, extref);
5194         if (name_len <= BTRFS_NAME_LEN) {
5195                 len = name_len;
5196         } else {
5197                 len = BTRFS_NAME_LEN;
5198                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
5199                         root->objectid, ref_key->objectid, ref_key->offset);
5200         }
5201         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
5202
5203         /* Check root dir ref name */
5204         if (index == 0 && strncmp(namebuf, "..", name_len)) {
5205                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
5206                       root->objectid, ref_key->objectid, ref_key->offset,
5207                       namebuf);
5208                 err |= ROOT_DIR_ERROR;
5209         }
5210
5211         /* find related dir_index */
5212         key.objectid = parent;
5213         key.type = BTRFS_DIR_INDEX_KEY;
5214         key.offset = index;
5215         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5216         err |= ret;
5217
5218         /* find related dir_item */
5219         key.objectid = parent;
5220         key.type = BTRFS_DIR_ITEM_KEY;
5221         key.offset = btrfs_name_hash(namebuf, len);
5222         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
5223         err |= ret;
5224
5225         len = sizeof(*extref) + name_len;
5226         extref = (struct btrfs_inode_extref *)((char *)extref + len);
5227         cur += len;
5228
5229         if (cur < total)
5230                 goto next;
5231
5232         return err;
5233 }
5234
5235 /*
5236  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
5237  * DIR_ITEM/DIR_INDEX match.
5238  * Return with @index_ret.
5239  *
5240  * @root:       the root of the fs/file tree
5241  * @key:        the key of the INODE_REF/INODE_EXTREF
5242  * @name:       the name in the INODE_REF/INODE_EXTREF
5243  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
5244  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
5245  *              value (64)-1 means do not check index
5246  * @ext_ref:    the EXTENDED_IREF feature
5247  *
5248  * Return 0 if no error occurred.
5249  * Return >0 for error bitmap
5250  */
5251 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
5252                           char *name, int namelen, u64 *index_ret,
5253                           unsigned int ext_ref)
5254 {
5255         struct btrfs_path path;
5256         struct btrfs_inode_ref *ref;
5257         struct btrfs_inode_extref *extref;
5258         struct extent_buffer *node;
5259         char ref_namebuf[BTRFS_NAME_LEN] = {0};
5260         u32 total;
5261         u32 cur = 0;
5262         u32 len;
5263         u32 ref_namelen;
5264         u64 ref_index;
5265         u64 parent;
5266         u64 dir_id;
5267         int slot;
5268         int ret;
5269
5270         ASSERT(index_ret);
5271
5272         btrfs_init_path(&path);
5273         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5274         if (ret) {
5275                 ret = INODE_REF_MISSING;
5276                 goto extref;
5277         }
5278
5279         node = path.nodes[0];
5280         slot = path.slots[0];
5281
5282         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5283         total = btrfs_item_size_nr(node, slot);
5284
5285         /* Iterate all entry of INODE_REF */
5286         while (cur < total) {
5287                 ret = INODE_REF_MISSING;
5288
5289                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5290                 ref_index = btrfs_inode_ref_index(node, ref);
5291                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5292                         goto next_ref;
5293
5294                 if (cur + sizeof(*ref) + ref_namelen > total ||
5295                     ref_namelen > BTRFS_NAME_LEN) {
5296                         warning("root %llu INODE %s[%llu %llu] name too long",
5297                                 root->objectid,
5298                                 key->type == BTRFS_INODE_REF_KEY ?
5299                                         "REF" : "EXTREF",
5300                                 key->objectid, key->offset);
5301
5302                         if (cur + sizeof(*ref) > total)
5303                                 break;
5304                         len = min_t(u32, total - cur - sizeof(*ref),
5305                                     BTRFS_NAME_LEN);
5306                 } else {
5307                         len = ref_namelen;
5308                 }
5309
5310                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5311                                    len);
5312
5313                 if (len != namelen || strncmp(ref_namebuf, name, len))
5314                         goto next_ref;
5315
5316                 *index_ret = ref_index;
5317                 ret = 0;
5318                 goto out;
5319 next_ref:
5320                 len = sizeof(*ref) + ref_namelen;
5321                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5322                 cur += len;
5323         }
5324
5325 extref:
5326         /* Skip if not support EXTENDED_IREF feature */
5327         if (!ext_ref)
5328                 goto out;
5329
5330         btrfs_release_path(&path);
5331         btrfs_init_path(&path);
5332
5333         dir_id = key->offset;
5334         key->type = BTRFS_INODE_EXTREF_KEY;
5335         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5336
5337         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5338         if (ret) {
5339                 ret = INODE_REF_MISSING;
5340                 goto out;
5341         }
5342
5343         node = path.nodes[0];
5344         slot = path.slots[0];
5345
5346         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5347         cur = 0;
5348         total = btrfs_item_size_nr(node, slot);
5349
5350         /* Iterate all entry of INODE_EXTREF */
5351         while (cur < total) {
5352                 ret = INODE_REF_MISSING;
5353
5354                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5355                 ref_index = btrfs_inode_extref_index(node, extref);
5356                 parent = btrfs_inode_extref_parent(node, extref);
5357                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5358                         goto next_extref;
5359
5360                 if (parent != dir_id)
5361                         goto next_extref;
5362
5363                 if (ref_namelen <= BTRFS_NAME_LEN) {
5364                         len = ref_namelen;
5365                 } else {
5366                         len = BTRFS_NAME_LEN;
5367                         warning("root %llu INODE %s[%llu %llu] name too long",
5368                                 root->objectid,
5369                                 key->type == BTRFS_INODE_REF_KEY ?
5370                                         "REF" : "EXTREF",
5371                                 key->objectid, key->offset);
5372                 }
5373                 read_extent_buffer(node, ref_namebuf,
5374                                    (unsigned long)(extref + 1), len);
5375
5376                 if (len != namelen || strncmp(ref_namebuf, name, len))
5377                         goto next_extref;
5378
5379                 *index_ret = ref_index;
5380                 ret = 0;
5381                 goto out;
5382
5383 next_extref:
5384                 len = sizeof(*extref) + ref_namelen;
5385                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5386                 cur += len;
5387
5388         }
5389 out:
5390         btrfs_release_path(&path);
5391         return ret;
5392 }
5393
5394 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5395                                u64 ino, u64 index, const char *namebuf,
5396                                int name_len, u8 filetype, int err)
5397 {
5398         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5399                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5400                       root->objectid, key->objectid, key->offset, namebuf,
5401                       filetype,
5402                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5403         }
5404
5405         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5406                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5407                       root->objectid, key->objectid, index, namebuf, filetype,
5408                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5409         }
5410
5411         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5412                 error(
5413                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5414                       root->objectid, ino, index, namebuf, filetype,
5415                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5416         }
5417
5418         if (err & INODE_REF_MISSING)
5419                 error(
5420                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5421                       root->objectid, ino, key->objectid, namebuf, filetype);
5422
5423 }
5424
5425 /*
5426  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5427  *
5428  * Returns error after repair
5429  */
5430 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5431                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5432                            int err)
5433 {
5434         int ret;
5435
5436         if (err & INODE_ITEM_MISSING) {
5437                 ret = repair_inode_item_missing(root, ino, filetype);
5438                 if (!ret)
5439                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5440         }
5441
5442         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5443                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5444                                             name_len, filetype, err);
5445                 if (!ret) {
5446                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5447                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5448                         err &= ~(INODE_REF_MISSING);
5449                 }
5450         }
5451         return err;
5452 }
5453
5454 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5455                 u64 *size_ret)
5456 {
5457         struct btrfs_key key;
5458         struct btrfs_path path;
5459         u32 len;
5460         struct btrfs_dir_item *di;
5461         int ret;
5462         int cur = 0;
5463         int total = 0;
5464
5465         ASSERT(size_ret);
5466         *size_ret = 0;
5467
5468         key.objectid = ino;
5469         key.type = type;
5470         key.offset = (u64)-1;
5471
5472         btrfs_init_path(&path);
5473         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5474         if (ret < 0) {
5475                 ret = -EIO;
5476                 goto out;
5477         }
5478         /* if found, go to spacial case */
5479         if (ret == 0)
5480                 goto special_case;
5481
5482 loop:
5483         ret = btrfs_previous_item(root, &path, ino, type);
5484
5485         if (ret) {
5486                 ret = 0;
5487                 goto out;
5488         }
5489
5490 special_case:
5491         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5492         cur = 0;
5493         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5494
5495         while (cur < total) {
5496                 len = btrfs_dir_name_len(path.nodes[0], di);
5497                 if (len > BTRFS_NAME_LEN)
5498                         len = BTRFS_NAME_LEN;
5499                 *size_ret += len;
5500
5501                 len += btrfs_dir_data_len(path.nodes[0], di);
5502                 len += sizeof(*di);
5503                 di = (struct btrfs_dir_item *)((char *)di + len);
5504                 cur += len;
5505         }
5506         goto loop;
5507
5508 out:
5509         btrfs_release_path(&path);
5510         return ret;
5511 }
5512
5513 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5514 {
5515         u64 item_size;
5516         u64 index_size;
5517         int ret;
5518
5519         ASSERT(size);
5520         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5521         if (ret)
5522                 goto out;
5523
5524         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5525         if (ret)
5526                 goto out;
5527
5528         *size = item_size + index_size;
5529
5530 out:
5531         if (ret)
5532                 error("failed to count root %llu INODE[%llu] root size",
5533                       root->objectid, ino);
5534         return ret;
5535 }
5536
5537 /*
5538  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5539  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5540  *
5541  * @root:       the root of the fs/file tree
5542  * @key:        the key of the INODE_REF/INODE_EXTREF
5543  * @path:       the path
5544  * @size:       the st_size of the INODE_ITEM
5545  * @ext_ref:    the EXTENDED_IREF feature
5546  *
5547  * Return 0 if no error occurred.
5548  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5549  */
5550 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5551                           struct btrfs_path *path, u64 *size,
5552                           unsigned int ext_ref)
5553 {
5554         struct btrfs_dir_item *di;
5555         struct btrfs_inode_item *ii;
5556         struct btrfs_key key;
5557         struct btrfs_key location;
5558         struct extent_buffer *node;
5559         int slot;
5560         char namebuf[BTRFS_NAME_LEN] = {0};
5561         u32 total;
5562         u32 cur = 0;
5563         u32 len;
5564         u32 name_len;
5565         u32 data_len;
5566         u8 filetype;
5567         u32 mode = 0;
5568         u64 index;
5569         int ret;
5570         int err;
5571         int tmp_err;
5572         int need_research = 0;
5573
5574         /*
5575          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5576          * ignore index check.
5577          */
5578         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5579                 index = di_key->offset;
5580         else
5581                 index = (u64)-1;
5582 begin:
5583         err = 0;
5584         cur = 0;
5585
5586         /* since after repair, path and the dir item may be changed */
5587         if (need_research) {
5588                 need_research = 0;
5589                 err |= DIR_COUNT_AGAIN;
5590                 btrfs_release_path(path);
5591                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5592                 /* the item was deleted, let path point the last checked item */
5593                 if (ret > 0) {
5594                         if (path->slots[0] == 0)
5595                                 btrfs_prev_leaf(root, path);
5596                         else
5597                                 path->slots[0]--;
5598                 }
5599                 if (ret)
5600                         goto out;
5601         }
5602
5603         node = path->nodes[0];
5604         slot = path->slots[0];
5605
5606         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5607         total = btrfs_item_size_nr(node, slot);
5608         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5609
5610         while (cur < total) {
5611                 data_len = btrfs_dir_data_len(node, di);
5612                 tmp_err = 0;
5613                 if (data_len)
5614                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5615                               root->objectid,
5616               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5617                               di_key->objectid, di_key->offset, data_len);
5618
5619                 name_len = btrfs_dir_name_len(node, di);
5620                 if (name_len <= BTRFS_NAME_LEN) {
5621                         len = name_len;
5622                 } else {
5623                         len = BTRFS_NAME_LEN;
5624                         warning("root %llu %s[%llu %llu] name too long",
5625                                 root->objectid,
5626                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5627                                 di_key->objectid, di_key->offset);
5628                 }
5629                 (*size) += name_len;
5630                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5631                                    len);
5632                 filetype = btrfs_dir_type(node, di);
5633
5634                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5635                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5636                         err |= -EIO;
5637                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5638                         root->objectid, di_key->objectid, di_key->offset,
5639                         namebuf, len, filetype, di_key->offset,
5640                         btrfs_name_hash(namebuf, len));
5641                 }
5642
5643                 btrfs_dir_item_key_to_cpu(node, di, &location);
5644                 /* Ignore related ROOT_ITEM check */
5645                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5646                         goto next;
5647
5648                 btrfs_release_path(path);
5649                 /* Check relative INODE_ITEM(existence/filetype) */
5650                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5651                 if (ret) {
5652                         tmp_err |= INODE_ITEM_MISSING;
5653                         goto next;
5654                 }
5655
5656                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5657                                     struct btrfs_inode_item);
5658                 mode = btrfs_inode_mode(path->nodes[0], ii);
5659                 if (imode_to_type(mode) != filetype) {
5660                         tmp_err |= INODE_ITEM_MISMATCH;
5661                         goto next;
5662                 }
5663
5664                 /* Check relative INODE_REF/INODE_EXTREF */
5665                 key.objectid = location.objectid;
5666                 key.type = BTRFS_INODE_REF_KEY;
5667                 key.offset = di_key->objectid;
5668                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5669                                           &index, ext_ref);
5670
5671                 /* check relative INDEX/ITEM */
5672                 key.objectid = di_key->objectid;
5673                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5674                         key.type = BTRFS_DIR_INDEX_KEY;
5675                         key.offset = index;
5676                 } else {
5677                         key.type = BTRFS_DIR_ITEM_KEY;
5678                         key.offset = btrfs_name_hash(namebuf, name_len);
5679                 }
5680
5681                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5682                                          name_len, filetype);
5683                 /* find_dir_item may find index */
5684                 if (key.type == BTRFS_DIR_INDEX_KEY)
5685                         index = key.offset;
5686 next:
5687
5688                 if (tmp_err && repair) {
5689                         ret = repair_dir_item(root, di_key->objectid,
5690                                               location.objectid, index,
5691                                               imode_to_type(mode), namebuf,
5692                                               name_len, tmp_err);
5693                         if (ret != tmp_err) {
5694                                 need_research = 1;
5695                                 goto begin;
5696                         }
5697                 }
5698                 btrfs_release_path(path);
5699                 print_dir_item_err(root, di_key, location.objectid, index,
5700                                    namebuf, name_len, filetype, tmp_err);
5701                 err |= tmp_err;
5702                 len = sizeof(*di) + name_len + data_len;
5703                 di = (struct btrfs_dir_item *)((char *)di + len);
5704                 cur += len;
5705
5706                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5707                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5708                               root->objectid, di_key->objectid,
5709                               di_key->offset);
5710                         break;
5711                 }
5712         }
5713 out:
5714         /* research path */
5715         btrfs_release_path(path);
5716         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5717         if (ret)
5718                 err |= ret > 0 ? -ENOENT : ret;
5719         return err;
5720 }
5721
5722 /*
5723  * Wrapper function of btrfs_punch_hole.
5724  *
5725  * Returns 0 means success.
5726  * Returns not 0 means error.
5727  */
5728 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5729                              u64 len)
5730 {
5731         struct btrfs_trans_handle *trans;
5732         int ret = 0;
5733
5734         trans = btrfs_start_transaction(root, 1);
5735         if (IS_ERR(trans))
5736                 return PTR_ERR(trans);
5737
5738         ret = btrfs_punch_hole(trans, root, ino, start, len);
5739         if (ret)
5740                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5741                       start, len, ino);
5742         else
5743                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5744                        ino);
5745
5746         btrfs_commit_transaction(trans, root);
5747         return ret;
5748 }
5749
5750 /*
5751  * Check file extent datasum/hole, update the size of the file extents,
5752  * check and update the last offset of the file extent.
5753  *
5754  * @root:       the root of fs/file tree.
5755  * @fkey:       the key of the file extent.
5756  * @nodatasum:  INODE_NODATASUM feature.
5757  * @size:       the sum of all EXTENT_DATA items size for this inode.
5758  * @end:        the offset of the last extent.
5759  *
5760  * Return 0 if no error occurred.
5761  */
5762 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5763                              struct extent_buffer *node, int slot,
5764                              unsigned int nodatasum, u64 *size, u64 *end)
5765 {
5766         struct btrfs_file_extent_item *fi;
5767         u64 disk_bytenr;
5768         u64 disk_num_bytes;
5769         u64 extent_num_bytes;
5770         u64 extent_offset;
5771         u64 csum_found;         /* In byte size, sectorsize aligned */
5772         u64 search_start;       /* Logical range start we search for csum */
5773         u64 search_len;         /* Logical range len we search for csum */
5774         unsigned int extent_type;
5775         unsigned int is_hole;
5776         int compressed = 0;
5777         int ret;
5778         int err = 0;
5779
5780         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5781
5782         /* Check inline extent */
5783         extent_type = btrfs_file_extent_type(node, fi);
5784         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5785                 struct btrfs_item *e = btrfs_item_nr(slot);
5786                 u32 item_inline_len;
5787
5788                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5789                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5790                 compressed = btrfs_file_extent_compression(node, fi);
5791                 if (extent_num_bytes == 0) {
5792                         error(
5793                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5794                                 root->objectid, fkey->objectid, fkey->offset);
5795                         err |= FILE_EXTENT_ERROR;
5796                 }
5797                 if (!compressed && extent_num_bytes != item_inline_len) {
5798                         error(
5799                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5800                                 root->objectid, fkey->objectid, fkey->offset,
5801                                 extent_num_bytes, item_inline_len);
5802                         err |= FILE_EXTENT_ERROR;
5803                 }
5804                 *end += extent_num_bytes;
5805                 *size += extent_num_bytes;
5806                 return err;
5807         }
5808
5809         /* Check extent type */
5810         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5811                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5812                 err |= FILE_EXTENT_ERROR;
5813                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5814                       root->objectid, fkey->objectid, fkey->offset);
5815                 return err;
5816         }
5817
5818         /* Check REG_EXTENT/PREALLOC_EXTENT */
5819         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5820         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5821         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5822         extent_offset = btrfs_file_extent_offset(node, fi);
5823         compressed = btrfs_file_extent_compression(node, fi);
5824         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5825
5826         /*
5827          * Check EXTENT_DATA csum
5828          *
5829          * For plain (uncompressed) extent, we should only check the range
5830          * we're referring to, as it's possible that part of prealloc extent
5831          * has been written, and has csum:
5832          *
5833          * |<--- Original large preallocated extent A ---->|
5834          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5835          *      No csum                         Has csum
5836          *
5837          * For compressed extent, we should check the whole range.
5838          */
5839         if (!compressed) {
5840                 search_start = disk_bytenr + extent_offset;
5841                 search_len = extent_num_bytes;
5842         } else {
5843                 search_start = disk_bytenr;
5844                 search_len = disk_num_bytes;
5845         }
5846         ret = count_csum_range(root, search_start, search_len, &csum_found);
5847         if (csum_found > 0 && nodatasum) {
5848                 err |= ODD_CSUM_ITEM;
5849                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5850                       root->objectid, fkey->objectid, fkey->offset);
5851         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5852                    !is_hole && (ret < 0 || csum_found < search_len)) {
5853                 err |= CSUM_ITEM_MISSING;
5854                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5855                       root->objectid, fkey->objectid, fkey->offset,
5856                       csum_found, search_len);
5857         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5858                 err |= ODD_CSUM_ITEM;
5859                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5860                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5861         }
5862
5863         /* Check EXTENT_DATA hole */
5864         if (!no_holes && *end != fkey->offset) {
5865                 if (repair)
5866                         ret = punch_extent_hole(root, fkey->objectid,
5867                                                 *end, fkey->offset - *end);
5868                 if (!repair || ret) {
5869                         err |= FILE_EXTENT_ERROR;
5870                         error("root %llu EXTENT_DATA[%llu %llu] interrupt",
5871                               root->objectid, fkey->objectid, fkey->offset);
5872                 }
5873         }
5874
5875         *end += extent_num_bytes;
5876         if (!is_hole)
5877                 *size += extent_num_bytes;
5878
5879         return err;
5880 }
5881
5882 /*
5883  * Set inode item nbytes to @nbytes
5884  *
5885  * Returns  0     on success
5886  * Returns  != 0  on error
5887  */
5888 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5889                                       struct btrfs_path *path,
5890                                       u64 ino, u64 nbytes)
5891 {
5892         struct btrfs_trans_handle *trans;
5893         struct btrfs_inode_item *ii;
5894         struct btrfs_key key;
5895         struct btrfs_key research_key;
5896         int err = 0;
5897         int ret;
5898
5899         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5900
5901         key.objectid = ino;
5902         key.type = BTRFS_INODE_ITEM_KEY;
5903         key.offset = 0;
5904
5905         trans = btrfs_start_transaction(root, 1);
5906         if (IS_ERR(trans)) {
5907                 ret = PTR_ERR(trans);
5908                 err |= ret;
5909                 goto out;
5910         }
5911
5912         btrfs_release_path(path);
5913         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5914         if (ret > 0)
5915                 ret = -ENOENT;
5916         if (ret) {
5917                 err |= ret;
5918                 goto fail;
5919         }
5920
5921         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5922                             struct btrfs_inode_item);
5923         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5924         btrfs_mark_buffer_dirty(path->nodes[0]);
5925 fail:
5926         btrfs_commit_transaction(trans, root);
5927 out:
5928         if (ret)
5929                 error("failed to set nbytes in inode %llu root %llu",
5930                       ino, root->root_key.objectid);
5931         else
5932                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5933                        root->root_key.objectid, nbytes);
5934
5935         /* research path */
5936         btrfs_release_path(path);
5937         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5938         err |= ret;
5939
5940         return err;
5941 }
5942
5943 /*
5944  * Set directory inode isize to @isize.
5945  *
5946  * Returns 0     on success.
5947  * Returns != 0  on error.
5948  */
5949 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5950                                    struct btrfs_path *path,
5951                                    u64 ino, u64 isize)
5952 {
5953         struct btrfs_trans_handle *trans;
5954         struct btrfs_inode_item *ii;
5955         struct btrfs_key key;
5956         struct btrfs_key research_key;
5957         int ret;
5958         int err = 0;
5959
5960         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5961
5962         key.objectid = ino;
5963         key.type = BTRFS_INODE_ITEM_KEY;
5964         key.offset = 0;
5965
5966         trans = btrfs_start_transaction(root, 1);
5967         if (IS_ERR(trans)) {
5968                 ret = PTR_ERR(trans);
5969                 err |= ret;
5970                 goto out;
5971         }
5972
5973         btrfs_release_path(path);
5974         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5975         if (ret > 0)
5976                 ret = -ENOENT;
5977         if (ret) {
5978                 err |= ret;
5979                 goto fail;
5980         }
5981
5982         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5983                             struct btrfs_inode_item);
5984         btrfs_set_inode_size(path->nodes[0], ii, isize);
5985         btrfs_mark_buffer_dirty(path->nodes[0]);
5986 fail:
5987         btrfs_commit_transaction(trans, root);
5988 out:
5989         if (ret)
5990                 error("failed to set isize in inode %llu root %llu",
5991                       ino, root->root_key.objectid);
5992         else
5993                 printf("Set isize in inode %llu root %llu to %llu\n",
5994                        ino, root->root_key.objectid, isize);
5995
5996         btrfs_release_path(path);
5997         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5998         err |= ret;
5999
6000         return err;
6001 }
6002
6003 /*
6004  * Wrapper function for btrfs_add_orphan_item().
6005  *
6006  * Returns 0     on success.
6007  * Returns != 0  on error.
6008  */
6009 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
6010                                            struct btrfs_path *path, u64 ino)
6011 {
6012         struct btrfs_trans_handle *trans;
6013         struct btrfs_key research_key;
6014         int ret;
6015         int err = 0;
6016
6017         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
6018
6019         trans = btrfs_start_transaction(root, 1);
6020         if (IS_ERR(trans)) {
6021                 ret = PTR_ERR(trans);
6022                 err |= ret;
6023                 goto out;
6024         }
6025
6026         btrfs_release_path(path);
6027         ret = btrfs_add_orphan_item(trans, root, path, ino);
6028         err |= ret;
6029         btrfs_commit_transaction(trans, root);
6030 out:
6031         if (ret)
6032                 error("failed to add inode %llu as orphan item root %llu",
6033                       ino, root->root_key.objectid);
6034         else
6035                 printf("Added inode %llu as orphan item root %llu\n",
6036                        ino, root->root_key.objectid);
6037
6038         btrfs_release_path(path);
6039         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
6040         err |= ret;
6041
6042         return err;
6043 }
6044
6045 /* Set inode_item nlink to @ref_count.
6046  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
6047  *
6048  * Returns 0 on success
6049  */
6050 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
6051                                       struct btrfs_path *path, u64 ino,
6052                                       const char *name, u32 namelen,
6053                                       u64 ref_count, u8 filetype, u64 *nlink)
6054 {
6055         struct btrfs_trans_handle *trans;
6056         struct btrfs_inode_item *ii;
6057         struct btrfs_key key;
6058         struct btrfs_key old_key;
6059         char namebuf[BTRFS_NAME_LEN] = {0};
6060         int name_len;
6061         int ret;
6062         int ret2;
6063
6064         /* save the key */
6065         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
6066
6067         if (name && namelen) {
6068                 ASSERT(namelen <= BTRFS_NAME_LEN);
6069                 memcpy(namebuf, name, namelen);
6070                 name_len = namelen;
6071         } else {
6072                 sprintf(namebuf, "%llu", ino);
6073                 name_len = count_digits(ino);
6074                 printf("Can't find file name for inode %llu, use %s instead\n",
6075                        ino, namebuf);
6076         }
6077
6078         trans = btrfs_start_transaction(root, 1);
6079         if (IS_ERR(trans)) {
6080                 ret = PTR_ERR(trans);
6081                 goto out;
6082         }
6083
6084         btrfs_release_path(path);
6085         /* if refs is 0, put it into lostfound */
6086         if (ref_count == 0) {
6087                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
6088                                               name_len, filetype, &ref_count);
6089                 if (ret)
6090                         goto fail;
6091         }
6092
6093         /* reset inode_item's nlink to ref_count */
6094         key.objectid = ino;
6095         key.type = BTRFS_INODE_ITEM_KEY;
6096         key.offset = 0;
6097
6098         btrfs_release_path(path);
6099         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6100         if (ret > 0)
6101                 ret = -ENOENT;
6102         if (ret)
6103                 goto fail;
6104
6105         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
6106                             struct btrfs_inode_item);
6107         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
6108         btrfs_mark_buffer_dirty(path->nodes[0]);
6109
6110         if (nlink)
6111                 *nlink = ref_count;
6112 fail:
6113         btrfs_commit_transaction(trans, root);
6114 out:
6115         if (ret)
6116                 error(
6117         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
6118                        root->objectid, ino, namebuf, filetype);
6119         else
6120                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
6121                        root->objectid, ino, namebuf, filetype);
6122
6123         /* research */
6124         btrfs_release_path(path);
6125         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
6126         if (ret2 < 0)
6127                 return ret |= ret2;
6128         return ret;
6129 }
6130
6131 /*
6132  * Check INODE_ITEM and related ITEMs (the same inode number)
6133  * 1. check link count
6134  * 2. check inode ref/extref
6135  * 3. check dir item/index
6136  *
6137  * @ext_ref:    the EXTENDED_IREF feature
6138  *
6139  * Return 0 if no error occurred.
6140  * Return >0 for error or hit the traversal is done(by error bitmap)
6141  */
6142 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
6143                             unsigned int ext_ref)
6144 {
6145         struct extent_buffer *node;
6146         struct btrfs_inode_item *ii;
6147         struct btrfs_key key;
6148         struct btrfs_key last_key;
6149         u64 inode_id;
6150         u32 mode;
6151         u64 nlink;
6152         u64 nbytes;
6153         u64 isize;
6154         u64 size = 0;
6155         u64 refs = 0;
6156         u64 extent_end = 0;
6157         u64 extent_size = 0;
6158         unsigned int dir;
6159         unsigned int nodatasum;
6160         int slot;
6161         int ret;
6162         int err = 0;
6163         char namebuf[BTRFS_NAME_LEN] = {0};
6164         u32 name_len = 0;
6165
6166         node = path->nodes[0];
6167         slot = path->slots[0];
6168
6169         btrfs_item_key_to_cpu(node, &key, slot);
6170         inode_id = key.objectid;
6171
6172         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
6173                 ret = btrfs_next_item(root, path);
6174                 if (ret > 0)
6175                         err |= LAST_ITEM;
6176                 return err;
6177         }
6178
6179         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
6180         isize = btrfs_inode_size(node, ii);
6181         nbytes = btrfs_inode_nbytes(node, ii);
6182         mode = btrfs_inode_mode(node, ii);
6183         dir = imode_to_type(mode) == BTRFS_FT_DIR;
6184         nlink = btrfs_inode_nlink(node, ii);
6185         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
6186
6187         while (1) {
6188                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
6189                 ret = btrfs_next_item(root, path);
6190                 if (ret < 0) {
6191                         /* out will fill 'err' rusing current statistics */
6192                         goto out;
6193                 } else if (ret > 0) {
6194                         err |= LAST_ITEM;
6195                         goto out;
6196                 }
6197
6198                 node = path->nodes[0];
6199                 slot = path->slots[0];
6200                 btrfs_item_key_to_cpu(node, &key, slot);
6201                 if (key.objectid != inode_id)
6202                         goto out;
6203
6204                 switch (key.type) {
6205                 case BTRFS_INODE_REF_KEY:
6206                         ret = check_inode_ref(root, &key, path, namebuf,
6207                                               &name_len, &refs, mode);
6208                         err |= ret;
6209                         break;
6210                 case BTRFS_INODE_EXTREF_KEY:
6211                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
6212                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
6213                                         root->objectid, key.objectid,
6214                                         key.offset);
6215                         ret = check_inode_extref(root, &key, node, slot, &refs,
6216                                                  mode);
6217                         err |= ret;
6218                         break;
6219                 case BTRFS_DIR_ITEM_KEY:
6220                 case BTRFS_DIR_INDEX_KEY:
6221                         if (!dir) {
6222                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
6223                                         root->objectid, inode_id,
6224                                         imode_to_type(mode), key.objectid,
6225                                         key.offset);
6226                         }
6227                         ret = check_dir_item(root, &key, path, &size, ext_ref);
6228                         err |= ret;
6229                         break;
6230                 case BTRFS_EXTENT_DATA_KEY:
6231                         if (dir) {
6232                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
6233                                         root->objectid, inode_id, key.objectid,
6234                                         key.offset);
6235                         }
6236                         ret = check_file_extent(root, &key, node, slot,
6237                                                 nodatasum, &extent_size,
6238                                                 &extent_end);
6239                         err |= ret;
6240                         break;
6241                 case BTRFS_XATTR_ITEM_KEY:
6242                         break;
6243                 default:
6244                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
6245                               key.objectid, key.type, key.offset);
6246                 }
6247         }
6248
6249 out:
6250         if (err & LAST_ITEM) {
6251                 btrfs_release_path(path);
6252                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
6253                 if (ret)
6254                         return err;
6255         }
6256
6257         /* verify INODE_ITEM nlink/isize/nbytes */
6258         if (dir) {
6259                 if (repair && (err & DIR_COUNT_AGAIN)) {
6260                         err &= ~DIR_COUNT_AGAIN;
6261                         count_dir_isize(root, inode_id, &size);
6262                 }
6263
6264                 if ((nlink != 1 || refs != 1) && repair) {
6265                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6266                                 namebuf, name_len, refs, imode_to_type(mode),
6267                                 &nlink);
6268                 }
6269
6270                 if (nlink != 1) {
6271                         err |= LINK_COUNT_ERROR;
6272                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6273                               root->objectid, inode_id, nlink);
6274                 }
6275
6276                 /*
6277                  * Just a warning, as dir inode nbytes is just an
6278                  * instructive value.
6279                  */
6280                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6281                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6282                                 root->objectid, inode_id,
6283                                 root->fs_info->nodesize);
6284                 }
6285
6286                 if (isize != size) {
6287                         if (repair)
6288                                 ret = repair_dir_isize_lowmem(root, path,
6289                                                               inode_id, size);
6290                         if (!repair || ret) {
6291                                 err |= ISIZE_ERROR;
6292                                 error(
6293                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6294                                       root->objectid, inode_id, isize, size);
6295                         }
6296                 }
6297         } else {
6298                 if (nlink != refs) {
6299                         if (repair)
6300                                 ret = repair_inode_nlinks_lowmem(root, path,
6301                                          inode_id, namebuf, name_len, refs,
6302                                          imode_to_type(mode), &nlink);
6303                         if (!repair || ret) {
6304                                 err |= LINK_COUNT_ERROR;
6305                                 error(
6306                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6307                                       root->objectid, inode_id, nlink, refs);
6308                         }
6309                 } else if (!nlink) {
6310                         if (repair)
6311                                 ret = repair_inode_orphan_item_lowmem(root,
6312                                                               path, inode_id);
6313                         if (!repair || ret) {
6314                                 err |= ORPHAN_ITEM;
6315                                 error("root %llu INODE[%llu] is orphan item",
6316                                       root->objectid, inode_id);
6317                         }
6318                 }
6319
6320                 if (!nbytes && !no_holes && extent_end < isize) {
6321                         if (repair)
6322                                 ret = punch_extent_hole(root, inode_id,
6323                                                 extent_end, isize - extent_end);
6324                         if (!repair || ret) {
6325                                 err |= NBYTES_ERROR;
6326                                 error(
6327         "root %llu INODE[%llu] size %llu should have a file extent hole",
6328                                       root->objectid, inode_id, isize);
6329                         }
6330                 }
6331
6332                 if (nbytes != extent_size) {
6333                         if (repair)
6334                                 ret = repair_inode_nbytes_lowmem(root, path,
6335                                                          inode_id, extent_size);
6336                         if (!repair || ret) {
6337                                 err |= NBYTES_ERROR;
6338                                 error(
6339         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6340                                       root->objectid, inode_id, nbytes,
6341                                       extent_size);
6342                         }
6343                 }
6344         }
6345
6346         if (err & LAST_ITEM)
6347                 btrfs_next_item(root, path);
6348         return err;
6349 }
6350
6351 /*
6352  * Insert the missing inode item and inode ref.
6353  *
6354  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6355  * Root dir should be handled specially because root dir is the root of fs.
6356  *
6357  * returns err (>0 or 0) after repair
6358  */
6359 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6360 {
6361         struct btrfs_trans_handle *trans;
6362         struct btrfs_key key;
6363         struct btrfs_path path;
6364         int filetype = BTRFS_FT_DIR;
6365         int ret = 0;
6366
6367         btrfs_init_path(&path);
6368
6369         if (err & INODE_REF_MISSING) {
6370                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6371                 key.type = BTRFS_INODE_REF_KEY;
6372                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6373
6374                 trans = btrfs_start_transaction(root, 1);
6375                 if (IS_ERR(trans)) {
6376                         ret = PTR_ERR(trans);
6377                         goto out;
6378                 }
6379
6380                 btrfs_release_path(&path);
6381                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6382                 if (ret)
6383                         goto trans_fail;
6384
6385                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6386                                              BTRFS_FIRST_FREE_OBJECTID,
6387                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6388                 if (ret)
6389                         goto trans_fail;
6390
6391                 printf("Add INODE_REF[%llu %llu] name %s\n",
6392                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6393                        "..");
6394                 err &= ~INODE_REF_MISSING;
6395 trans_fail:
6396                 if (ret)
6397                         error("fail to insert first inode's ref");
6398                 btrfs_commit_transaction(trans, root);
6399         }
6400
6401         if (err & INODE_ITEM_MISSING) {
6402                 ret = repair_inode_item_missing(root,
6403                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6404                 if (ret)
6405                         goto out;
6406                 err &= ~INODE_ITEM_MISSING;
6407         }
6408 out:
6409         if (ret)
6410                 error("fail to repair first inode");
6411         btrfs_release_path(&path);
6412         return err;
6413 }
6414
6415 /*
6416  * check first root dir's inode_item and inode_ref
6417  *
6418  * returns 0 means no error
6419  * returns >0 means error
6420  * returns <0 means fatal error
6421  */
6422 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6423 {
6424         struct btrfs_path path;
6425         struct btrfs_key key;
6426         struct btrfs_inode_item *ii;
6427         u64 index;
6428         u32 mode;
6429         int err = 0;
6430         int ret;
6431
6432         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6433         key.type = BTRFS_INODE_ITEM_KEY;
6434         key.offset = 0;
6435
6436         /* For root being dropped, we don't need to check first inode */
6437         if (btrfs_root_refs(&root->root_item) == 0 &&
6438             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6439             BTRFS_FIRST_FREE_OBJECTID)
6440                 return 0;
6441
6442         btrfs_init_path(&path);
6443         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6444         if (ret < 0)
6445                 goto out;
6446         if (ret > 0) {
6447                 ret = 0;
6448                 err |= INODE_ITEM_MISSING;
6449         } else {
6450                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6451                                     struct btrfs_inode_item);
6452                 mode = btrfs_inode_mode(path.nodes[0], ii);
6453                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6454                         err |= INODE_ITEM_MISMATCH;
6455         }
6456
6457         /* lookup first inode ref */
6458         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6459         key.type = BTRFS_INODE_REF_KEY;
6460         /* special index value */
6461         index = 0;
6462
6463         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6464         if (ret < 0)
6465                 goto out;
6466         err |= ret;
6467
6468 out:
6469         btrfs_release_path(&path);
6470
6471         if (err && repair)
6472                 err = repair_fs_first_inode(root, err);
6473
6474         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6475                 error("root dir INODE_ITEM is %s",
6476                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6477         if (err & INODE_REF_MISSING)
6478                 error("root dir INODE_REF is missing");
6479
6480         return ret < 0 ? ret : err;
6481 }
6482
6483 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6484                                                 u64 parent, u64 root)
6485 {
6486         struct rb_node *node;
6487         struct tree_backref *back = NULL;
6488         struct tree_backref match = {
6489                 .node = {
6490                         .is_data = 0,
6491                 },
6492         };
6493
6494         if (parent) {
6495                 match.parent = parent;
6496                 match.node.full_backref = 1;
6497         } else {
6498                 match.root = root;
6499         }
6500
6501         node = rb_search(&rec->backref_tree, &match.node.node,
6502                          (rb_compare_keys)compare_extent_backref, NULL);
6503         if (node)
6504                 back = to_tree_backref(rb_node_to_extent_backref(node));
6505
6506         return back;
6507 }
6508
6509 static struct data_backref *find_data_backref(struct extent_record *rec,
6510                                                 u64 parent, u64 root,
6511                                                 u64 owner, u64 offset,
6512                                                 int found_ref,
6513                                                 u64 disk_bytenr, u64 bytes)
6514 {
6515         struct rb_node *node;
6516         struct data_backref *back = NULL;
6517         struct data_backref match = {
6518                 .node = {
6519                         .is_data = 1,
6520                 },
6521                 .owner = owner,
6522                 .offset = offset,
6523                 .bytes = bytes,
6524                 .found_ref = found_ref,
6525                 .disk_bytenr = disk_bytenr,
6526         };
6527
6528         if (parent) {
6529                 match.parent = parent;
6530                 match.node.full_backref = 1;
6531         } else {
6532                 match.root = root;
6533         }
6534
6535         node = rb_search(&rec->backref_tree, &match.node.node,
6536                          (rb_compare_keys)compare_extent_backref, NULL);
6537         if (node)
6538                 back = to_data_backref(rb_node_to_extent_backref(node));
6539
6540         return back;
6541 }
6542 /*
6543  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6544  * blocks and integrity of fs tree items.
6545  *
6546  * @root:         the root of the tree to be checked.
6547  * @ext_ref       feature EXTENDED_IREF is enable or not.
6548  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6549  *                otherwise means check fs tree(s) items relationship and
6550  *                @root MUST be a fs tree root.
6551  * Returns 0      represents OK.
6552  * Returns not 0  represents error.
6553  */
6554 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6555                             struct btrfs_root *root, unsigned int ext_ref,
6556                             int check_all)
6557
6558 {
6559         struct btrfs_path path;
6560         struct node_refs nrefs;
6561         struct btrfs_root_item *root_item = &root->root_item;
6562         int ret;
6563         int level;
6564         int err = 0;
6565
6566         memset(&nrefs, 0, sizeof(nrefs));
6567         if (!check_all) {
6568                 /*
6569                  * We need to manually check the first inode item (256)
6570                  * As the following traversal function will only start from
6571                  * the first inode item in the leaf, if inode item (256) is
6572                  * missing we will skip it forever.
6573                  */
6574                 ret = check_fs_first_inode(root, ext_ref);
6575                 if (ret < 0)
6576                         return ret;
6577         }
6578
6579
6580         level = btrfs_header_level(root->node);
6581         btrfs_init_path(&path);
6582
6583         if (btrfs_root_refs(root_item) > 0 ||
6584             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6585                 path.nodes[level] = root->node;
6586                 path.slots[level] = 0;
6587                 extent_buffer_get(root->node);
6588         } else {
6589                 struct btrfs_key key;
6590
6591                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6592                 level = root_item->drop_level;
6593                 path.lowest_level = level;
6594                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6595                 if (ret < 0)
6596                         goto out;
6597                 ret = 0;
6598         }
6599
6600         while (1) {
6601                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6602                                         ext_ref, check_all);
6603
6604                 err |= !!ret;
6605
6606                 /* if ret is negative, walk shall stop */
6607                 if (ret < 0) {
6608                         ret = err;
6609                         break;
6610                 }
6611
6612                 ret = walk_up_tree_v2(root, &path, &level);
6613                 if (ret != 0) {
6614                         /* Normal exit, reset ret to err */
6615                         ret = err;
6616                         break;
6617                 }
6618         }
6619
6620 out:
6621         btrfs_release_path(&path);
6622         return ret;
6623 }
6624
6625 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
6626
6627 /*
6628  * Iterate all items in the tree and call check_inode_item() to check.
6629  *
6630  * @root:       the root of the tree to be checked.
6631  * @ext_ref:    the EXTENDED_IREF feature
6632  *
6633  * Return 0 if no error found.
6634  * Return <0 for error.
6635  */
6636 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6637 {
6638         reset_cached_block_groups(root->fs_info);
6639         return check_btrfs_root(NULL, root, ext_ref, 0);
6640 }
6641
6642 /*
6643  * Find the relative ref for root_ref and root_backref.
6644  *
6645  * @root:       the root of the root tree.
6646  * @ref_key:    the key of the root ref.
6647  *
6648  * Return 0 if no error occurred.
6649  */
6650 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6651                           struct extent_buffer *node, int slot)
6652 {
6653         struct btrfs_path path;
6654         struct btrfs_key key;
6655         struct btrfs_root_ref *ref;
6656         struct btrfs_root_ref *backref;
6657         char ref_name[BTRFS_NAME_LEN] = {0};
6658         char backref_name[BTRFS_NAME_LEN] = {0};
6659         u64 ref_dirid;
6660         u64 ref_seq;
6661         u32 ref_namelen;
6662         u64 backref_dirid;
6663         u64 backref_seq;
6664         u32 backref_namelen;
6665         u32 len;
6666         int ret;
6667         int err = 0;
6668
6669         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6670         ref_dirid = btrfs_root_ref_dirid(node, ref);
6671         ref_seq = btrfs_root_ref_sequence(node, ref);
6672         ref_namelen = btrfs_root_ref_name_len(node, ref);
6673
6674         if (ref_namelen <= BTRFS_NAME_LEN) {
6675                 len = ref_namelen;
6676         } else {
6677                 len = BTRFS_NAME_LEN;
6678                 warning("%s[%llu %llu] ref_name too long",
6679                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6680                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6681                         ref_key->offset);
6682         }
6683         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6684
6685         /* Find relative root_ref */
6686         key.objectid = ref_key->offset;
6687         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6688         key.offset = ref_key->objectid;
6689
6690         btrfs_init_path(&path);
6691         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6692         if (ret) {
6693                 err |= ROOT_REF_MISSING;
6694                 error("%s[%llu %llu] couldn't find relative ref",
6695                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6696                       "ROOT_REF" : "ROOT_BACKREF",
6697                       ref_key->objectid, ref_key->offset);
6698                 goto out;
6699         }
6700
6701         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6702                                  struct btrfs_root_ref);
6703         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6704         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6705         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6706
6707         if (backref_namelen <= BTRFS_NAME_LEN) {
6708                 len = backref_namelen;
6709         } else {
6710                 len = BTRFS_NAME_LEN;
6711                 warning("%s[%llu %llu] ref_name too long",
6712                         key.type == BTRFS_ROOT_REF_KEY ?
6713                         "ROOT_REF" : "ROOT_BACKREF",
6714                         key.objectid, key.offset);
6715         }
6716         read_extent_buffer(path.nodes[0], backref_name,
6717                            (unsigned long)(backref + 1), len);
6718
6719         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6720             ref_namelen != backref_namelen ||
6721             strncmp(ref_name, backref_name, len)) {
6722                 err |= ROOT_REF_MISMATCH;
6723                 error("%s[%llu %llu] mismatch relative ref",
6724                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6725                       "ROOT_REF" : "ROOT_BACKREF",
6726                       ref_key->objectid, ref_key->offset);
6727         }
6728 out:
6729         btrfs_release_path(&path);
6730         return err;
6731 }
6732
6733 /*
6734  * Check all fs/file tree in low_memory mode.
6735  *
6736  * 1. for fs tree root item, call check_fs_root_v2()
6737  * 2. for fs tree root ref/backref, call check_root_ref()
6738  *
6739  * Return 0 if no error occurred.
6740  */
6741 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6742 {
6743         struct btrfs_root *tree_root = fs_info->tree_root;
6744         struct btrfs_root *cur_root = NULL;
6745         struct btrfs_path path;
6746         struct btrfs_key key;
6747         struct extent_buffer *node;
6748         unsigned int ext_ref;
6749         int slot;
6750         int ret;
6751         int err = 0;
6752
6753         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6754
6755         btrfs_init_path(&path);
6756         key.objectid = BTRFS_FS_TREE_OBJECTID;
6757         key.offset = 0;
6758         key.type = BTRFS_ROOT_ITEM_KEY;
6759
6760         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6761         if (ret < 0) {
6762                 err = ret;
6763                 goto out;
6764         } else if (ret > 0) {
6765                 err = -ENOENT;
6766                 goto out;
6767         }
6768
6769         while (1) {
6770                 node = path.nodes[0];
6771                 slot = path.slots[0];
6772                 btrfs_item_key_to_cpu(node, &key, slot);
6773                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6774                         goto out;
6775                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6776                     fs_root_objectid(key.objectid)) {
6777                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6778                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6779                                                                        &key);
6780                         } else {
6781                                 key.offset = (u64)-1;
6782                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6783                         }
6784
6785                         if (IS_ERR(cur_root)) {
6786                                 error("Fail to read fs/subvol tree: %lld",
6787                                       key.objectid);
6788                                 err = -EIO;
6789                                 goto next;
6790                         }
6791
6792                         ret = check_fs_root_v2(cur_root, ext_ref);
6793                         err |= ret;
6794
6795                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6796                                 btrfs_free_fs_root(cur_root);
6797                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6798                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6799                         ret = check_root_ref(tree_root, &key, node, slot);
6800                         err |= ret;
6801                 }
6802 next:
6803                 ret = btrfs_next_item(tree_root, &path);
6804                 if (ret > 0)
6805                         goto out;
6806                 if (ret < 0) {
6807                         err = ret;
6808                         goto out;
6809                 }
6810         }
6811
6812 out:
6813         btrfs_release_path(&path);
6814         return err;
6815 }
6816
6817 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6818                           struct cache_tree *root_cache)
6819 {
6820         int ret;
6821
6822         if (!ctx.progress_enabled)
6823                 fprintf(stderr, "checking fs roots\n");
6824         if (check_mode == CHECK_MODE_LOWMEM)
6825                 ret = check_fs_roots_v2(fs_info);
6826         else
6827                 ret = check_fs_roots(fs_info, root_cache);
6828
6829         return ret;
6830 }
6831
6832 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6833 {
6834         struct extent_backref *back, *tmp;
6835         struct tree_backref *tback;
6836         struct data_backref *dback;
6837         u64 found = 0;
6838         int err = 0;
6839
6840         rbtree_postorder_for_each_entry_safe(back, tmp,
6841                                              &rec->backref_tree, node) {
6842                 if (!back->found_extent_tree) {
6843                         err = 1;
6844                         if (!print_errs)
6845                                 goto out;
6846                         if (back->is_data) {
6847                                 dback = to_data_backref(back);
6848                                 fprintf(stderr, "Data backref %llu %s %llu"
6849                                         " owner %llu offset %llu num_refs %lu"
6850                                         " not found in extent tree\n",
6851                                         (unsigned long long)rec->start,
6852                                         back->full_backref ?
6853                                         "parent" : "root",
6854                                         back->full_backref ?
6855                                         (unsigned long long)dback->parent:
6856                                         (unsigned long long)dback->root,
6857                                         (unsigned long long)dback->owner,
6858                                         (unsigned long long)dback->offset,
6859                                         (unsigned long)dback->num_refs);
6860                         } else {
6861                                 tback = to_tree_backref(back);
6862                                 fprintf(stderr, "Tree backref %llu parent %llu"
6863                                         " root %llu not found in extent tree\n",
6864                                         (unsigned long long)rec->start,
6865                                         (unsigned long long)tback->parent,
6866                                         (unsigned long long)tback->root);
6867                         }
6868                 }
6869                 if (!back->is_data && !back->found_ref) {
6870                         err = 1;
6871                         if (!print_errs)
6872                                 goto out;
6873                         tback = to_tree_backref(back);
6874                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6875                                 (unsigned long long)rec->start,
6876                                 back->full_backref ? "parent" : "root",
6877                                 back->full_backref ?
6878                                 (unsigned long long)tback->parent :
6879                                 (unsigned long long)tback->root, back);
6880                 }
6881                 if (back->is_data) {
6882                         dback = to_data_backref(back);
6883                         if (dback->found_ref != dback->num_refs) {
6884                                 err = 1;
6885                                 if (!print_errs)
6886                                         goto out;
6887                                 fprintf(stderr, "Incorrect local backref count"
6888                                         " on %llu %s %llu owner %llu"
6889                                         " offset %llu found %u wanted %u back %p\n",
6890                                         (unsigned long long)rec->start,
6891                                         back->full_backref ?
6892                                         "parent" : "root",
6893                                         back->full_backref ?
6894                                         (unsigned long long)dback->parent:
6895                                         (unsigned long long)dback->root,
6896                                         (unsigned long long)dback->owner,
6897                                         (unsigned long long)dback->offset,
6898                                         dback->found_ref, dback->num_refs, back);
6899                         }
6900                         if (dback->disk_bytenr != rec->start) {
6901                                 err = 1;
6902                                 if (!print_errs)
6903                                         goto out;
6904                                 fprintf(stderr, "Backref disk bytenr does not"
6905                                         " match extent record, bytenr=%llu, "
6906                                         "ref bytenr=%llu\n",
6907                                         (unsigned long long)rec->start,
6908                                         (unsigned long long)dback->disk_bytenr);
6909                         }
6910
6911                         if (dback->bytes != rec->nr) {
6912                                 err = 1;
6913                                 if (!print_errs)
6914                                         goto out;
6915                                 fprintf(stderr, "Backref bytes do not match "
6916                                         "extent backref, bytenr=%llu, ref "
6917                                         "bytes=%llu, backref bytes=%llu\n",
6918                                         (unsigned long long)rec->start,
6919                                         (unsigned long long)rec->nr,
6920                                         (unsigned long long)dback->bytes);
6921                         }
6922                 }
6923                 if (!back->is_data) {
6924                         found += 1;
6925                 } else {
6926                         dback = to_data_backref(back);
6927                         found += dback->found_ref;
6928                 }
6929         }
6930         if (found != rec->refs) {
6931                 err = 1;
6932                 if (!print_errs)
6933                         goto out;
6934                 fprintf(stderr, "Incorrect global backref count "
6935                         "on %llu found %llu wanted %llu\n",
6936                         (unsigned long long)rec->start,
6937                         (unsigned long long)found,
6938                         (unsigned long long)rec->refs);
6939         }
6940 out:
6941         return err;
6942 }
6943
/* rb_free_nodes() callback: free the extent_backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6950
6951 static void free_all_extent_backrefs(struct extent_record *rec)
6952 {
6953         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6954 }
6955
6956 static void free_extent_record_cache(struct cache_tree *extent_cache)
6957 {
6958         struct cache_extent *cache;
6959         struct extent_record *rec;
6960
6961         while (1) {
6962                 cache = first_cache_extent(extent_cache);
6963                 if (!cache)
6964                         break;
6965                 rec = container_of(cache, struct extent_record, cache);
6966                 remove_cache_extent(extent_cache, cache);
6967                 free_all_extent_backrefs(rec);
6968                 free(rec);
6969         }
6970 }
6971
6972 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6973                                  struct extent_record *rec)
6974 {
6975         if (rec->content_checked && rec->owner_ref_checked &&
6976             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6977             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6978             !rec->bad_full_backref && !rec->crossing_stripes &&
6979             !rec->wrong_chunk_type) {
6980                 remove_cache_extent(extent_cache, &rec->cache);
6981                 free_all_extent_backrefs(rec);
6982                 list_del_init(&rec->list);
6983                 free(rec);
6984         }
6985         return 0;
6986 }
6987
6988 static int check_owner_ref(struct btrfs_root *root,
6989                             struct extent_record *rec,
6990                             struct extent_buffer *buf)
6991 {
6992         struct extent_backref *node, *tmp;
6993         struct tree_backref *back;
6994         struct btrfs_root *ref_root;
6995         struct btrfs_key key;
6996         struct btrfs_path path;
6997         struct extent_buffer *parent;
6998         int level;
6999         int found = 0;
7000         int ret;
7001
7002         rbtree_postorder_for_each_entry_safe(node, tmp,
7003                                              &rec->backref_tree, node) {
7004                 if (node->is_data)
7005                         continue;
7006                 if (!node->found_ref)
7007                         continue;
7008                 if (node->full_backref)
7009                         continue;
7010                 back = to_tree_backref(node);
7011                 if (btrfs_header_owner(buf) == back->root)
7012                         return 0;
7013         }
7014         BUG_ON(rec->is_root);
7015
7016         /* try to find the block by search corresponding fs tree */
7017         key.objectid = btrfs_header_owner(buf);
7018         key.type = BTRFS_ROOT_ITEM_KEY;
7019         key.offset = (u64)-1;
7020
7021         ref_root = btrfs_read_fs_root(root->fs_info, &key);
7022         if (IS_ERR(ref_root))
7023                 return 1;
7024
7025         level = btrfs_header_level(buf);
7026         if (level == 0)
7027                 btrfs_item_key_to_cpu(buf, &key, 0);
7028         else
7029                 btrfs_node_key_to_cpu(buf, &key, 0);
7030
7031         btrfs_init_path(&path);
7032         path.lowest_level = level + 1;
7033         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
7034         if (ret < 0)
7035                 return 0;
7036
7037         parent = path.nodes[level + 1];
7038         if (parent && buf->start == btrfs_node_blockptr(parent,
7039                                                         path.slots[level + 1]))
7040                 found = 1;
7041
7042         btrfs_release_path(&path);
7043         return found ? 0 : 1;
7044 }
7045
7046 static int is_extent_tree_record(struct extent_record *rec)
7047 {
7048         struct extent_backref *node, *tmp;
7049         struct tree_backref *back;
7050         int is_extent = 0;
7051
7052         rbtree_postorder_for_each_entry_safe(node, tmp,
7053                                              &rec->backref_tree, node) {
7054                 if (node->is_data)
7055                         return 0;
7056                 back = to_tree_backref(node);
7057                 if (node->full_backref)
7058                         return 0;
7059                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
7060                         is_extent = 1;
7061         }
7062         return is_extent;
7063 }
7064
7065
7066 static int record_bad_block_io(struct btrfs_fs_info *info,
7067                                struct cache_tree *extent_cache,
7068                                u64 start, u64 len)
7069 {
7070         struct extent_record *rec;
7071         struct cache_extent *cache;
7072         struct btrfs_key key;
7073
7074         cache = lookup_cache_extent(extent_cache, start, len);
7075         if (!cache)
7076                 return 0;
7077
7078         rec = container_of(cache, struct extent_record, cache);
7079         if (!is_extent_tree_record(rec))
7080                 return 0;
7081
7082         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
7083         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
7084 }
7085
/*
 * Exchange the entries at @slot and @slot + 1 of tree block @buf.
 *
 * For a node (non-zero header level) the two key pointers are swapped as
 * whole structures. For a leaf the item data, item offsets, item sizes and
 * keys of the two slots are exchanged.
 *
 * Returns 0 on success or -ENOMEM if a temporary item buffer cannot be
 * allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
                       struct extent_buffer *buf, int slot)
{
        if (btrfs_header_level(buf)) {
                struct btrfs_key_ptr ptr1, ptr2;

                /* Read both key pointers, then write them back crosswise. */
                read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
                                   sizeof(struct btrfs_key_ptr));
                read_extent_buffer(buf, &ptr2,
                                   btrfs_node_key_ptr_offset(slot + 1),
                                   sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr1,
                                    btrfs_node_key_ptr_offset(slot + 1),
                                    sizeof(struct btrfs_key_ptr));
                write_extent_buffer(buf, &ptr2,
                                    btrfs_node_key_ptr_offset(slot),
                                    sizeof(struct btrfs_key_ptr));
                if (slot == 0) {
                        struct btrfs_disk_key key;
                        /*
                         * The first key of the node changed; presumably
                         * btrfs_fixup_low_keys() updates the copies of it
                         * held in the levels above - confirm.
                         */
                        btrfs_node_key(buf, &key, 0);
                        btrfs_fixup_low_keys(root, path, &key,
                                             btrfs_header_level(buf) + 1);
                }
        } else {
                struct btrfs_item *item1, *item2;
                struct btrfs_key k1, k2;
                char *item1_data, *item2_data;
                u32 item1_offset, item2_offset, item1_size, item2_size;

                item1 = btrfs_item_nr(slot);
                item2 = btrfs_item_nr(slot + 1);
                btrfs_item_key_to_cpu(buf, &k1, slot);
                btrfs_item_key_to_cpu(buf, &k2, slot + 1);
                item1_offset = btrfs_item_offset(buf, item1);
                item2_offset = btrfs_item_offset(buf, item2);
                item1_size = btrfs_item_size(buf, item1);
                item2_size = btrfs_item_size(buf, item2);

                /* Temporary copies of both item payloads. */
                item1_data = malloc(item1_size);
                if (!item1_data)
                        return -ENOMEM;
                item2_data = malloc(item2_size);
                if (!item2_data) {
                        free(item1_data);
                        return -ENOMEM;
                }

                read_extent_buffer(buf, item1_data, item1_offset, item1_size);
                read_extent_buffer(buf, item2_data, item2_offset, item2_size);

                /*
                 * NOTE(review): item1_data holds item1_size bytes but is
                 * written here with length item2_size (and vice versa on the
                 * next line). When the two sizes differ the copy length does
                 * not match the source buffer - confirm this is intended.
                 */
                write_extent_buffer(buf, item1_data, item2_offset, item2_size);
                write_extent_buffer(buf, item2_data, item1_offset, item1_size);
                free(item1_data);
                free(item2_data);

                /*
                 * NOTE(review): offsets and sizes are exchanged in addition
                 * to the payload bytes above and the keys below; verify that
                 * each key still ends up paired with its own data.
                 */
                btrfs_set_item_offset(buf, item1, item2_offset);
                btrfs_set_item_offset(buf, item2, item1_offset);
                btrfs_set_item_size(buf, item1, item2_size);
                btrfs_set_item_size(buf, item2, item1_size);

                /* Swap the keys; the helper acts on path->slots[0]. */
                path->slots[0] = slot;
                btrfs_set_item_key_unsafe(root, path, &k2);
                path->slots[0] = slot + 1;
                btrfs_set_item_key_unsafe(root, path, &k1);
        }
        return 0;
}
7153
7154 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
7155 {
7156         struct extent_buffer *buf;
7157         struct btrfs_key k1, k2;
7158         int i;
7159         int level = path->lowest_level;
7160         int ret = -EIO;
7161
7162         buf = path->nodes[level];
7163         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
7164                 if (level) {
7165                         btrfs_node_key_to_cpu(buf, &k1, i);
7166                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
7167                 } else {
7168                         btrfs_item_key_to_cpu(buf, &k1, i);
7169                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
7170                 }
7171                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
7172                         continue;
7173                 ret = swap_values(root, path, buf, i);
7174                 if (ret)
7175                         break;
7176                 btrfs_mark_buffer_dirty(buf);
7177                 i = 0;
7178         }
7179         return ret;
7180 }
7181
7182 static int delete_bogus_item(struct btrfs_root *root,
7183                              struct btrfs_path *path,
7184                              struct extent_buffer *buf, int slot)
7185 {
7186         struct btrfs_key key;
7187         int nritems = btrfs_header_nritems(buf);
7188
7189         btrfs_item_key_to_cpu(buf, &key, slot);
7190
7191         /* These are all the keys we can deal with missing. */
7192         if (key.type != BTRFS_DIR_INDEX_KEY &&
7193             key.type != BTRFS_EXTENT_ITEM_KEY &&
7194             key.type != BTRFS_METADATA_ITEM_KEY &&
7195             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7196             key.type != BTRFS_EXTENT_DATA_REF_KEY)
7197                 return -1;
7198
7199         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
7200                (unsigned long long)key.objectid, key.type,
7201                (unsigned long long)key.offset, slot, buf->start);
7202         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
7203                               btrfs_item_nr_offset(slot + 1),
7204                               sizeof(struct btrfs_item) *
7205                               (nritems - slot - 1));
7206         btrfs_set_header_nritems(buf, nritems - 1);
7207         if (slot == 0) {
7208                 struct btrfs_disk_key disk_key;
7209
7210                 btrfs_item_key(buf, &disk_key, 0);
7211                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
7212         }
7213         btrfs_mark_buffer_dirty(buf);
7214         return 0;
7215 }
7216
7217 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
7218 {
7219         struct extent_buffer *buf;
7220         int i;
7221         int ret = 0;
7222
7223         /* We should only get this for leaves */
7224         BUG_ON(path->lowest_level);
7225         buf = path->nodes[0];
7226 again:
7227         for (i = 0; i < btrfs_header_nritems(buf); i++) {
7228                 unsigned int shift = 0, offset;
7229
7230                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
7231                     BTRFS_LEAF_DATA_SIZE(root)) {
7232                         if (btrfs_item_end_nr(buf, i) >
7233                             BTRFS_LEAF_DATA_SIZE(root)) {
7234                                 ret = delete_bogus_item(root, path, buf, i);
7235                                 if (!ret)
7236                                         goto again;
7237                                 fprintf(stderr, "item is off the end of the "
7238                                         "leaf, can't fix\n");
7239                                 ret = -EIO;
7240                                 break;
7241                         }
7242                         shift = BTRFS_LEAF_DATA_SIZE(root) -
7243                                 btrfs_item_end_nr(buf, i);
7244                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
7245                            btrfs_item_offset_nr(buf, i - 1)) {
7246                         if (btrfs_item_end_nr(buf, i) >
7247                             btrfs_item_offset_nr(buf, i - 1)) {
7248                                 ret = delete_bogus_item(root, path, buf, i);
7249                                 if (!ret)
7250                                         goto again;
7251                                 fprintf(stderr, "items overlap, can't fix\n");
7252                                 ret = -EIO;
7253                                 break;
7254                         }
7255                         shift = btrfs_item_offset_nr(buf, i - 1) -
7256                                 btrfs_item_end_nr(buf, i);
7257                 }
7258                 if (!shift)
7259                         continue;
7260
7261                 printf("Shifting item nr %d by %u bytes in block %llu\n",
7262                        i, shift, (unsigned long long)buf->start);
7263                 offset = btrfs_item_offset_nr(buf, i);
7264                 memmove_extent_buffer(buf,
7265                                       btrfs_leaf_data(buf) + offset + shift,
7266                                       btrfs_leaf_data(buf) + offset,
7267                                       btrfs_item_size_nr(buf, i));
7268                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7269                                       offset + shift);
7270                 btrfs_mark_buffer_dirty(buf);
7271         }
7272
7273         /*
7274          * We may have moved things, in which case we want to exit so we don't
7275          * write those changes out.  Once we have proper abort functionality in
7276          * progs this can be changed to something nicer.
7277          */
7278         BUG_ON(ret);
7279         return ret;
7280 }
7281
7282 /*
7283  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
7284  * then just return -EIO.
7285  */
7286 static int try_to_fix_bad_block(struct btrfs_root *root,
7287                                 struct extent_buffer *buf,
7288                                 enum btrfs_tree_block_status status)
7289 {
7290         struct btrfs_trans_handle *trans;
7291         struct ulist *roots;
7292         struct ulist_node *node;
7293         struct btrfs_root *search_root;
7294         struct btrfs_path path;
7295         struct ulist_iterator iter;
7296         struct btrfs_key root_key, key;
7297         int ret;
7298
7299         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7300             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7301                 return -EIO;
7302
7303         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7304         if (ret)
7305                 return -EIO;
7306
7307         btrfs_init_path(&path);
7308         ULIST_ITER_INIT(&iter);
7309         while ((node = ulist_next(roots, &iter))) {
7310                 root_key.objectid = node->val;
7311                 root_key.type = BTRFS_ROOT_ITEM_KEY;
7312                 root_key.offset = (u64)-1;
7313
7314                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7315                 if (IS_ERR(root)) {
7316                         ret = -EIO;
7317                         break;
7318                 }
7319
7320
7321                 trans = btrfs_start_transaction(search_root, 0);
7322                 if (IS_ERR(trans)) {
7323                         ret = PTR_ERR(trans);
7324                         break;
7325                 }
7326
7327                 path.lowest_level = btrfs_header_level(buf);
7328                 path.skip_check_block = 1;
7329                 if (path.lowest_level)
7330                         btrfs_node_key_to_cpu(buf, &key, 0);
7331                 else
7332                         btrfs_item_key_to_cpu(buf, &key, 0);
7333                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7334                 if (ret) {
7335                         ret = -EIO;
7336                         btrfs_commit_transaction(trans, search_root);
7337                         break;
7338                 }
7339                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7340                         ret = fix_key_order(search_root, &path);
7341                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7342                         ret = fix_item_offset(search_root, &path);
7343                 if (ret) {
7344                         btrfs_commit_transaction(trans, search_root);
7345                         break;
7346                 }
7347                 btrfs_release_path(&path);
7348                 btrfs_commit_transaction(trans, search_root);
7349         }
7350         ulist_free(roots);
7351         btrfs_release_path(&path);
7352         return ret;
7353 }
7354
7355 static int check_block(struct btrfs_root *root,
7356                        struct cache_tree *extent_cache,
7357                        struct extent_buffer *buf, u64 flags)
7358 {
7359         struct extent_record *rec;
7360         struct cache_extent *cache;
7361         struct btrfs_key key;
7362         enum btrfs_tree_block_status status;
7363         int ret = 0;
7364         int level;
7365
7366         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7367         if (!cache)
7368                 return 1;
7369         rec = container_of(cache, struct extent_record, cache);
7370         rec->generation = btrfs_header_generation(buf);
7371
7372         level = btrfs_header_level(buf);
7373         if (btrfs_header_nritems(buf) > 0) {
7374
7375                 if (level == 0)
7376                         btrfs_item_key_to_cpu(buf, &key, 0);
7377                 else
7378                         btrfs_node_key_to_cpu(buf, &key, 0);
7379
7380                 rec->info_objectid = key.objectid;
7381         }
7382         rec->info_level = level;
7383
7384         if (btrfs_is_leaf(buf))
7385                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7386         else
7387                 status = btrfs_check_node(root, &rec->parent_key, buf);
7388
7389         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7390                 if (repair)
7391                         status = try_to_fix_bad_block(root, buf, status);
7392                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7393                         ret = -EIO;
7394                         fprintf(stderr, "bad block %llu\n",
7395                                 (unsigned long long)buf->start);
7396                 } else {
7397                         /*
7398                          * Signal to callers we need to start the scan over
7399                          * again since we'll have cowed blocks.
7400                          */
7401                         ret = -EAGAIN;
7402                 }
7403         } else {
7404                 rec->content_checked = 1;
7405                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7406                         rec->owner_ref_checked = 1;
7407                 else {
7408                         ret = check_owner_ref(root, rec, buf);
7409                         if (!ret)
7410                                 rec->owner_ref_checked = 1;
7411                 }
7412         }
7413         if (!ret)
7414                 maybe_free_extent_rec(extent_cache, rec);
7415         return ret;
7416 }
7417
/*
 * Compiled out by the "#if 0" guard below.
 *
 * List-walking lookup of a tree backref on rec->backrefs: with a non-zero
 * @parent it matches full backrefs by parent, otherwise it matches non-full
 * backrefs by @root. Returns the matching backref or NULL.
 * NOTE(review): presumably superseded by a lookup on rec->backref_tree
 * defined elsewhere in this file - confirm before removing.
 */
#if 0
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct tree_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                /* Only tree backrefs are of interest here. */
                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root)
                                return back;
                }
        }
        return NULL;
}
#endif
7447
7448 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7449                                                 u64 parent, u64 root)
7450 {
7451         struct tree_backref *ref = malloc(sizeof(*ref));
7452
7453         if (!ref)
7454                 return NULL;
7455         memset(&ref->node, 0, sizeof(ref->node));
7456         if (parent > 0) {
7457                 ref->parent = parent;
7458                 ref->node.full_backref = 1;
7459         } else {
7460                 ref->root = root;
7461                 ref->node.full_backref = 0;
7462         }
7463
7464         return ref;
7465 }
7466
/*
 * Compiled out by the "#if 0" guard below.
 *
 * List-walking lookup of a data backref on rec->backrefs: with a non-zero
 * @parent it matches full backrefs by parent; otherwise it matches non-full
 * backrefs by (@root, @owner, @offset). Returns the matching backref or NULL.
 * NOTE(review): presumably superseded by a lookup on rec->backref_tree
 * defined elsewhere in this file - confirm before removing.
 */
#if 0
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *cur = rec->backrefs.next;
        struct extent_backref *node;
        struct data_backref *back;

        while(cur != &rec->backrefs) {
                node = to_extent_backref(cur);
                cur = cur->next;
                /* Only data backrefs are of interest here. */
                if (!node->is_data)
                        continue;
                back = to_data_backref(node);
                if (parent > 0) {
                        if (!node->full_backref)
                                continue;
                        if (parent == back->parent)
                                return back;
                } else {
                        if (node->full_backref)
                                continue;
                        if (back->root == root && back->owner == owner &&
                            back->offset == offset) {
                                /*
                                 * When both sides were already found, also
                                 * require the same bytes and disk_bytenr.
                                 */
                                if (found_ref && node->found_ref &&
                                    (back->bytes != bytes ||
                                    back->disk_bytenr != disk_bytenr))
                                        continue;
                                return back;
                        }
                }
        }
        return NULL;
}
#endif
7505
7506 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7507                                                 u64 parent, u64 root,
7508                                                 u64 owner, u64 offset,
7509                                                 u64 max_size)
7510 {
7511         struct data_backref *ref = malloc(sizeof(*ref));
7512
7513         if (!ref)
7514                 return NULL;
7515         memset(&ref->node, 0, sizeof(ref->node));
7516         ref->node.is_data = 1;
7517
7518         if (parent > 0) {
7519                 ref->parent = parent;
7520                 ref->owner = 0;
7521                 ref->offset = 0;
7522                 ref->node.full_backref = 1;
7523         } else {
7524                 ref->root = root;
7525                 ref->owner = owner;
7526                 ref->offset = offset;
7527                 ref->node.full_backref = 0;
7528         }
7529         ref->bytes = max_size;
7530         ref->found_ref = 0;
7531         ref->num_refs = 0;
7532         if (max_size > rec->max_size)
7533                 rec->max_size = max_size;
7534         return ref;
7535 }
7536
7537 /* Check if the type of extent matches with its chunk */
7538 static void check_extent_type(struct extent_record *rec)
7539 {
7540         struct btrfs_block_group_cache *bg_cache;
7541
7542         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7543         if (!bg_cache)
7544                 return;
7545
7546         /* data extent, check chunk directly*/
7547         if (!rec->metadata) {
7548                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7549                         rec->wrong_chunk_type = 1;
7550                 return;
7551         }
7552
7553         /* metadata extent, check the obvious case first */
7554         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7555                                  BTRFS_BLOCK_GROUP_METADATA))) {
7556                 rec->wrong_chunk_type = 1;
7557                 return;
7558         }
7559
7560         /*
7561          * Check SYSTEM extent, as it's also marked as metadata, we can only
7562          * make sure it's a SYSTEM extent by its backref
7563          */
7564         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7565                 struct extent_backref *node;
7566                 struct tree_backref *tback;
7567                 u64 bg_type;
7568
7569                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7570                 if (node->is_data) {
7571                         /* tree block shouldn't have data backref */
7572                         rec->wrong_chunk_type = 1;
7573                         return;
7574                 }
7575                 tback = container_of(node, struct tree_backref, node);
7576
7577                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7578                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7579                 else
7580                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7581                 if (!(bg_cache->flags & bg_type))
7582                         rec->wrong_chunk_type = 1;
7583         }
7584 }
7585
7586 /*
7587  * Allocate a new extent record, fill default values from @tmpl and insert int
7588  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7589  * the cache, otherwise it fails.
7590  */
7591 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7592                 struct extent_record *tmpl)
7593 {
7594         struct extent_record *rec;
7595         int ret = 0;
7596
7597         BUG_ON(tmpl->max_size == 0);
7598         rec = malloc(sizeof(*rec));
7599         if (!rec)
7600                 return -ENOMEM;
7601         rec->start = tmpl->start;
7602         rec->max_size = tmpl->max_size;
7603         rec->nr = max(tmpl->nr, tmpl->max_size);
7604         rec->found_rec = tmpl->found_rec;
7605         rec->content_checked = tmpl->content_checked;
7606         rec->owner_ref_checked = tmpl->owner_ref_checked;
7607         rec->num_duplicates = 0;
7608         rec->metadata = tmpl->metadata;
7609         rec->flag_block_full_backref = FLAG_UNSET;
7610         rec->bad_full_backref = 0;
7611         rec->crossing_stripes = 0;
7612         rec->wrong_chunk_type = 0;
7613         rec->is_root = tmpl->is_root;
7614         rec->refs = tmpl->refs;
7615         rec->extent_item_refs = tmpl->extent_item_refs;
7616         rec->parent_generation = tmpl->parent_generation;
7617         INIT_LIST_HEAD(&rec->backrefs);
7618         INIT_LIST_HEAD(&rec->dups);
7619         INIT_LIST_HEAD(&rec->list);
7620         rec->backref_tree = RB_ROOT;
7621         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7622         rec->cache.start = tmpl->start;
7623         rec->cache.size = tmpl->nr;
7624         ret = insert_cache_extent(extent_cache, &rec->cache);
7625         if (ret) {
7626                 free(rec);
7627                 return ret;
7628         }
7629         bytes_used += rec->nr;
7630
7631         if (tmpl->metadata)
7632                 rec->crossing_stripes = check_crossing_stripes(global_info,
7633                                 rec->start, global_info->nodesize);
7634         check_extent_type(rec);
7635         return ret;
7636 }
7637
7638 /*
7639  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7640  * some are hints:
7641  * - refs              - if found, increase refs
7642  * - is_root           - if found, set
7643  * - content_checked   - if found, set
7644  * - owner_ref_checked - if found, set
7645  *
7646  * If not found, create a new one, initialize and insert.
7647  */
7648 static int add_extent_rec(struct cache_tree *extent_cache,
7649                 struct extent_record *tmpl)
7650 {
7651         struct extent_record *rec;
7652         struct cache_extent *cache;
7653         int ret = 0;
7654         int dup = 0;
7655
7656         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7657         if (cache) {
7658                 rec = container_of(cache, struct extent_record, cache);
7659                 if (tmpl->refs)
7660                         rec->refs++;
7661                 if (rec->nr == 1)
7662                         rec->nr = max(tmpl->nr, tmpl->max_size);
7663
7664                 /*
7665                  * We need to make sure to reset nr to whatever the extent
7666                  * record says was the real size, this way we can compare it to
7667                  * the backrefs.
7668                  */
7669                 if (tmpl->found_rec) {
7670                         if (tmpl->start != rec->start || rec->found_rec) {
7671                                 struct extent_record *tmp;
7672
7673                                 dup = 1;
7674                                 if (list_empty(&rec->list))
7675                                         list_add_tail(&rec->list,
7676                                                       &duplicate_extents);
7677
7678                                 /*
7679                                  * We have to do this song and dance in case we
7680                                  * find an extent record that falls inside of
7681                                  * our current extent record but does not have
7682                                  * the same objectid.
7683                                  */
7684                                 tmp = malloc(sizeof(*tmp));
7685                                 if (!tmp)
7686                                         return -ENOMEM;
7687                                 tmp->start = tmpl->start;
7688                                 tmp->max_size = tmpl->max_size;
7689                                 tmp->nr = tmpl->nr;
7690                                 tmp->found_rec = 1;
7691                                 tmp->metadata = tmpl->metadata;
7692                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7693                                 INIT_LIST_HEAD(&tmp->list);
7694                                 list_add_tail(&tmp->list, &rec->dups);
7695                                 rec->num_duplicates++;
7696                         } else {
7697                                 rec->nr = tmpl->nr;
7698                                 rec->found_rec = 1;
7699                         }
7700                 }
7701
7702                 if (tmpl->extent_item_refs && !dup) {
7703                         if (rec->extent_item_refs) {
7704                                 fprintf(stderr, "block %llu rec "
7705                                         "extent_item_refs %llu, passed %llu\n",
7706                                         (unsigned long long)tmpl->start,
7707                                         (unsigned long long)
7708                                                         rec->extent_item_refs,
7709                                         (unsigned long long)tmpl->extent_item_refs);
7710                         }
7711                         rec->extent_item_refs = tmpl->extent_item_refs;
7712                 }
7713                 if (tmpl->is_root)
7714                         rec->is_root = 1;
7715                 if (tmpl->content_checked)
7716                         rec->content_checked = 1;
7717                 if (tmpl->owner_ref_checked)
7718                         rec->owner_ref_checked = 1;
7719                 memcpy(&rec->parent_key, &tmpl->parent_key,
7720                                 sizeof(tmpl->parent_key));
7721                 if (tmpl->parent_generation)
7722                         rec->parent_generation = tmpl->parent_generation;
7723                 if (rec->max_size < tmpl->max_size)
7724                         rec->max_size = tmpl->max_size;
7725
7726                 /*
7727                  * A metadata extent can't cross stripe_len boundary, otherwise
7728                  * kernel scrub won't be able to handle it.
7729                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7730                  * it.
7731                  */
7732                 if (tmpl->metadata)
7733                         rec->crossing_stripes = check_crossing_stripes(
7734                                         global_info, rec->start,
7735                                         global_info->nodesize);
7736                 check_extent_type(rec);
7737                 maybe_free_extent_rec(extent_cache, rec);
7738                 return ret;
7739         }
7740
7741         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7742
7743         return ret;
7744 }
7745
7746 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7747                             u64 parent, u64 root, int found_ref)
7748 {
7749         struct extent_record *rec;
7750         struct tree_backref *back;
7751         struct cache_extent *cache;
7752         int ret;
7753         bool insert = false;
7754
7755         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7756         if (!cache) {
7757                 struct extent_record tmpl;
7758
7759                 memset(&tmpl, 0, sizeof(tmpl));
7760                 tmpl.start = bytenr;
7761                 tmpl.nr = 1;
7762                 tmpl.metadata = 1;
7763                 tmpl.max_size = 1;
7764
7765                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7766                 if (ret)
7767                         return ret;
7768
7769                 /* really a bug in cache_extent implement now */
7770                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7771                 if (!cache)
7772                         return -ENOENT;
7773         }
7774
7775         rec = container_of(cache, struct extent_record, cache);
7776         if (rec->start != bytenr) {
7777                 /*
7778                  * Several cause, from unaligned bytenr to over lapping extents
7779                  */
7780                 return -EEXIST;
7781         }
7782
7783         back = find_tree_backref(rec, parent, root);
7784         if (!back) {
7785                 back = alloc_tree_backref(rec, parent, root);
7786                 if (!back)
7787                         return -ENOMEM;
7788                 insert = true;
7789         }
7790
7791         if (found_ref) {
7792                 if (back->node.found_ref) {
7793                         fprintf(stderr, "Extent back ref already exists "
7794                                 "for %llu parent %llu root %llu \n",
7795                                 (unsigned long long)bytenr,
7796                                 (unsigned long long)parent,
7797                                 (unsigned long long)root);
7798                 }
7799                 back->node.found_ref = 1;
7800         } else {
7801                 if (back->node.found_extent_tree) {
7802                         fprintf(stderr, "Extent back ref already exists "
7803                                 "for %llu parent %llu root %llu \n",
7804                                 (unsigned long long)bytenr,
7805                                 (unsigned long long)parent,
7806                                 (unsigned long long)root);
7807                 }
7808                 back->node.found_extent_tree = 1;
7809         }
7810         if (insert)
7811                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7812                         compare_extent_backref));
7813         check_extent_type(rec);
7814         maybe_free_extent_rec(extent_cache, rec);
7815         return 0;
7816 }
7817
7818 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7819                             u64 parent, u64 root, u64 owner, u64 offset,
7820                             u32 num_refs, int found_ref, u64 max_size)
7821 {
7822         struct extent_record *rec;
7823         struct data_backref *back;
7824         struct cache_extent *cache;
7825         int ret;
7826         bool insert = false;
7827
7828         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7829         if (!cache) {
7830                 struct extent_record tmpl;
7831
7832                 memset(&tmpl, 0, sizeof(tmpl));
7833                 tmpl.start = bytenr;
7834                 tmpl.nr = 1;
7835                 tmpl.max_size = max_size;
7836
7837                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7838                 if (ret)
7839                         return ret;
7840
7841                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7842                 if (!cache)
7843                         abort();
7844         }
7845
7846         rec = container_of(cache, struct extent_record, cache);
7847         if (rec->max_size < max_size)
7848                 rec->max_size = max_size;
7849
7850         /*
7851          * If found_ref is set then max_size is the real size and must match the
7852          * existing refs.  So if we have already found a ref then we need to
7853          * make sure that this ref matches the existing one, otherwise we need
7854          * to add a new backref so we can notice that the backrefs don't match
7855          * and we need to figure out who is telling the truth.  This is to
7856          * account for that awful fsync bug I introduced where we'd end up with
7857          * a btrfs_file_extent_item that would have its length include multiple
7858          * prealloc extents or point inside of a prealloc extent.
7859          */
7860         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7861                                  bytenr, max_size);
7862         if (!back) {
7863                 back = alloc_data_backref(rec, parent, root, owner, offset,
7864                                           max_size);
7865                 BUG_ON(!back);
7866                 insert = true;
7867         }
7868
7869         if (found_ref) {
7870                 BUG_ON(num_refs != 1);
7871                 if (back->node.found_ref)
7872                         BUG_ON(back->bytes != max_size);
7873                 back->node.found_ref = 1;
7874                 back->found_ref += 1;
7875                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7876                         back->bytes = max_size;
7877                         back->disk_bytenr = bytenr;
7878
7879                         /* Need to reinsert if not already in the tree */
7880                         if (!insert) {
7881                                 rb_erase(&back->node.node, &rec->backref_tree);
7882                                 insert = true;
7883                         }
7884                 }
7885                 rec->refs += 1;
7886                 rec->content_checked = 1;
7887                 rec->owner_ref_checked = 1;
7888         } else {
7889                 if (back->node.found_extent_tree) {
7890                         fprintf(stderr, "Extent back ref already exists "
7891                                 "for %llu parent %llu root %llu "
7892                                 "owner %llu offset %llu num_refs %lu\n",
7893                                 (unsigned long long)bytenr,
7894                                 (unsigned long long)parent,
7895                                 (unsigned long long)root,
7896                                 (unsigned long long)owner,
7897                                 (unsigned long long)offset,
7898                                 (unsigned long)num_refs);
7899                 }
7900                 back->num_refs = num_refs;
7901                 back->node.found_extent_tree = 1;
7902         }
7903         if (insert)
7904                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7905                         compare_extent_backref));
7906
7907         maybe_free_extent_rec(extent_cache, rec);
7908         return 0;
7909 }
7910
7911 static int add_pending(struct cache_tree *pending,
7912                        struct cache_tree *seen, u64 bytenr, u32 size)
7913 {
7914         int ret;
7915         ret = add_cache_extent(seen, bytenr, size);
7916         if (ret)
7917                 return ret;
7918         add_cache_extent(pending, bytenr, size);
7919         return 0;
7920 }
7921
7922 static int pick_next_pending(struct cache_tree *pending,
7923                         struct cache_tree *reada,
7924                         struct cache_tree *nodes,
7925                         u64 last, struct block_info *bits, int bits_nr,
7926                         int *reada_bits)
7927 {
7928         unsigned long node_start = last;
7929         struct cache_extent *cache;
7930         int ret;
7931
7932         cache = search_cache_extent(reada, 0);
7933         if (cache) {
7934                 bits[0].start = cache->start;
7935                 bits[0].size = cache->size;
7936                 *reada_bits = 1;
7937                 return 1;
7938         }
7939         *reada_bits = 0;
7940         if (node_start > 32768)
7941                 node_start -= 32768;
7942
7943         cache = search_cache_extent(nodes, node_start);
7944         if (!cache)
7945                 cache = search_cache_extent(nodes, 0);
7946
7947         if (!cache) {
7948                  cache = search_cache_extent(pending, 0);
7949                  if (!cache)
7950                          return 0;
7951                  ret = 0;
7952                  do {
7953                          bits[ret].start = cache->start;
7954                          bits[ret].size = cache->size;
7955                          cache = next_cache_extent(cache);
7956                          ret++;
7957                  } while (cache && ret < bits_nr);
7958                  return ret;
7959         }
7960
7961         ret = 0;
7962         do {
7963                 bits[ret].start = cache->start;
7964                 bits[ret].size = cache->size;
7965                 cache = next_cache_extent(cache);
7966                 ret++;
7967         } while (cache && ret < bits_nr);
7968
7969         if (bits_nr - ret > 8) {
7970                 u64 lookup = bits[0].start + bits[0].size;
7971                 struct cache_extent *next;
7972                 next = search_cache_extent(pending, lookup);
7973                 while(next) {
7974                         if (next->start - lookup > 32768)
7975                                 break;
7976                         bits[ret].start = next->start;
7977                         bits[ret].size = next->size;
7978                         lookup = next->start + next->size;
7979                         ret++;
7980                         if (ret == bits_nr)
7981                                 break;
7982                         next = next_cache_extent(next);
7983                         if (!next)
7984                                 break;
7985                 }
7986         }
7987         return ret;
7988 }
7989
7990 static void free_chunk_record(struct cache_extent *cache)
7991 {
7992         struct chunk_record *rec;
7993
7994         rec = container_of(cache, struct chunk_record, cache);
7995         list_del_init(&rec->list);
7996         list_del_init(&rec->dextents);
7997         free(rec);
7998 }
7999
8000 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
8001 {
8002         cache_tree_free_extents(chunk_cache, free_chunk_record);
8003 }
8004
8005 static void free_device_record(struct rb_node *node)
8006 {
8007         struct device_record *rec;
8008
8009         rec = container_of(node, struct device_record, node);
8010         free(rec);
8011 }
8012
8013 FREE_RB_BASED_TREE(device_cache, free_device_record);
8014
8015 int insert_block_group_record(struct block_group_tree *tree,
8016                               struct block_group_record *bg_rec)
8017 {
8018         int ret;
8019
8020         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
8021         if (ret)
8022                 return ret;
8023
8024         list_add_tail(&bg_rec->list, &tree->block_groups);
8025         return 0;
8026 }
8027
8028 static void free_block_group_record(struct cache_extent *cache)
8029 {
8030         struct block_group_record *rec;
8031
8032         rec = container_of(cache, struct block_group_record, cache);
8033         list_del_init(&rec->list);
8034         free(rec);
8035 }
8036
8037 void free_block_group_tree(struct block_group_tree *tree)
8038 {
8039         cache_tree_free_extents(&tree->tree, free_block_group_record);
8040 }
8041
8042 int insert_device_extent_record(struct device_extent_tree *tree,
8043                                 struct device_extent_record *de_rec)
8044 {
8045         int ret;
8046
8047         /*
8048          * Device extent is a bit different from the other extents, because
8049          * the extents which belong to the different devices may have the
8050          * same start and size, so we need use the special extent cache
8051          * search/insert functions.
8052          */
8053         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
8054         if (ret)
8055                 return ret;
8056
8057         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
8058         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
8059         return 0;
8060 }
8061
8062 static void free_device_extent_record(struct cache_extent *cache)
8063 {
8064         struct device_extent_record *rec;
8065
8066         rec = container_of(cache, struct device_extent_record, cache);
8067         if (!list_empty(&rec->chunk_list))
8068                 list_del_init(&rec->chunk_list);
8069         if (!list_empty(&rec->device_list))
8070                 list_del_init(&rec->device_list);
8071         free(rec);
8072 }
8073
8074 void free_device_extent_tree(struct device_extent_tree *tree)
8075 {
8076         cache_tree_free_extents(&tree->tree, free_device_extent_record);
8077 }
8078
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * The key's objectid is used as the extent bytenr and its offset as the
 * parent.  A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is added
 * as a tree backref; any other ref is added as a data backref carrying the
 * item's ref count.  Returns the result of the add_*_backref() call.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset,
                                0, 0);
}
#endif
8099
8100 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
8101                                             struct btrfs_key *key,
8102                                             int slot)
8103 {
8104         struct btrfs_chunk *ptr;
8105         struct chunk_record *rec;
8106         int num_stripes, i;
8107
8108         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
8109         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
8110
8111         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
8112         if (!rec) {
8113                 fprintf(stderr, "memory allocation failed\n");
8114                 exit(-1);
8115         }
8116
8117         INIT_LIST_HEAD(&rec->list);
8118         INIT_LIST_HEAD(&rec->dextents);
8119         rec->bg_rec = NULL;
8120
8121         rec->cache.start = key->offset;
8122         rec->cache.size = btrfs_chunk_length(leaf, ptr);
8123
8124         rec->generation = btrfs_header_generation(leaf);
8125
8126         rec->objectid = key->objectid;
8127         rec->type = key->type;
8128         rec->offset = key->offset;
8129
8130         rec->length = rec->cache.size;
8131         rec->owner = btrfs_chunk_owner(leaf, ptr);
8132         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
8133         rec->type_flags = btrfs_chunk_type(leaf, ptr);
8134         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
8135         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
8136         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
8137         rec->num_stripes = num_stripes;
8138         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
8139
8140         for (i = 0; i < rec->num_stripes; ++i) {
8141                 rec->stripes[i].devid =
8142                         btrfs_stripe_devid_nr(leaf, ptr, i);
8143                 rec->stripes[i].offset =
8144                         btrfs_stripe_offset_nr(leaf, ptr, i);
8145                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
8146                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
8147                                 BTRFS_UUID_SIZE);
8148         }
8149
8150         return rec;
8151 }
8152
8153 static int process_chunk_item(struct cache_tree *chunk_cache,
8154                               struct btrfs_key *key, struct extent_buffer *eb,
8155                               int slot)
8156 {
8157         struct chunk_record *rec;
8158         struct btrfs_chunk *chunk;
8159         int ret = 0;
8160
8161         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
8162         /*
8163          * Do extra check for this chunk item,
8164          *
8165          * It's still possible one can craft a leaf with CHUNK_ITEM, with
8166          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
8167          * and owner<->key_type check.
8168          */
8169         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
8170                                       key->offset);
8171         if (ret < 0) {
8172                 error("chunk(%llu, %llu) is not valid, ignore it",
8173                       key->offset, btrfs_chunk_length(eb, chunk));
8174                 return 0;
8175         }
8176         rec = btrfs_new_chunk_record(eb, key, slot);
8177         ret = insert_cache_extent(chunk_cache, &rec->cache);
8178         if (ret) {
8179                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
8180                         rec->offset, rec->length);
8181                 free(rec);
8182         }
8183
8184         return ret;
8185 }
8186
8187 static int process_device_item(struct rb_root *dev_cache,
8188                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
8189 {
8190         struct btrfs_dev_item *ptr;
8191         struct device_record *rec;
8192         int ret = 0;
8193
8194         ptr = btrfs_item_ptr(eb,
8195                 slot, struct btrfs_dev_item);
8196
8197         rec = malloc(sizeof(*rec));
8198         if (!rec) {
8199                 fprintf(stderr, "memory allocation failed\n");
8200                 return -ENOMEM;
8201         }
8202
8203         rec->devid = key->offset;
8204         rec->generation = btrfs_header_generation(eb);
8205
8206         rec->objectid = key->objectid;
8207         rec->type = key->type;
8208         rec->offset = key->offset;
8209
8210         rec->devid = btrfs_device_id(eb, ptr);
8211         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
8212         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
8213
8214         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
8215         if (ret) {
8216                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
8217                 free(rec);
8218         }
8219
8220         return ret;
8221 }
8222
8223 struct block_group_record *
8224 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
8225                              int slot)
8226 {
8227         struct btrfs_block_group_item *ptr;
8228         struct block_group_record *rec;
8229
8230         rec = calloc(1, sizeof(*rec));
8231         if (!rec) {
8232                 fprintf(stderr, "memory allocation failed\n");
8233                 exit(-1);
8234         }
8235
8236         rec->cache.start = key->objectid;
8237         rec->cache.size = key->offset;
8238
8239         rec->generation = btrfs_header_generation(leaf);
8240
8241         rec->objectid = key->objectid;
8242         rec->type = key->type;
8243         rec->offset = key->offset;
8244
8245         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
8246         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
8247
8248         INIT_LIST_HEAD(&rec->list);
8249
8250         return rec;
8251 }
8252
8253 static int process_block_group_item(struct block_group_tree *block_group_cache,
8254                                     struct btrfs_key *key,
8255                                     struct extent_buffer *eb, int slot)
8256 {
8257         struct block_group_record *rec;
8258         int ret = 0;
8259
8260         rec = btrfs_new_block_group_record(eb, key, slot);
8261         ret = insert_block_group_record(block_group_cache, rec);
8262         if (ret) {
8263                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8264                         rec->objectid, rec->offset);
8265                 free(rec);
8266         }
8267
8268         return ret;
8269 }
8270
8271 struct device_extent_record *
8272 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8273                                struct btrfs_key *key, int slot)
8274 {
8275         struct device_extent_record *rec;
8276         struct btrfs_dev_extent *ptr;
8277
8278         rec = calloc(1, sizeof(*rec));
8279         if (!rec) {
8280                 fprintf(stderr, "memory allocation failed\n");
8281                 exit(-1);
8282         }
8283
8284         rec->cache.objectid = key->objectid;
8285         rec->cache.start = key->offset;
8286
8287         rec->generation = btrfs_header_generation(leaf);
8288
8289         rec->objectid = key->objectid;
8290         rec->type = key->type;
8291         rec->offset = key->offset;
8292
8293         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8294         rec->chunk_objecteid =
8295                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8296         rec->chunk_offset =
8297                 btrfs_dev_extent_chunk_offset(leaf, ptr);
8298         rec->length = btrfs_dev_extent_length(leaf, ptr);
8299         rec->cache.size = rec->length;
8300
8301         INIT_LIST_HEAD(&rec->chunk_list);
8302         INIT_LIST_HEAD(&rec->device_list);
8303
8304         return rec;
8305 }
8306
8307 static int
8308 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8309                            struct btrfs_key *key, struct extent_buffer *eb,
8310                            int slot)
8311 {
8312         struct device_extent_record *rec;
8313         int ret;
8314
8315         rec = btrfs_new_device_extent_record(eb, key, slot);
8316         ret = insert_device_extent_record(dev_extent_cache, rec);
8317         if (ret) {
8318                 fprintf(stderr,
8319                         "Device extent[%llu, %llu, %llu] existed.\n",
8320                         rec->objectid, rec->offset, rec->length);
8321                 free(rec);
8322         }
8323
8324         return ret;
8325 }
8326
8327 static int process_extent_item(struct btrfs_root *root,
8328                                struct cache_tree *extent_cache,
8329                                struct extent_buffer *eb, int slot)
8330 {
8331         struct btrfs_extent_item *ei;
8332         struct btrfs_extent_inline_ref *iref;
8333         struct btrfs_extent_data_ref *dref;
8334         struct btrfs_shared_data_ref *sref;
8335         struct btrfs_key key;
8336         struct extent_record tmpl;
8337         unsigned long end;
8338         unsigned long ptr;
8339         int ret;
8340         int type;
8341         u32 item_size = btrfs_item_size_nr(eb, slot);
8342         u64 refs = 0;
8343         u64 offset;
8344         u64 num_bytes;
8345         int metadata = 0;
8346
8347         btrfs_item_key_to_cpu(eb, &key, slot);
8348
8349         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8350                 metadata = 1;
8351                 num_bytes = root->fs_info->nodesize;
8352         } else {
8353                 num_bytes = key.offset;
8354         }
8355
8356         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8357                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8358                       key.objectid, root->fs_info->sectorsize);
8359                 return -EIO;
8360         }
8361         if (item_size < sizeof(*ei)) {
8362 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8363                 struct btrfs_extent_item_v0 *ei0;
8364                 BUG_ON(item_size != sizeof(*ei0));
8365                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8366                 refs = btrfs_extent_refs_v0(eb, ei0);
8367 #else
8368                 BUG();
8369 #endif
8370                 memset(&tmpl, 0, sizeof(tmpl));
8371                 tmpl.start = key.objectid;
8372                 tmpl.nr = num_bytes;
8373                 tmpl.extent_item_refs = refs;
8374                 tmpl.metadata = metadata;
8375                 tmpl.found_rec = 1;
8376                 tmpl.max_size = num_bytes;
8377
8378                 return add_extent_rec(extent_cache, &tmpl);
8379         }
8380
8381         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8382         refs = btrfs_extent_refs(eb, ei);
8383         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8384                 metadata = 1;
8385         else
8386                 metadata = 0;
8387         if (metadata && num_bytes != root->fs_info->nodesize) {
8388                 error("ignore invalid metadata extent, length %llu does not equal to %u",
8389                       num_bytes, root->fs_info->nodesize);
8390                 return -EIO;
8391         }
8392         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8393                 error("ignore invalid data extent, length %llu is not aligned to %u",
8394                       num_bytes, root->fs_info->sectorsize);
8395                 return -EIO;
8396         }
8397
8398         memset(&tmpl, 0, sizeof(tmpl));
8399         tmpl.start = key.objectid;
8400         tmpl.nr = num_bytes;
8401         tmpl.extent_item_refs = refs;
8402         tmpl.metadata = metadata;
8403         tmpl.found_rec = 1;
8404         tmpl.max_size = num_bytes;
8405         add_extent_rec(extent_cache, &tmpl);
8406
8407         ptr = (unsigned long)(ei + 1);
8408         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8409             key.type == BTRFS_EXTENT_ITEM_KEY)
8410                 ptr += sizeof(struct btrfs_tree_block_info);
8411
8412         end = (unsigned long)ei + item_size;
8413         while (ptr < end) {
8414                 iref = (struct btrfs_extent_inline_ref *)ptr;
8415                 type = btrfs_extent_inline_ref_type(eb, iref);
8416                 offset = btrfs_extent_inline_ref_offset(eb, iref);
8417                 switch (type) {
8418                 case BTRFS_TREE_BLOCK_REF_KEY:
8419                         ret = add_tree_backref(extent_cache, key.objectid,
8420                                         0, offset, 0);
8421                         if (ret < 0)
8422                                 error(
8423                         "add_tree_backref failed (extent items tree block): %s",
8424                                       strerror(-ret));
8425                         break;
8426                 case BTRFS_SHARED_BLOCK_REF_KEY:
8427                         ret = add_tree_backref(extent_cache, key.objectid,
8428                                         offset, 0, 0);
8429                         if (ret < 0)
8430                                 error(
8431                         "add_tree_backref failed (extent items shared block): %s",
8432                                       strerror(-ret));
8433                         break;
8434                 case BTRFS_EXTENT_DATA_REF_KEY:
8435                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8436                         add_data_backref(extent_cache, key.objectid, 0,
8437                                         btrfs_extent_data_ref_root(eb, dref),
8438                                         btrfs_extent_data_ref_objectid(eb,
8439                                                                        dref),
8440                                         btrfs_extent_data_ref_offset(eb, dref),
8441                                         btrfs_extent_data_ref_count(eb, dref),
8442                                         0, num_bytes);
8443                         break;
8444                 case BTRFS_SHARED_DATA_REF_KEY:
8445                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8446                         add_data_backref(extent_cache, key.objectid, offset,
8447                                         0, 0, 0,
8448                                         btrfs_shared_data_ref_count(eb, sref),
8449                                         0, num_bytes);
8450                         break;
8451                 default:
8452                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8453                                 key.objectid, key.type, num_bytes);
8454                         goto out;
8455                 }
8456                 ptr += btrfs_extent_inline_ref_size(type);
8457         }
8458         WARN_ON(ptr > end);
8459 out:
8460         return 0;
8461 }
8462
8463 static int check_cache_range(struct btrfs_root *root,
8464                              struct btrfs_block_group_cache *cache,
8465                              u64 offset, u64 bytes)
8466 {
8467         struct btrfs_free_space *entry;
8468         u64 *logical;
8469         u64 bytenr;
8470         int stripe_len;
8471         int i, nr, ret;
8472
8473         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8474                 bytenr = btrfs_sb_offset(i);
8475                 ret = btrfs_rmap_block(root->fs_info,
8476                                        cache->key.objectid, bytenr, 0,
8477                                        &logical, &nr, &stripe_len);
8478                 if (ret)
8479                         return ret;
8480
8481                 while (nr--) {
8482                         if (logical[nr] + stripe_len <= offset)
8483                                 continue;
8484                         if (offset + bytes <= logical[nr])
8485                                 continue;
8486                         if (logical[nr] == offset) {
8487                                 if (stripe_len >= bytes) {
8488                                         free(logical);
8489                                         return 0;
8490                                 }
8491                                 bytes -= stripe_len;
8492                                 offset += stripe_len;
8493                         } else if (logical[nr] < offset) {
8494                                 if (logical[nr] + stripe_len >=
8495                                     offset + bytes) {
8496                                         free(logical);
8497                                         return 0;
8498                                 }
8499                                 bytes = (offset + bytes) -
8500                                         (logical[nr] + stripe_len);
8501                                 offset = logical[nr] + stripe_len;
8502                         } else {
8503                                 /*
8504                                  * Could be tricky, the super may land in the
8505                                  * middle of the area we're checking.  First
8506                                  * check the easiest case, it's at the end.
8507                                  */
8508                                 if (logical[nr] + stripe_len >=
8509                                     bytes + offset) {
8510                                         bytes = logical[nr] - offset;
8511                                         continue;
8512                                 }
8513
8514                                 /* Check the left side */
8515                                 ret = check_cache_range(root, cache,
8516                                                         offset,
8517                                                         logical[nr] - offset);
8518                                 if (ret) {
8519                                         free(logical);
8520                                         return ret;
8521                                 }
8522
8523                                 /* Now we continue with the right side */
8524                                 bytes = (offset + bytes) -
8525                                         (logical[nr] + stripe_len);
8526                                 offset = logical[nr] + stripe_len;
8527                         }
8528                 }
8529
8530                 free(logical);
8531         }
8532
8533         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8534         if (!entry) {
8535                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8536                         offset, offset+bytes);
8537                 return -EINVAL;
8538         }
8539
8540         if (entry->offset != offset) {
8541                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8542                         entry->offset);
8543                 return -EINVAL;
8544         }
8545
8546         if (entry->bytes != bytes) {
8547                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8548                         bytes, entry->bytes, offset);
8549                 return -EINVAL;
8550         }
8551
8552         unlink_free_space(cache->free_space_ctl, entry);
8553         free(entry);
8554         return 0;
8555 }
8556
8557 static int verify_space_cache(struct btrfs_root *root,
8558                               struct btrfs_block_group_cache *cache)
8559 {
8560         struct btrfs_path path;
8561         struct extent_buffer *leaf;
8562         struct btrfs_key key;
8563         u64 last;
8564         int ret = 0;
8565
8566         root = root->fs_info->extent_root;
8567
8568         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8569
8570         btrfs_init_path(&path);
8571         key.objectid = last;
8572         key.offset = 0;
8573         key.type = BTRFS_EXTENT_ITEM_KEY;
8574         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8575         if (ret < 0)
8576                 goto out;
8577         ret = 0;
8578         while (1) {
8579                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8580                         ret = btrfs_next_leaf(root, &path);
8581                         if (ret < 0)
8582                                 goto out;
8583                         if (ret > 0) {
8584                                 ret = 0;
8585                                 break;
8586                         }
8587                 }
8588                 leaf = path.nodes[0];
8589                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8590                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8591                         break;
8592                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8593                     key.type != BTRFS_METADATA_ITEM_KEY) {
8594                         path.slots[0]++;
8595                         continue;
8596                 }
8597
8598                 if (last == key.objectid) {
8599                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8600                                 last = key.objectid + key.offset;
8601                         else
8602                                 last = key.objectid + root->fs_info->nodesize;
8603                         path.slots[0]++;
8604                         continue;
8605                 }
8606
8607                 ret = check_cache_range(root, cache, last,
8608                                         key.objectid - last);
8609                 if (ret)
8610                         break;
8611                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8612                         last = key.objectid + key.offset;
8613                 else
8614                         last = key.objectid + root->fs_info->nodesize;
8615                 path.slots[0]++;
8616         }
8617
8618         if (last < cache->key.objectid + cache->key.offset)
8619                 ret = check_cache_range(root, cache, last,
8620                                         cache->key.objectid +
8621                                         cache->key.offset - last);
8622
8623 out:
8624         btrfs_release_path(&path);
8625
8626         if (!ret &&
8627             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8628                 fprintf(stderr, "There are still entries left in the space "
8629                         "cache\n");
8630                 ret = -EINVAL;
8631         }
8632
8633         return ret;
8634 }
8635
8636 static int check_space_cache(struct btrfs_root *root)
8637 {
8638         struct btrfs_block_group_cache *cache;
8639         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8640         int ret;
8641         int error = 0;
8642
8643         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8644             btrfs_super_generation(root->fs_info->super_copy) !=
8645             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8646                 printf("cache and super generation don't match, space cache "
8647                        "will be invalidated\n");
8648                 return 0;
8649         }
8650
8651         if (ctx.progress_enabled) {
8652                 ctx.tp = TASK_FREE_SPACE;
8653                 task_start(ctx.info);
8654         }
8655
8656         while (1) {
8657                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8658                 if (!cache)
8659                         break;
8660
8661                 start = cache->key.objectid + cache->key.offset;
8662                 if (!cache->free_space_ctl) {
8663                         if (btrfs_init_free_space_ctl(cache,
8664                                                 root->fs_info->sectorsize)) {
8665                                 ret = -ENOMEM;
8666                                 break;
8667                         }
8668                 } else {
8669                         btrfs_remove_free_space_cache(cache);
8670                 }
8671
8672                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8673                         ret = exclude_super_stripes(root, cache);
8674                         if (ret) {
8675                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8676                                         strerror(-ret));
8677                                 error++;
8678                                 continue;
8679                         }
8680                         ret = load_free_space_tree(root->fs_info, cache);
8681                         free_excluded_extents(root, cache);
8682                         if (ret < 0) {
8683                                 fprintf(stderr, "could not load free space tree: %s\n",
8684                                         strerror(-ret));
8685                                 error++;
8686                                 continue;
8687                         }
8688                         error += ret;
8689                 } else {
8690                         ret = load_free_space_cache(root->fs_info, cache);
8691                         if (!ret)
8692                                 continue;
8693                 }
8694
8695                 ret = verify_space_cache(root, cache);
8696                 if (ret) {
8697                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8698                                 cache->key.objectid);
8699                         error++;
8700                 }
8701         }
8702
8703         task_stop(ctx.info);
8704
8705         return error ? -EINVAL : 0;
8706 }
8707
8708 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8709                         u64 num_bytes, unsigned long leaf_offset,
8710                         struct extent_buffer *eb) {
8711
8712         struct btrfs_fs_info *fs_info = root->fs_info;
8713         u64 offset = 0;
8714         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8715         char *data;
8716         unsigned long csum_offset;
8717         u32 csum;
8718         u32 csum_expected;
8719         u64 read_len;
8720         u64 data_checked = 0;
8721         u64 tmp;
8722         int ret = 0;
8723         int mirror;
8724         int num_copies;
8725
8726         if (num_bytes % fs_info->sectorsize)
8727                 return -EINVAL;
8728
8729         data = malloc(num_bytes);
8730         if (!data)
8731                 return -ENOMEM;
8732
8733         while (offset < num_bytes) {
8734                 mirror = 0;
8735 again:
8736                 read_len = num_bytes - offset;
8737                 /* read as much space once a time */
8738                 ret = read_extent_data(fs_info, data + offset,
8739                                 bytenr + offset, &read_len, mirror);
8740                 if (ret)
8741                         goto out;
8742                 data_checked = 0;
8743                 /* verify every 4k data's checksum */
8744                 while (data_checked < read_len) {
8745                         csum = ~(u32)0;
8746                         tmp = offset + data_checked;
8747
8748                         csum = btrfs_csum_data((char *)data + tmp,
8749                                                csum, fs_info->sectorsize);
8750                         btrfs_csum_final(csum, (u8 *)&csum);
8751
8752                         csum_offset = leaf_offset +
8753                                  tmp / fs_info->sectorsize * csum_size;
8754                         read_extent_buffer(eb, (char *)&csum_expected,
8755                                            csum_offset, csum_size);
8756                         /* try another mirror */
8757                         if (csum != csum_expected) {
8758                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8759                                                 mirror, bytenr + tmp,
8760                                                 csum, csum_expected);
8761                                 num_copies = btrfs_num_copies(root->fs_info,
8762                                                 bytenr, num_bytes);
8763                                 if (mirror < num_copies - 1) {
8764                                         mirror += 1;
8765                                         goto again;
8766                                 }
8767                         }
8768                         data_checked += fs_info->sectorsize;
8769                 }
8770                 offset += read_len;
8771         }
8772 out:
8773         free(data);
8774         return ret;
8775 }
8776
8777 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8778                                u64 num_bytes)
8779 {
8780         struct btrfs_path path;
8781         struct extent_buffer *leaf;
8782         struct btrfs_key key;
8783         int ret;
8784
8785         btrfs_init_path(&path);
8786         key.objectid = bytenr;
8787         key.type = BTRFS_EXTENT_ITEM_KEY;
8788         key.offset = (u64)-1;
8789
8790 again:
8791         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8792                                 0, 0);
8793         if (ret < 0) {
8794                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8795                 btrfs_release_path(&path);
8796                 return ret;
8797         } else if (ret) {
8798                 if (path.slots[0] > 0) {
8799                         path.slots[0]--;
8800                 } else {
8801                         ret = btrfs_prev_leaf(root, &path);
8802                         if (ret < 0) {
8803                                 goto out;
8804                         } else if (ret > 0) {
8805                                 ret = 0;
8806                                 goto out;
8807                         }
8808                 }
8809         }
8810
8811         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8812
8813         /*
8814          * Block group items come before extent items if they have the same
8815          * bytenr, so walk back one more just in case.  Dear future traveller,
8816          * first congrats on mastering time travel.  Now if it's not too much
8817          * trouble could you go back to 2006 and tell Chris to make the
8818          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8819          * EXTENT_ITEM_KEY please?
8820          */
8821         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8822                 if (path.slots[0] > 0) {
8823                         path.slots[0]--;
8824                 } else {
8825                         ret = btrfs_prev_leaf(root, &path);
8826                         if (ret < 0) {
8827                                 goto out;
8828                         } else if (ret > 0) {
8829                                 ret = 0;
8830                                 goto out;
8831                         }
8832                 }
8833                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8834         }
8835
8836         while (num_bytes) {
8837                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8838                         ret = btrfs_next_leaf(root, &path);
8839                         if (ret < 0) {
8840                                 fprintf(stderr, "Error going to next leaf "
8841                                         "%d\n", ret);
8842                                 btrfs_release_path(&path);
8843                                 return ret;
8844                         } else if (ret) {
8845                                 break;
8846                         }
8847                 }
8848                 leaf = path.nodes[0];
8849                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8850                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8851                         path.slots[0]++;
8852                         continue;
8853                 }
8854                 if (key.objectid + key.offset < bytenr) {
8855                         path.slots[0]++;
8856                         continue;
8857                 }
8858                 if (key.objectid > bytenr + num_bytes)
8859                         break;
8860
8861                 if (key.objectid == bytenr) {
8862                         if (key.offset >= num_bytes) {
8863                                 num_bytes = 0;
8864                                 break;
8865                         }
8866                         num_bytes -= key.offset;
8867                         bytenr += key.offset;
8868                 } else if (key.objectid < bytenr) {
8869                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8870                                 num_bytes = 0;
8871                                 break;
8872                         }
8873                         num_bytes = (bytenr + num_bytes) -
8874                                 (key.objectid + key.offset);
8875                         bytenr = key.objectid + key.offset;
8876                 } else {
8877                         if (key.objectid + key.offset < bytenr + num_bytes) {
8878                                 u64 new_start = key.objectid + key.offset;
8879                                 u64 new_bytes = bytenr + num_bytes - new_start;
8880
8881                                 /*
8882                                  * Weird case, the extent is in the middle of
8883                                  * our range, we'll have to search one side
8884                                  * and then the other.  Not sure if this happens
8885                                  * in real life, but no harm in coding it up
8886                                  * anyway just in case.
8887                                  */
8888                                 btrfs_release_path(&path);
8889                                 ret = check_extent_exists(root, new_start,
8890                                                           new_bytes);
8891                                 if (ret) {
8892                                         fprintf(stderr, "Right section didn't "
8893                                                 "have a record\n");
8894                                         break;
8895                                 }
8896                                 num_bytes = key.objectid - bytenr;
8897                                 goto again;
8898                         }
8899                         num_bytes = key.objectid - bytenr;
8900                 }
8901                 path.slots[0]++;
8902         }
8903         ret = 0;
8904
8905 out:
8906         if (num_bytes && !ret) {
8907                 fprintf(stderr, "There are no extents for csum range "
8908                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8909                 ret = 1;
8910         }
8911
8912         btrfs_release_path(&path);
8913         return ret;
8914 }
8915
8916 static int check_csums(struct btrfs_root *root)
8917 {
8918         struct btrfs_path path;
8919         struct extent_buffer *leaf;
8920         struct btrfs_key key;
8921         u64 offset = 0, num_bytes = 0;
8922         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8923         int errors = 0;
8924         int ret;
8925         u64 data_len;
8926         unsigned long leaf_offset;
8927
8928         root = root->fs_info->csum_root;
8929         if (!extent_buffer_uptodate(root->node)) {
8930                 fprintf(stderr, "No valid csum tree found\n");
8931                 return -ENOENT;
8932         }
8933
8934         btrfs_init_path(&path);
8935         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8936         key.type = BTRFS_EXTENT_CSUM_KEY;
8937         key.offset = 0;
8938         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8939         if (ret < 0) {
8940                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8941                 btrfs_release_path(&path);
8942                 return ret;
8943         }
8944
8945         if (ret > 0 && path.slots[0])
8946                 path.slots[0]--;
8947         ret = 0;
8948
8949         while (1) {
8950                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8951                         ret = btrfs_next_leaf(root, &path);
8952                         if (ret < 0) {
8953                                 fprintf(stderr, "Error going to next leaf "
8954                                         "%d\n", ret);
8955                                 break;
8956                         }
8957                         if (ret)
8958                                 break;
8959                 }
8960                 leaf = path.nodes[0];
8961
8962                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8963                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8964                         path.slots[0]++;
8965                         continue;
8966                 }
8967
8968                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8969                               csum_size) * root->fs_info->sectorsize;
8970                 if (!check_data_csum)
8971                         goto skip_csum_check;
8972                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8973                 ret = check_extent_csums(root, key.offset, data_len,
8974                                          leaf_offset, leaf);
8975                 if (ret)
8976                         break;
8977 skip_csum_check:
8978                 if (!num_bytes) {
8979                         offset = key.offset;
8980                 } else if (key.offset != offset + num_bytes) {
8981                         ret = check_extent_exists(root, offset, num_bytes);
8982                         if (ret) {
8983                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8984                                         "there is no extent record\n",
8985                                         offset, offset+num_bytes);
8986                                 errors++;
8987                         }
8988                         offset = key.offset;
8989                         num_bytes = 0;
8990                 }
8991                 num_bytes += data_len;
8992                 path.slots[0]++;
8993         }
8994
8995         btrfs_release_path(&path);
8996         return errors;
8997 }
8998
8999 static int is_dropped_key(struct btrfs_key *key,
9000                           struct btrfs_key *drop_key) {
9001         if (key->objectid < drop_key->objectid)
9002                 return 1;
9003         else if (key->objectid == drop_key->objectid) {
9004                 if (key->type < drop_key->type)
9005                         return 1;
9006                 else if (key->type == drop_key->type) {
9007                         if (key->offset < drop_key->offset)
9008                                 return 1;
9009                 }
9010         }
9011         return 0;
9012 }
9013
9014 /*
9015  * Here are the rules for FULL_BACKREF.
9016  *
9017  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
9018  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
9019  *      FULL_BACKREF set.
9020  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
9021  *    if it happened after the relocation occurred since we'll have dropped the
9022  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
9023  *    have no real way to know for sure.
9024  *
9025  * We process the blocks one root at a time, and we start from the lowest root
9026  * objectid and go to the highest.  So we can just lookup the owner backref for
9027  * the record and if we don't find it then we know it doesn't exist and we have
9028  * a FULL BACKREF.
9029  *
9030  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
9031  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
9032  * be set or not and then we can check later once we've gathered all the refs.
9033  */
9034 static int calc_extent_flag(struct cache_tree *extent_cache,
9035                            struct extent_buffer *buf,
9036                            struct root_item_record *ri,
9037                            u64 *flags)
9038 {
9039         struct extent_record *rec;
9040         struct cache_extent *cache;
9041         struct tree_backref *tback;
9042         u64 owner = 0;
9043
9044         cache = lookup_cache_extent(extent_cache, buf->start, 1);
9045         /* we have added this extent before */
9046         if (!cache)
9047                 return -ENOENT;
9048
9049         rec = container_of(cache, struct extent_record, cache);
9050
9051         /*
9052          * Except file/reloc tree, we can not have
9053          * FULL BACKREF MODE
9054          */
9055         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
9056                 goto normal;
9057         /*
9058          * root node
9059          */
9060         if (buf->start == ri->bytenr)
9061                 goto normal;
9062
9063         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
9064                 goto full_backref;
9065
9066         owner = btrfs_header_owner(buf);
9067         if (owner == ri->objectid)
9068                 goto normal;
9069
9070         tback = find_tree_backref(rec, 0, owner);
9071         if (!tback)
9072                 goto full_backref;
9073 normal:
9074         *flags = 0;
9075         if (rec->flag_block_full_backref != FLAG_UNSET &&
9076             rec->flag_block_full_backref != 0)
9077                 rec->bad_full_backref = 1;
9078         return 0;
9079 full_backref:
9080         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9081         if (rec->flag_block_full_backref != FLAG_UNSET &&
9082             rec->flag_block_full_backref != 1)
9083                 rec->bad_full_backref = 1;
9084         return 0;
9085 }
9086
9087 static void report_mismatch_key_root(u8 key_type, u64 rootid)
9088 {
9089         fprintf(stderr, "Invalid key type(");
9090         print_key_type(stderr, 0, key_type);
9091         fprintf(stderr, ") found in root(");
9092         print_objectid(stderr, rootid, 0);
9093         fprintf(stderr, ")\n");
9094 }
9095
9096 /*
9097  * Check if the key is valid with its extent buffer.
9098  *
9099  * This is a early check in case invalid key exists in a extent buffer
9100  * This is not comprehensive yet, but should prevent wrong key/item passed
9101  * further
9102  */
9103 static int check_type_with_root(u64 rootid, u8 key_type)
9104 {
9105         switch (key_type) {
9106         /* Only valid in chunk tree */
9107         case BTRFS_DEV_ITEM_KEY:
9108         case BTRFS_CHUNK_ITEM_KEY:
9109                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
9110                         goto err;
9111                 break;
9112         /* valid in csum and log tree */
9113         case BTRFS_CSUM_TREE_OBJECTID:
9114                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
9115                       is_fstree(rootid)))
9116                         goto err;
9117                 break;
9118         case BTRFS_EXTENT_ITEM_KEY:
9119         case BTRFS_METADATA_ITEM_KEY:
9120         case BTRFS_BLOCK_GROUP_ITEM_KEY:
9121                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
9122                         goto err;
9123                 break;
9124         case BTRFS_ROOT_ITEM_KEY:
9125                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
9126                         goto err;
9127                 break;
9128         case BTRFS_DEV_EXTENT_KEY:
9129                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
9130                         goto err;
9131                 break;
9132         }
9133         return 0;
9134 err:
9135         report_mismatch_key_root(key_type, rootid);
9136         return -EINVAL;
9137 }
9138
9139 static int run_next_block(struct btrfs_root *root,
9140                           struct block_info *bits,
9141                           int bits_nr,
9142                           u64 *last,
9143                           struct cache_tree *pending,
9144                           struct cache_tree *seen,
9145                           struct cache_tree *reada,
9146                           struct cache_tree *nodes,
9147                           struct cache_tree *extent_cache,
9148                           struct cache_tree *chunk_cache,
9149                           struct rb_root *dev_cache,
9150                           struct block_group_tree *block_group_cache,
9151                           struct device_extent_tree *dev_extent_cache,
9152                           struct root_item_record *ri)
9153 {
9154         struct btrfs_fs_info *fs_info = root->fs_info;
9155         struct extent_buffer *buf;
9156         struct extent_record *rec = NULL;
9157         u64 bytenr;
9158         u32 size;
9159         u64 parent;
9160         u64 owner;
9161         u64 flags;
9162         u64 ptr;
9163         u64 gen = 0;
9164         int ret = 0;
9165         int i;
9166         int nritems;
9167         struct btrfs_key key;
9168         struct cache_extent *cache;
9169         int reada_bits;
9170
9171         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
9172                                     bits_nr, &reada_bits);
9173         if (nritems == 0)
9174                 return 1;
9175
9176         if (!reada_bits) {
9177                 for(i = 0; i < nritems; i++) {
9178                         ret = add_cache_extent(reada, bits[i].start,
9179                                                bits[i].size);
9180                         if (ret == -EEXIST)
9181                                 continue;
9182
9183                         /* fixme, get the parent transid */
9184                         readahead_tree_block(fs_info, bits[i].start, 0);
9185                 }
9186         }
9187         *last = bits[0].start;
9188         bytenr = bits[0].start;
9189         size = bits[0].size;
9190
9191         cache = lookup_cache_extent(pending, bytenr, size);
9192         if (cache) {
9193                 remove_cache_extent(pending, cache);
9194                 free(cache);
9195         }
9196         cache = lookup_cache_extent(reada, bytenr, size);
9197         if (cache) {
9198                 remove_cache_extent(reada, cache);
9199                 free(cache);
9200         }
9201         cache = lookup_cache_extent(nodes, bytenr, size);
9202         if (cache) {
9203                 remove_cache_extent(nodes, cache);
9204                 free(cache);
9205         }
9206         cache = lookup_cache_extent(extent_cache, bytenr, size);
9207         if (cache) {
9208                 rec = container_of(cache, struct extent_record, cache);
9209                 gen = rec->parent_generation;
9210         }
9211
9212         /* fixme, get the real parent transid */
9213         buf = read_tree_block(root->fs_info, bytenr, gen);
9214         if (!extent_buffer_uptodate(buf)) {
9215                 record_bad_block_io(root->fs_info,
9216                                     extent_cache, bytenr, size);
9217                 goto out;
9218         }
9219
9220         nritems = btrfs_header_nritems(buf);
9221
9222         flags = 0;
9223         if (!init_extent_tree) {
9224                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
9225                                        btrfs_header_level(buf), 1, NULL,
9226                                        &flags);
9227                 if (ret < 0) {
9228                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9229                         if (ret < 0) {
9230                                 fprintf(stderr, "Couldn't calc extent flags\n");
9231                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9232                         }
9233                 }
9234         } else {
9235                 flags = 0;
9236                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
9237                 if (ret < 0) {
9238                         fprintf(stderr, "Couldn't calc extent flags\n");
9239                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9240                 }
9241         }
9242
9243         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9244                 if (ri != NULL &&
9245                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
9246                     ri->objectid == btrfs_header_owner(buf)) {
9247                         /*
9248                          * Ok we got to this block from it's original owner and
9249                          * we have FULL_BACKREF set.  Relocation can leave
9250                          * converted blocks over so this is altogether possible,
9251                          * however it's not possible if the generation > the
9252                          * last snapshot, so check for this case.
9253                          */
9254                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
9255                             btrfs_header_generation(buf) > ri->last_snapshot) {
9256                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9257                                 rec->bad_full_backref = 1;
9258                         }
9259                 }
9260         } else {
9261                 if (ri != NULL &&
9262                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9263                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9264                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9265                         rec->bad_full_backref = 1;
9266                 }
9267         }
9268
9269         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9270                 rec->flag_block_full_backref = 1;
9271                 parent = bytenr;
9272                 owner = 0;
9273         } else {
9274                 rec->flag_block_full_backref = 0;
9275                 parent = 0;
9276                 owner = btrfs_header_owner(buf);
9277         }
9278
9279         ret = check_block(root, extent_cache, buf, flags);
9280         if (ret)
9281                 goto out;
9282
9283         if (btrfs_is_leaf(buf)) {
9284                 btree_space_waste += btrfs_leaf_free_space(root, buf);
9285                 for (i = 0; i < nritems; i++) {
9286                         struct btrfs_file_extent_item *fi;
9287                         btrfs_item_key_to_cpu(buf, &key, i);
9288                         /*
9289                          * Check key type against the leaf owner.
9290                          * Could filter quite a lot of early error if
9291                          * owner is correct
9292                          */
9293                         if (check_type_with_root(btrfs_header_owner(buf),
9294                                                  key.type)) {
9295                                 fprintf(stderr, "ignoring invalid key\n");
9296                                 continue;
9297                         }
9298                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9299                                 process_extent_item(root, extent_cache, buf,
9300                                                     i);
9301                                 continue;
9302                         }
9303                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9304                                 process_extent_item(root, extent_cache, buf,
9305                                                     i);
9306                                 continue;
9307                         }
9308                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9309                                 total_csum_bytes +=
9310                                         btrfs_item_size_nr(buf, i);
9311                                 continue;
9312                         }
9313                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9314                                 process_chunk_item(chunk_cache, &key, buf, i);
9315                                 continue;
9316                         }
9317                         if (key.type == BTRFS_DEV_ITEM_KEY) {
9318                                 process_device_item(dev_cache, &key, buf, i);
9319                                 continue;
9320                         }
9321                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9322                                 process_block_group_item(block_group_cache,
9323                                         &key, buf, i);
9324                                 continue;
9325                         }
9326                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
9327                                 process_device_extent_item(dev_extent_cache,
9328                                         &key, buf, i);
9329                                 continue;
9330
9331                         }
9332                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9333 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9334                                 process_extent_ref_v0(extent_cache, buf, i);
9335 #else
9336                                 BUG();
9337 #endif
9338                                 continue;
9339                         }
9340
9341                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9342                                 ret = add_tree_backref(extent_cache,
9343                                                 key.objectid, 0, key.offset, 0);
9344                                 if (ret < 0)
9345                                         error(
9346                                 "add_tree_backref failed (leaf tree block): %s",
9347                                               strerror(-ret));
9348                                 continue;
9349                         }
9350                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9351                                 ret = add_tree_backref(extent_cache,
9352                                                 key.objectid, key.offset, 0, 0);
9353                                 if (ret < 0)
9354                                         error(
9355                                 "add_tree_backref failed (leaf shared block): %s",
9356                                               strerror(-ret));
9357                                 continue;
9358                         }
9359                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9360                                 struct btrfs_extent_data_ref *ref;
9361                                 ref = btrfs_item_ptr(buf, i,
9362                                                 struct btrfs_extent_data_ref);
9363                                 add_data_backref(extent_cache,
9364                                         key.objectid, 0,
9365                                         btrfs_extent_data_ref_root(buf, ref),
9366                                         btrfs_extent_data_ref_objectid(buf,
9367                                                                        ref),
9368                                         btrfs_extent_data_ref_offset(buf, ref),
9369                                         btrfs_extent_data_ref_count(buf, ref),
9370                                         0, root->fs_info->sectorsize);
9371                                 continue;
9372                         }
9373                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9374                                 struct btrfs_shared_data_ref *ref;
9375                                 ref = btrfs_item_ptr(buf, i,
9376                                                 struct btrfs_shared_data_ref);
9377                                 add_data_backref(extent_cache,
9378                                         key.objectid, key.offset, 0, 0, 0,
9379                                         btrfs_shared_data_ref_count(buf, ref),
9380                                         0, root->fs_info->sectorsize);
9381                                 continue;
9382                         }
9383                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9384                                 struct bad_item *bad;
9385
9386                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9387                                         continue;
9388                                 if (!owner)
9389                                         continue;
9390                                 bad = malloc(sizeof(struct bad_item));
9391                                 if (!bad)
9392                                         continue;
9393                                 INIT_LIST_HEAD(&bad->list);
9394                                 memcpy(&bad->key, &key,
9395                                        sizeof(struct btrfs_key));
9396                                 bad->root_id = owner;
9397                                 list_add_tail(&bad->list, &delete_items);
9398                                 continue;
9399                         }
9400                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9401                                 continue;
9402                         fi = btrfs_item_ptr(buf, i,
9403                                             struct btrfs_file_extent_item);
9404                         if (btrfs_file_extent_type(buf, fi) ==
9405                             BTRFS_FILE_EXTENT_INLINE)
9406                                 continue;
9407                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9408                                 continue;
9409
9410                         data_bytes_allocated +=
9411                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9412                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9413                                 abort();
9414                         }
9415                         data_bytes_referenced +=
9416                                 btrfs_file_extent_num_bytes(buf, fi);
9417                         add_data_backref(extent_cache,
9418                                 btrfs_file_extent_disk_bytenr(buf, fi),
9419                                 parent, owner, key.objectid, key.offset -
9420                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9421                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9422                 }
9423         } else {
9424                 int level;
9425                 struct btrfs_key first_key;
9426
9427                 first_key.objectid = 0;
9428
9429                 if (nritems > 0)
9430                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9431                 level = btrfs_header_level(buf);
9432                 for (i = 0; i < nritems; i++) {
9433                         struct extent_record tmpl;
9434
9435                         ptr = btrfs_node_blockptr(buf, i);
9436                         size = root->fs_info->nodesize;
9437                         btrfs_node_key_to_cpu(buf, &key, i);
9438                         if (ri != NULL) {
9439                                 if ((level == ri->drop_level)
9440                                     && is_dropped_key(&key, &ri->drop_key)) {
9441                                         continue;
9442                                 }
9443                         }
9444
9445                         memset(&tmpl, 0, sizeof(tmpl));
9446                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9447                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9448                         tmpl.start = ptr;
9449                         tmpl.nr = size;
9450                         tmpl.refs = 1;
9451                         tmpl.metadata = 1;
9452                         tmpl.max_size = size;
9453                         ret = add_extent_rec(extent_cache, &tmpl);
9454                         if (ret < 0)
9455                                 goto out;
9456
9457                         ret = add_tree_backref(extent_cache, ptr, parent,
9458                                         owner, 1);
9459                         if (ret < 0) {
9460                                 error(
9461                                 "add_tree_backref failed (non-leaf block): %s",
9462                                       strerror(-ret));
9463                                 continue;
9464                         }
9465
9466                         if (level > 1) {
9467                                 add_pending(nodes, seen, ptr, size);
9468                         } else {
9469                                 add_pending(pending, seen, ptr, size);
9470                         }
9471                 }
9472                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
9473                                       nritems) * sizeof(struct btrfs_key_ptr);
9474         }
9475         total_btree_bytes += buf->len;
9476         if (fs_root_objectid(btrfs_header_owner(buf)))
9477                 total_fs_tree_bytes += buf->len;
9478         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9479                 total_extent_tree_bytes += buf->len;
9480 out:
9481         free_extent_buffer(buf);
9482         return ret;
9483 }
9484
9485 static int add_root_to_pending(struct extent_buffer *buf,
9486                                struct cache_tree *extent_cache,
9487                                struct cache_tree *pending,
9488                                struct cache_tree *seen,
9489                                struct cache_tree *nodes,
9490                                u64 objectid)
9491 {
9492         struct extent_record tmpl;
9493         int ret;
9494
9495         if (btrfs_header_level(buf) > 0)
9496                 add_pending(nodes, seen, buf->start, buf->len);
9497         else
9498                 add_pending(pending, seen, buf->start, buf->len);
9499
9500         memset(&tmpl, 0, sizeof(tmpl));
9501         tmpl.start = buf->start;
9502         tmpl.nr = buf->len;
9503         tmpl.is_root = 1;
9504         tmpl.refs = 1;
9505         tmpl.metadata = 1;
9506         tmpl.max_size = buf->len;
9507         add_extent_rec(extent_cache, &tmpl);
9508
9509         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9510             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9511                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9512                                 0, 1);
9513         else
9514                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9515                                 1);
9516         return ret;
9517 }
9518
9519 /* as we fix the tree, we might be deleting blocks that
9520  * we're tracking for repair.  This hook makes sure we
9521  * remove any backrefs for blocks as we are fixing them.
9522  */
9523 static int free_extent_hook(struct btrfs_trans_handle *trans,
9524                             struct btrfs_root *root,
9525                             u64 bytenr, u64 num_bytes, u64 parent,
9526                             u64 root_objectid, u64 owner, u64 offset,
9527                             int refs_to_drop)
9528 {
9529         struct extent_record *rec;
9530         struct cache_extent *cache;
9531         int is_data;
9532         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9533
9534         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9535         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9536         if (!cache)
9537                 return 0;
9538
9539         rec = container_of(cache, struct extent_record, cache);
9540         if (is_data) {
9541                 struct data_backref *back;
9542                 back = find_data_backref(rec, parent, root_objectid, owner,
9543                                          offset, 1, bytenr, num_bytes);
9544                 if (!back)
9545                         goto out;
9546                 if (back->node.found_ref) {
9547                         back->found_ref -= refs_to_drop;
9548                         if (rec->refs)
9549                                 rec->refs -= refs_to_drop;
9550                 }
9551                 if (back->node.found_extent_tree) {
9552                         back->num_refs -= refs_to_drop;
9553                         if (rec->extent_item_refs)
9554                                 rec->extent_item_refs -= refs_to_drop;
9555                 }
9556                 if (back->found_ref == 0)
9557                         back->node.found_ref = 0;
9558                 if (back->num_refs == 0)
9559                         back->node.found_extent_tree = 0;
9560
9561                 if (!back->node.found_extent_tree && back->node.found_ref) {
9562                         rb_erase(&back->node.node, &rec->backref_tree);
9563                         free(back);
9564                 }
9565         } else {
9566                 struct tree_backref *back;
9567                 back = find_tree_backref(rec, parent, root_objectid);
9568                 if (!back)
9569                         goto out;
9570                 if (back->node.found_ref) {
9571                         if (rec->refs)
9572                                 rec->refs--;
9573                         back->node.found_ref = 0;
9574                 }
9575                 if (back->node.found_extent_tree) {
9576                         if (rec->extent_item_refs)
9577                                 rec->extent_item_refs--;
9578                         back->node.found_extent_tree = 0;
9579                 }
9580                 if (!back->node.found_extent_tree && back->node.found_ref) {
9581                         rb_erase(&back->node.node, &rec->backref_tree);
9582                         free(back);
9583                 }
9584         }
9585         maybe_free_extent_rec(extent_cache, rec);
9586 out:
9587         return 0;
9588 }
9589
9590 static int delete_extent_records(struct btrfs_trans_handle *trans,
9591                                  struct btrfs_root *root,
9592                                  struct btrfs_path *path,
9593                                  u64 bytenr)
9594 {
9595         struct btrfs_key key;
9596         struct btrfs_key found_key;
9597         struct extent_buffer *leaf;
9598         int ret;
9599         int slot;
9600
9601
9602         key.objectid = bytenr;
9603         key.type = (u8)-1;
9604         key.offset = (u64)-1;
9605
9606         while(1) {
9607                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9608                                         &key, path, 0, 1);
9609                 if (ret < 0)
9610                         break;
9611
9612                 if (ret > 0) {
9613                         ret = 0;
9614                         if (path->slots[0] == 0)
9615                                 break;
9616                         path->slots[0]--;
9617                 }
9618                 ret = 0;
9619
9620                 leaf = path->nodes[0];
9621                 slot = path->slots[0];
9622
9623                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9624                 if (found_key.objectid != bytenr)
9625                         break;
9626
9627                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9628                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9629                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9630                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9631                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9632                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9633                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9634                         btrfs_release_path(path);
9635                         if (found_key.type == 0) {
9636                                 if (found_key.offset == 0)
9637                                         break;
9638                                 key.offset = found_key.offset - 1;
9639                                 key.type = found_key.type;
9640                         }
9641                         key.type = found_key.type - 1;
9642                         key.offset = (u64)-1;
9643                         continue;
9644                 }
9645
9646                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9647                         found_key.objectid, found_key.type, found_key.offset);
9648
9649                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9650                 if (ret)
9651                         break;
9652                 btrfs_release_path(path);
9653
9654                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9655                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9656                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9657                                 found_key.offset : root->fs_info->nodesize;
9658
9659                         ret = btrfs_update_block_group(trans, root, bytenr,
9660                                                        bytes, 0, 0);
9661                         if (ret)
9662                                 break;
9663                 }
9664         }
9665
9666         btrfs_release_path(path);
9667         return ret;
9668 }
9669
9670 /*
9671  * for a single backref, this will allocate a new extent
9672  * and add the backref to it.
9673  */
9674 static int record_extent(struct btrfs_trans_handle *trans,
9675                          struct btrfs_fs_info *info,
9676                          struct btrfs_path *path,
9677                          struct extent_record *rec,
9678                          struct extent_backref *back,
9679                          int allocated, u64 flags)
9680 {
9681         int ret = 0;
9682         struct btrfs_root *extent_root = info->extent_root;
9683         struct extent_buffer *leaf;
9684         struct btrfs_key ins_key;
9685         struct btrfs_extent_item *ei;
9686         struct data_backref *dback;
9687         struct btrfs_tree_block_info *bi;
9688
9689         if (!back->is_data)
9690                 rec->max_size = max_t(u64, rec->max_size,
9691                                     info->nodesize);
9692
9693         if (!allocated) {
9694                 u32 item_size = sizeof(*ei);
9695
9696                 if (!back->is_data)
9697                         item_size += sizeof(*bi);
9698
9699                 ins_key.objectid = rec->start;
9700                 ins_key.offset = rec->max_size;
9701                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9702
9703                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9704                                         &ins_key, item_size);
9705                 if (ret)
9706                         goto fail;
9707
9708                 leaf = path->nodes[0];
9709                 ei = btrfs_item_ptr(leaf, path->slots[0],
9710                                     struct btrfs_extent_item);
9711
9712                 btrfs_set_extent_refs(leaf, ei, 0);
9713                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9714
9715                 if (back->is_data) {
9716                         btrfs_set_extent_flags(leaf, ei,
9717                                                BTRFS_EXTENT_FLAG_DATA);
9718                 } else {
9719                         struct btrfs_disk_key copy_key;;
9720
9721                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9722                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9723                                              sizeof(*bi));
9724
9725                         btrfs_set_disk_key_objectid(&copy_key,
9726                                                     rec->info_objectid);
9727                         btrfs_set_disk_key_type(&copy_key, 0);
9728                         btrfs_set_disk_key_offset(&copy_key, 0);
9729
9730                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9731                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9732
9733                         btrfs_set_extent_flags(leaf, ei,
9734                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9735                 }
9736
9737                 btrfs_mark_buffer_dirty(leaf);
9738                 ret = btrfs_update_block_group(trans, extent_root, rec->start,
9739                                                rec->max_size, 1, 0);
9740                 if (ret)
9741                         goto fail;
9742                 btrfs_release_path(path);
9743         }
9744
9745         if (back->is_data) {
9746                 u64 parent;
9747                 int i;
9748
9749                 dback = to_data_backref(back);
9750                 if (back->full_backref)
9751                         parent = dback->parent;
9752                 else
9753                         parent = 0;
9754
9755                 for (i = 0; i < dback->found_ref; i++) {
9756                         /* if parent != 0, we're doing a full backref
9757                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9758                          * just makes the backref allocator create a data
9759                          * backref
9760                          */
9761                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9762                                                    rec->start, rec->max_size,
9763                                                    parent,
9764                                                    dback->root,
9765                                                    parent ?
9766                                                    BTRFS_FIRST_FREE_OBJECTID :
9767                                                    dback->owner,
9768                                                    dback->offset);
9769                         if (ret)
9770                                 break;
9771                 }
9772                 fprintf(stderr, "adding new data backref"
9773                                 " on %llu %s %llu owner %llu"
9774                                 " offset %llu found %d\n",
9775                                 (unsigned long long)rec->start,
9776                                 back->full_backref ?
9777                                 "parent" : "root",
9778                                 back->full_backref ?
9779                                 (unsigned long long)parent :
9780                                 (unsigned long long)dback->root,
9781                                 (unsigned long long)dback->owner,
9782                                 (unsigned long long)dback->offset,
9783                                 dback->found_ref);
9784         } else {
9785                 u64 parent;
9786                 struct tree_backref *tback;
9787
9788                 tback = to_tree_backref(back);
9789                 if (back->full_backref)
9790                         parent = tback->parent;
9791                 else
9792                         parent = 0;
9793
9794                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9795                                            rec->start, rec->max_size,
9796                                            parent, tback->root, 0, 0);
9797                 fprintf(stderr, "adding new tree backref on "
9798                         "start %llu len %llu parent %llu root %llu\n",
9799                         rec->start, rec->max_size, parent, tback->root);
9800         }
9801 fail:
9802         btrfs_release_path(path);
9803         return ret;
9804 }
9805
9806 static struct extent_entry *find_entry(struct list_head *entries,
9807                                        u64 bytenr, u64 bytes)
9808 {
9809         struct extent_entry *entry = NULL;
9810
9811         list_for_each_entry(entry, entries, list) {
9812                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9813                         return entry;
9814         }
9815
9816         return NULL;
9817 }
9818
9819 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9820 {
9821         struct extent_entry *entry, *best = NULL, *prev = NULL;
9822
9823         list_for_each_entry(entry, entries, list) {
9824                 /*
9825                  * If there are as many broken entries as entries then we know
9826                  * not to trust this particular entry.
9827                  */
9828                 if (entry->broken == entry->count)
9829                         continue;
9830
9831                 /*
9832                  * Special case, when there are only two entries and 'best' is
9833                  * the first one
9834                  */
9835                 if (!prev) {
9836                         best = entry;
9837                         prev = entry;
9838                         continue;
9839                 }
9840
9841                 /*
9842                  * If our current entry == best then we can't be sure our best
9843                  * is really the best, so we need to keep searching.
9844                  */
9845                 if (best && best->count == entry->count) {
9846                         prev = entry;
9847                         best = NULL;
9848                         continue;
9849                 }
9850
9851                 /* Prev == entry, not good enough, have to keep searching */
9852                 if (!prev->broken && prev->count == entry->count)
9853                         continue;
9854
9855                 if (!best)
9856                         best = (prev->count > entry->count) ? prev : entry;
9857                 else if (best->count < entry->count)
9858                         best = entry;
9859                 prev = entry;
9860         }
9861
9862         return best;
9863 }
9864
9865 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9866                       struct data_backref *dback, struct extent_entry *entry)
9867 {
9868         struct btrfs_trans_handle *trans;
9869         struct btrfs_root *root;
9870         struct btrfs_file_extent_item *fi;
9871         struct extent_buffer *leaf;
9872         struct btrfs_key key;
9873         u64 bytenr, bytes;
9874         int ret, err;
9875
9876         key.objectid = dback->root;
9877         key.type = BTRFS_ROOT_ITEM_KEY;
9878         key.offset = (u64)-1;
9879         root = btrfs_read_fs_root(info, &key);
9880         if (IS_ERR(root)) {
9881                 fprintf(stderr, "Couldn't find root for our ref\n");
9882                 return -EINVAL;
9883         }
9884
9885         /*
9886          * The backref points to the original offset of the extent if it was
9887          * split, so we need to search down to the offset we have and then walk
9888          * forward until we find the backref we're looking for.
9889          */
9890         key.objectid = dback->owner;
9891         key.type = BTRFS_EXTENT_DATA_KEY;
9892         key.offset = dback->offset;
9893         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9894         if (ret < 0) {
9895                 fprintf(stderr, "Error looking up ref %d\n", ret);
9896                 return ret;
9897         }
9898
9899         while (1) {
9900                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9901                         ret = btrfs_next_leaf(root, path);
9902                         if (ret) {
9903                                 fprintf(stderr, "Couldn't find our ref, next\n");
9904                                 return -EINVAL;
9905                         }
9906                 }
9907                 leaf = path->nodes[0];
9908                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9909                 if (key.objectid != dback->owner ||
9910                     key.type != BTRFS_EXTENT_DATA_KEY) {
9911                         fprintf(stderr, "Couldn't find our ref, search\n");
9912                         return -EINVAL;
9913                 }
9914                 fi = btrfs_item_ptr(leaf, path->slots[0],
9915                                     struct btrfs_file_extent_item);
9916                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9917                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9918
9919                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9920                         break;
9921                 path->slots[0]++;
9922         }
9923
9924         btrfs_release_path(path);
9925
9926         trans = btrfs_start_transaction(root, 1);
9927         if (IS_ERR(trans))
9928                 return PTR_ERR(trans);
9929
9930         /*
9931          * Ok we have the key of the file extent we want to fix, now we can cow
9932          * down to the thing and fix it.
9933          */
9934         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9935         if (ret < 0) {
9936                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9937                         key.objectid, key.type, key.offset, ret);
9938                 goto out;
9939         }
9940         if (ret > 0) {
9941                 fprintf(stderr, "Well that's odd, we just found this key "
9942                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9943                         key.offset);
9944                 ret = -EINVAL;
9945                 goto out;
9946         }
9947         leaf = path->nodes[0];
9948         fi = btrfs_item_ptr(leaf, path->slots[0],
9949                             struct btrfs_file_extent_item);
9950
9951         if (btrfs_file_extent_compression(leaf, fi) &&
9952             dback->disk_bytenr != entry->bytenr) {
9953                 fprintf(stderr, "Ref doesn't match the record start and is "
9954                         "compressed, please take a btrfs-image of this file "
9955                         "system and send it to a btrfs developer so they can "
9956                         "complete this functionality for bytenr %Lu\n",
9957                         dback->disk_bytenr);
9958                 ret = -EINVAL;
9959                 goto out;
9960         }
9961
9962         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9963                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9964         } else if (dback->disk_bytenr > entry->bytenr) {
9965                 u64 off_diff, offset;
9966
9967                 off_diff = dback->disk_bytenr - entry->bytenr;
9968                 offset = btrfs_file_extent_offset(leaf, fi);
9969                 if (dback->disk_bytenr + offset +
9970                     btrfs_file_extent_num_bytes(leaf, fi) >
9971                     entry->bytenr + entry->bytes) {
9972                         fprintf(stderr, "Ref is past the entry end, please "
9973                                 "take a btrfs-image of this file system and "
9974                                 "send it to a btrfs developer, ref %Lu\n",
9975                                 dback->disk_bytenr);
9976                         ret = -EINVAL;
9977                         goto out;
9978                 }
9979                 offset += off_diff;
9980                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9981                 btrfs_set_file_extent_offset(leaf, fi, offset);
9982         } else if (dback->disk_bytenr < entry->bytenr) {
9983                 u64 offset;
9984
9985                 offset = btrfs_file_extent_offset(leaf, fi);
9986                 if (dback->disk_bytenr + offset < entry->bytenr) {
9987                         fprintf(stderr, "Ref is before the entry start, please"
9988                                 " take a btrfs-image of this file system and "
9989                                 "send it to a btrfs developer, ref %Lu\n",
9990                                 dback->disk_bytenr);
9991                         ret = -EINVAL;
9992                         goto out;
9993                 }
9994
9995                 offset += dback->disk_bytenr;
9996                 offset -= entry->bytenr;
9997                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9998                 btrfs_set_file_extent_offset(leaf, fi, offset);
9999         }
10000
10001         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
10002
10003         /*
10004          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
10005          * only do this if we aren't using compression, otherwise it's a
10006          * trickier case.
10007          */
10008         if (!btrfs_file_extent_compression(leaf, fi))
10009                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
10010         else
10011                 printf("ram bytes may be wrong?\n");
10012         btrfs_mark_buffer_dirty(leaf);
10013 out:
10014         err = btrfs_commit_transaction(trans, root);
10015         btrfs_release_path(path);
10016         return ret ? ret : err;
10017 }
10018
10019 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
10020                            struct extent_record *rec)
10021 {
10022         struct extent_backref *back, *tmp;
10023         struct data_backref *dback;
10024         struct extent_entry *entry, *best = NULL;
10025         LIST_HEAD(entries);
10026         int nr_entries = 0;
10027         int broken_entries = 0;
10028         int ret = 0;
10029         short mismatch = 0;
10030
10031         /*
10032          * Metadata is easy and the backrefs should always agree on bytenr and
10033          * size, if not we've got bigger issues.
10034          */
10035         if (rec->metadata)
10036                 return 0;
10037
10038         rbtree_postorder_for_each_entry_safe(back, tmp,
10039                                              &rec->backref_tree, node) {
10040                 if (back->full_backref || !back->is_data)
10041                         continue;
10042
10043                 dback = to_data_backref(back);
10044
10045                 /*
10046                  * We only pay attention to backrefs that we found a real
10047                  * backref for.
10048                  */
10049                 if (dback->found_ref == 0)
10050                         continue;
10051
10052                 /*
10053                  * For now we only catch when the bytes don't match, not the
10054                  * bytenr.  We can easily do this at the same time, but I want
10055                  * to have a fs image to test on before we just add repair
10056                  * functionality willy-nilly so we know we won't screw up the
10057                  * repair.
10058                  */
10059
10060                 entry = find_entry(&entries, dback->disk_bytenr,
10061                                    dback->bytes);
10062                 if (!entry) {
10063                         entry = malloc(sizeof(struct extent_entry));
10064                         if (!entry) {
10065                                 ret = -ENOMEM;
10066                                 goto out;
10067                         }
10068                         memset(entry, 0, sizeof(*entry));
10069                         entry->bytenr = dback->disk_bytenr;
10070                         entry->bytes = dback->bytes;
10071                         list_add_tail(&entry->list, &entries);
10072                         nr_entries++;
10073                 }
10074
10075                 /*
10076                  * If we only have on entry we may think the entries agree when
10077                  * in reality they don't so we have to do some extra checking.
10078                  */
10079                 if (dback->disk_bytenr != rec->start ||
10080                     dback->bytes != rec->nr || back->broken)
10081                         mismatch = 1;
10082
10083                 if (back->broken) {
10084                         entry->broken++;
10085                         broken_entries++;
10086                 }
10087
10088                 entry->count++;
10089         }
10090
10091         /* Yay all the backrefs agree, carry on good sir */
10092         if (nr_entries <= 1 && !mismatch)
10093                 goto out;
10094
10095         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
10096                 "%Lu\n", rec->start);
10097
10098         /*
10099          * First we want to see if the backrefs can agree amongst themselves who
10100          * is right, so figure out which one of the entries has the highest
10101          * count.
10102          */
10103         best = find_most_right_entry(&entries);
10104
10105         /*
10106          * Ok so we may have an even split between what the backrefs think, so
10107          * this is where we use the extent ref to see what it thinks.
10108          */
10109         if (!best) {
10110                 entry = find_entry(&entries, rec->start, rec->nr);
10111                 if (!entry && (!broken_entries || !rec->found_rec)) {
10112                         fprintf(stderr, "Backrefs don't agree with each other "
10113                                 "and extent record doesn't agree with anybody,"
10114                                 " so we can't fix bytenr %Lu bytes %Lu\n",
10115                                 rec->start, rec->nr);
10116                         ret = -EINVAL;
10117                         goto out;
10118                 } else if (!entry) {
10119                         /*
10120                          * Ok our backrefs were broken, we'll assume this is the
10121                          * correct value and add an entry for this range.
10122                          */
10123                         entry = malloc(sizeof(struct extent_entry));
10124                         if (!entry) {
10125                                 ret = -ENOMEM;
10126                                 goto out;
10127                         }
10128                         memset(entry, 0, sizeof(*entry));
10129                         entry->bytenr = rec->start;
10130                         entry->bytes = rec->nr;
10131                         list_add_tail(&entry->list, &entries);
10132                         nr_entries++;
10133                 }
10134                 entry->count++;
10135                 best = find_most_right_entry(&entries);
10136                 if (!best) {
10137                         fprintf(stderr, "Backrefs and extent record evenly "
10138                                 "split on who is right, this is going to "
10139                                 "require user input to fix bytenr %Lu bytes "
10140                                 "%Lu\n", rec->start, rec->nr);
10141                         ret = -EINVAL;
10142                         goto out;
10143                 }
10144         }
10145
10146         /*
10147          * I don't think this can happen currently as we'll abort() if we catch
10148          * this case higher up, but in case somebody removes that we still can't
10149          * deal with it properly here yet, so just bail out of that's the case.
10150          */
10151         if (best->bytenr != rec->start) {
10152                 fprintf(stderr, "Extent start and backref starts don't match, "
10153                         "please use btrfs-image on this file system and send "
10154                         "it to a btrfs developer so they can make fsck fix "
10155                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
10156                         rec->start, rec->nr);
10157                 ret = -EINVAL;
10158                 goto out;
10159         }
10160
10161         /*
10162          * Ok great we all agreed on an extent record, let's go find the real
10163          * references and fix up the ones that don't match.
10164          */
10165         rbtree_postorder_for_each_entry_safe(back, tmp,
10166                                              &rec->backref_tree, node) {
10167                 if (back->full_backref || !back->is_data)
10168                         continue;
10169
10170                 dback = to_data_backref(back);
10171
10172                 /*
10173                  * Still ignoring backrefs that don't have a real ref attached
10174                  * to them.
10175                  */
10176                 if (dback->found_ref == 0)
10177                         continue;
10178
10179                 if (dback->bytes == best->bytes &&
10180                     dback->disk_bytenr == best->bytenr)
10181                         continue;
10182
10183                 ret = repair_ref(info, path, dback, best);
10184                 if (ret)
10185                         goto out;
10186         }
10187
10188         /*
10189          * Ok we messed with the actual refs, which means we need to drop our
10190          * entire cache and go back and rescan.  I know this is a huge pain and
10191          * adds a lot of extra work, but it's the only way to be safe.  Once all
10192          * the backrefs agree we may not need to do anything to the extent
10193          * record itself.
10194          */
10195         ret = -EAGAIN;
10196 out:
10197         while (!list_empty(&entries)) {
10198                 entry = list_entry(entries.next, struct extent_entry, list);
10199                 list_del_init(&entry->list);
10200                 free(entry);
10201         }
10202         return ret;
10203 }
10204
10205 static int process_duplicates(struct cache_tree *extent_cache,
10206                               struct extent_record *rec)
10207 {
10208         struct extent_record *good, *tmp;
10209         struct cache_extent *cache;
10210         int ret;
10211
10212         /*
10213          * If we found a extent record for this extent then return, or if we
10214          * have more than one duplicate we are likely going to need to delete
10215          * something.
10216          */
10217         if (rec->found_rec || rec->num_duplicates > 1)
10218                 return 0;
10219
10220         /* Shouldn't happen but just in case */
10221         BUG_ON(!rec->num_duplicates);
10222
10223         /*
10224          * So this happens if we end up with a backref that doesn't match the
10225          * actual extent entry.  So either the backref is bad or the extent
10226          * entry is bad.  Either way we want to have the extent_record actually
10227          * reflect what we found in the extent_tree, so we need to take the
10228          * duplicate out and use that as the extent_record since the only way we
10229          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
10230          */
10231         remove_cache_extent(extent_cache, &rec->cache);
10232
10233         good = to_extent_record(rec->dups.next);
10234         list_del_init(&good->list);
10235         INIT_LIST_HEAD(&good->backrefs);
10236         INIT_LIST_HEAD(&good->dups);
10237         good->cache.start = good->start;
10238         good->cache.size = good->nr;
10239         good->content_checked = 0;
10240         good->owner_ref_checked = 0;
10241         good->num_duplicates = 0;
10242         good->refs = rec->refs;
10243         list_splice_init(&rec->backrefs, &good->backrefs);
10244         while (1) {
10245                 cache = lookup_cache_extent(extent_cache, good->start,
10246                                             good->nr);
10247                 if (!cache)
10248                         break;
10249                 tmp = container_of(cache, struct extent_record, cache);
10250
10251                 /*
10252                  * If we find another overlapping extent and it's found_rec is
10253                  * set then it's a duplicate and we need to try and delete
10254                  * something.
10255                  */
10256                 if (tmp->found_rec || tmp->num_duplicates > 0) {
10257                         if (list_empty(&good->list))
10258                                 list_add_tail(&good->list,
10259                                               &duplicate_extents);
10260                         good->num_duplicates += tmp->num_duplicates + 1;
10261                         list_splice_init(&tmp->dups, &good->dups);
10262                         list_del_init(&tmp->list);
10263                         list_add_tail(&tmp->list, &good->dups);
10264                         remove_cache_extent(extent_cache, &tmp->cache);
10265                         continue;
10266                 }
10267
10268                 /*
10269                  * Ok we have another non extent item backed extent rec, so lets
10270                  * just add it to this extent and carry on like we did above.
10271                  */
10272                 good->refs += tmp->refs;
10273                 list_splice_init(&tmp->backrefs, &good->backrefs);
10274                 remove_cache_extent(extent_cache, &tmp->cache);
10275                 free(tmp);
10276         }
10277         ret = insert_cache_extent(extent_cache, &good->cache);
10278         BUG_ON(ret);
10279         free(rec);
10280         return good->num_duplicates ? 0 : 1;
10281 }
10282
10283 static int delete_duplicate_records(struct btrfs_root *root,
10284                                     struct extent_record *rec)
10285 {
10286         struct btrfs_trans_handle *trans;
10287         LIST_HEAD(delete_list);
10288         struct btrfs_path path;
10289         struct extent_record *tmp, *good, *n;
10290         int nr_del = 0;
10291         int ret = 0, err;
10292         struct btrfs_key key;
10293
10294         btrfs_init_path(&path);
10295
10296         good = rec;
10297         /* Find the record that covers all of the duplicates. */
10298         list_for_each_entry(tmp, &rec->dups, list) {
10299                 if (good->start < tmp->start)
10300                         continue;
10301                 if (good->nr > tmp->nr)
10302                         continue;
10303
10304                 if (tmp->start + tmp->nr < good->start + good->nr) {
10305                         fprintf(stderr, "Ok we have overlapping extents that "
10306                                 "aren't completely covered by each other, this "
10307                                 "is going to require more careful thought.  "
10308                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10309                                 tmp->start, tmp->nr, good->start, good->nr);
10310                         abort();
10311                 }
10312                 good = tmp;
10313         }
10314
10315         if (good != rec)
10316                 list_add_tail(&rec->list, &delete_list);
10317
10318         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10319                 if (tmp == good)
10320                         continue;
10321                 list_move_tail(&tmp->list, &delete_list);
10322         }
10323
10324         root = root->fs_info->extent_root;
10325         trans = btrfs_start_transaction(root, 1);
10326         if (IS_ERR(trans)) {
10327                 ret = PTR_ERR(trans);
10328                 goto out;
10329         }
10330
10331         list_for_each_entry(tmp, &delete_list, list) {
10332                 if (tmp->found_rec == 0)
10333                         continue;
10334                 key.objectid = tmp->start;
10335                 key.type = BTRFS_EXTENT_ITEM_KEY;
10336                 key.offset = tmp->nr;
10337
10338                 /* Shouldn't happen but just in case */
10339                 if (tmp->metadata) {
10340                         fprintf(stderr, "Well this shouldn't happen, extent "
10341                                 "record overlaps but is metadata? "
10342                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10343                         abort();
10344                 }
10345
10346                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10347                 if (ret) {
10348                         if (ret > 0)
10349                                 ret = -EINVAL;
10350                         break;
10351                 }
10352                 ret = btrfs_del_item(trans, root, &path);
10353                 if (ret)
10354                         break;
10355                 btrfs_release_path(&path);
10356                 nr_del++;
10357         }
10358         err = btrfs_commit_transaction(trans, root);
10359         if (err && !ret)
10360                 ret = err;
10361 out:
10362         while (!list_empty(&delete_list)) {
10363                 tmp = to_extent_record(delete_list.next);
10364                 list_del_init(&tmp->list);
10365                 if (tmp == rec)
10366                         continue;
10367                 free(tmp);
10368         }
10369
10370         while (!list_empty(&rec->dups)) {
10371                 tmp = to_extent_record(rec->dups.next);
10372                 list_del_init(&tmp->list);
10373                 free(tmp);
10374         }
10375
10376         btrfs_release_path(&path);
10377
10378         if (!ret && !nr_del)
10379                 rec->num_duplicates = 0;
10380
10381         return ret ? ret : nr_del;
10382 }
10383
10384 static int find_possible_backrefs(struct btrfs_fs_info *info,
10385                                   struct btrfs_path *path,
10386                                   struct cache_tree *extent_cache,
10387                                   struct extent_record *rec)
10388 {
10389         struct btrfs_root *root;
10390         struct extent_backref *back, *tmp;
10391         struct data_backref *dback;
10392         struct cache_extent *cache;
10393         struct btrfs_file_extent_item *fi;
10394         struct btrfs_key key;
10395         u64 bytenr, bytes;
10396         int ret;
10397
10398         rbtree_postorder_for_each_entry_safe(back, tmp,
10399                                              &rec->backref_tree, node) {
10400                 /* Don't care about full backrefs (poor unloved backrefs) */
10401                 if (back->full_backref || !back->is_data)
10402                         continue;
10403
10404                 dback = to_data_backref(back);
10405
10406                 /* We found this one, we don't need to do a lookup */
10407                 if (dback->found_ref)
10408                         continue;
10409
10410                 key.objectid = dback->root;
10411                 key.type = BTRFS_ROOT_ITEM_KEY;
10412                 key.offset = (u64)-1;
10413
10414                 root = btrfs_read_fs_root(info, &key);
10415
10416                 /* No root, definitely a bad ref, skip */
10417                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10418                         continue;
10419                 /* Other err, exit */
10420                 if (IS_ERR(root))
10421                         return PTR_ERR(root);
10422
10423                 key.objectid = dback->owner;
10424                 key.type = BTRFS_EXTENT_DATA_KEY;
10425                 key.offset = dback->offset;
10426                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10427                 if (ret) {
10428                         btrfs_release_path(path);
10429                         if (ret < 0)
10430                                 return ret;
10431                         /* Didn't find it, we can carry on */
10432                         ret = 0;
10433                         continue;
10434                 }
10435
10436                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10437                                     struct btrfs_file_extent_item);
10438                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10439                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10440                 btrfs_release_path(path);
10441                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10442                 if (cache) {
10443                         struct extent_record *tmp;
10444                         tmp = container_of(cache, struct extent_record, cache);
10445
10446                         /*
10447                          * If we found an extent record for the bytenr for this
10448                          * particular backref then we can't add it to our
10449                          * current extent record.  We only want to add backrefs
10450                          * that don't have a corresponding extent item in the
10451                          * extent tree since they likely belong to this record
10452                          * and we need to fix it if it doesn't match bytenrs.
10453                          */
10454                         if  (tmp->found_rec)
10455                                 continue;
10456                 }
10457
10458                 dback->found_ref += 1;
10459                 dback->disk_bytenr = bytenr;
10460                 dback->bytes = bytes;
10461
10462                 /*
10463                  * Set this so the verify backref code knows not to trust the
10464                  * values in this backref.
10465                  */
10466                 back->broken = 1;
10467         }
10468
10469         return 0;
10470 }
10471
10472 /*
10473  * Record orphan data ref into corresponding root.
10474  *
10475  * Return 0 if the extent item contains data ref and recorded.
10476  * Return 1 if the extent item contains no useful data ref
10477  *   On that case, it may contains only shared_dataref or metadata backref
10478  *   or the file extent exists(this should be handled by the extent bytenr
10479  *   recovery routine)
10480  * Return <0 if something goes wrong.
10481  */
10482 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10483                                       struct extent_record *rec)
10484 {
10485         struct btrfs_key key;
10486         struct btrfs_root *dest_root;
10487         struct extent_backref *back, *tmp;
10488         struct data_backref *dback;
10489         struct orphan_data_extent *orphan;
10490         struct btrfs_path path;
10491         int recorded_data_ref = 0;
10492         int ret = 0;
10493
10494         if (rec->metadata)
10495                 return 1;
10496         btrfs_init_path(&path);
10497         rbtree_postorder_for_each_entry_safe(back, tmp,
10498                                              &rec->backref_tree, node) {
10499                 if (back->full_backref || !back->is_data ||
10500                     !back->found_extent_tree)
10501                         continue;
10502                 dback = to_data_backref(back);
10503                 if (dback->found_ref)
10504                         continue;
10505                 key.objectid = dback->root;
10506                 key.type = BTRFS_ROOT_ITEM_KEY;
10507                 key.offset = (u64)-1;
10508
10509                 dest_root = btrfs_read_fs_root(fs_info, &key);
10510
10511                 /* For non-exist root we just skip it */
10512                 if (IS_ERR(dest_root) || !dest_root)
10513                         continue;
10514
10515                 key.objectid = dback->owner;
10516                 key.type = BTRFS_EXTENT_DATA_KEY;
10517                 key.offset = dback->offset;
10518
10519                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10520                 btrfs_release_path(&path);
10521                 /*
10522                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10523                  * we need to record it for inode/file extent rebuild.
10524                  * For ret > 0, we record it only for file extent rebuild.
10525                  * For ret == 0, the file extent exists but only bytenr
10526                  * mismatch, let the original bytenr fix routine to handle,
10527                  * don't record it.
10528                  */
10529                 if (ret == 0)
10530                         continue;
10531                 ret = 0;
10532                 orphan = malloc(sizeof(*orphan));
10533                 if (!orphan) {
10534                         ret = -ENOMEM;
10535                         goto out;
10536                 }
10537                 INIT_LIST_HEAD(&orphan->list);
10538                 orphan->root = dback->root;
10539                 orphan->objectid = dback->owner;
10540                 orphan->offset = dback->offset;
10541                 orphan->disk_bytenr = rec->cache.start;
10542                 orphan->disk_len = rec->cache.size;
10543                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10544                 recorded_data_ref = 1;
10545         }
10546 out:
10547         btrfs_release_path(&path);
10548         if (!ret)
10549                 return !recorded_data_ref;
10550         else
10551                 return ret;
10552 }
10553
10554 /*
10555  * when an incorrect extent item is found, this will delete
10556  * all of the existing entries for it and recreate them
10557  * based on what the tree scan found.
10558  */
10559 static int fixup_extent_refs(struct btrfs_fs_info *info,
10560                              struct cache_tree *extent_cache,
10561                              struct extent_record *rec)
10562 {
10563         struct btrfs_trans_handle *trans = NULL;
10564         int ret;
10565         struct btrfs_path path;
10566         struct cache_extent *cache;
10567         struct extent_backref *back, *tmp;
10568         int allocated = 0;
10569         u64 flags = 0;
10570
10571         if (rec->flag_block_full_backref)
10572                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10573
10574         btrfs_init_path(&path);
10575         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10576                 /*
10577                  * Sometimes the backrefs themselves are so broken they don't
10578                  * get attached to any meaningful rec, so first go back and
10579                  * check any of our backrefs that we couldn't find and throw
10580                  * them into the list if we find the backref so that
10581                  * verify_backrefs can figure out what to do.
10582                  */
10583                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10584                 if (ret < 0)
10585                         goto out;
10586         }
10587
10588         /* step one, make sure all of the backrefs agree */
10589         ret = verify_backrefs(info, &path, rec);
10590         if (ret < 0)
10591                 goto out;
10592
10593         trans = btrfs_start_transaction(info->extent_root, 1);
10594         if (IS_ERR(trans)) {
10595                 ret = PTR_ERR(trans);
10596                 goto out;
10597         }
10598
10599         /* step two, delete all the existing records */
10600         ret = delete_extent_records(trans, info->extent_root, &path,
10601                                     rec->start);
10602
10603         if (ret < 0)
10604                 goto out;
10605
10606         /* was this block corrupt?  If so, don't add references to it */
10607         cache = lookup_cache_extent(info->corrupt_blocks,
10608                                     rec->start, rec->max_size);
10609         if (cache) {
10610                 ret = 0;
10611                 goto out;
10612         }
10613
10614         /* step three, recreate all the refs we did find */
10615         rbtree_postorder_for_each_entry_safe(back, tmp,
10616                                              &rec->backref_tree, node) {
10617                 /*
10618                  * if we didn't find any references, don't create a
10619                  * new extent record
10620                  */
10621                 if (!back->found_ref)
10622                         continue;
10623
10624                 rec->bad_full_backref = 0;
10625                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10626                 allocated = 1;
10627
10628                 if (ret)
10629                         goto out;
10630         }
10631 out:
10632         if (trans) {
10633                 int err = btrfs_commit_transaction(trans, info->extent_root);
10634                 if (!ret)
10635                         ret = err;
10636         }
10637
10638         if (!ret)
10639                 fprintf(stderr, "Repaired extent references for %llu\n",
10640                                 (unsigned long long)rec->start);
10641
10642         btrfs_release_path(&path);
10643         return ret;
10644 }
10645
10646 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10647                               struct extent_record *rec)
10648 {
10649         struct btrfs_trans_handle *trans;
10650         struct btrfs_root *root = fs_info->extent_root;
10651         struct btrfs_path path;
10652         struct btrfs_extent_item *ei;
10653         struct btrfs_key key;
10654         u64 flags;
10655         int ret = 0;
10656
10657         key.objectid = rec->start;
10658         if (rec->metadata) {
10659                 key.type = BTRFS_METADATA_ITEM_KEY;
10660                 key.offset = rec->info_level;
10661         } else {
10662                 key.type = BTRFS_EXTENT_ITEM_KEY;
10663                 key.offset = rec->max_size;
10664         }
10665
10666         trans = btrfs_start_transaction(root, 0);
10667         if (IS_ERR(trans))
10668                 return PTR_ERR(trans);
10669
10670         btrfs_init_path(&path);
10671         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10672         if (ret < 0) {
10673                 btrfs_release_path(&path);
10674                 btrfs_commit_transaction(trans, root);
10675                 return ret;
10676         } else if (ret) {
10677                 fprintf(stderr, "Didn't find extent for %llu\n",
10678                         (unsigned long long)rec->start);
10679                 btrfs_release_path(&path);
10680                 btrfs_commit_transaction(trans, root);
10681                 return -ENOENT;
10682         }
10683
10684         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10685                             struct btrfs_extent_item);
10686         flags = btrfs_extent_flags(path.nodes[0], ei);
10687         if (rec->flag_block_full_backref) {
10688                 fprintf(stderr, "setting full backref on %llu\n",
10689                         (unsigned long long)key.objectid);
10690                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10691         } else {
10692                 fprintf(stderr, "clearing full backref on %llu\n",
10693                         (unsigned long long)key.objectid);
10694                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10695         }
10696         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10697         btrfs_mark_buffer_dirty(path.nodes[0]);
10698         btrfs_release_path(&path);
10699         ret = btrfs_commit_transaction(trans, root);
10700         if (!ret)
10701                 fprintf(stderr, "Repaired extent flags for %llu\n",
10702                                 (unsigned long long)rec->start);
10703
10704         return ret;
10705 }
10706
10707 /* right now we only prune from the extent allocation tree */
10708 static int prune_one_block(struct btrfs_trans_handle *trans,
10709                            struct btrfs_fs_info *info,
10710                            struct btrfs_corrupt_block *corrupt)
10711 {
10712         int ret;
10713         struct btrfs_path path;
10714         struct extent_buffer *eb;
10715         u64 found;
10716         int slot;
10717         int nritems;
10718         int level = corrupt->level + 1;
10719
10720         btrfs_init_path(&path);
10721 again:
10722         /* we want to stop at the parent to our busted block */
10723         path.lowest_level = level;
10724
10725         ret = btrfs_search_slot(trans, info->extent_root,
10726                                 &corrupt->key, &path, -1, 1);
10727
10728         if (ret < 0)
10729                 goto out;
10730
10731         eb = path.nodes[level];
10732         if (!eb) {
10733                 ret = -ENOENT;
10734                 goto out;
10735         }
10736
10737         /*
10738          * hopefully the search gave us the block we want to prune,
10739          * lets try that first
10740          */
10741         slot = path.slots[level];
10742         found =  btrfs_node_blockptr(eb, slot);
10743         if (found == corrupt->cache.start)
10744                 goto del_ptr;
10745
10746         nritems = btrfs_header_nritems(eb);
10747
10748         /* the search failed, lets scan this node and hope we find it */
10749         for (slot = 0; slot < nritems; slot++) {
10750                 found =  btrfs_node_blockptr(eb, slot);
10751                 if (found == corrupt->cache.start)
10752                         goto del_ptr;
10753         }
10754         /*
10755          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10756          * to this block
10757          */
10758         if (eb == info->extent_root->node) {
10759                 ret = -ENOENT;
10760                 goto out;
10761         } else {
10762                 level++;
10763                 btrfs_release_path(&path);
10764                 goto again;
10765         }
10766
10767 del_ptr:
10768         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10769         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10770
10771 out:
10772         btrfs_release_path(&path);
10773         return ret;
10774 }
10775
10776 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10777 {
10778         struct btrfs_trans_handle *trans = NULL;
10779         struct cache_extent *cache;
10780         struct btrfs_corrupt_block *corrupt;
10781
10782         while (1) {
10783                 cache = search_cache_extent(info->corrupt_blocks, 0);
10784                 if (!cache)
10785                         break;
10786                 if (!trans) {
10787                         trans = btrfs_start_transaction(info->extent_root, 1);
10788                         if (IS_ERR(trans))
10789                                 return PTR_ERR(trans);
10790                 }
10791                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10792                 prune_one_block(trans, info, corrupt);
10793                 remove_cache_extent(info->corrupt_blocks, cache);
10794         }
10795         if (trans)
10796                 return btrfs_commit_transaction(trans, info->extent_root);
10797         return 0;
10798 }
10799
10800 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10801 {
10802         struct btrfs_block_group_cache *cache;
10803         u64 start, end;
10804         int ret;
10805
10806         while (1) {
10807                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10808                                             &start, &end, EXTENT_DIRTY);
10809                 if (ret)
10810                         break;
10811                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10812         }
10813
10814         start = 0;
10815         while (1) {
10816                 cache = btrfs_lookup_first_block_group(fs_info, start);
10817                 if (!cache)
10818                         break;
10819                 if (cache->cached)
10820                         cache->cached = 0;
10821                 start = cache->key.objectid + cache->key.offset;
10822         }
10823 }
10824
10825 static int check_extent_refs(struct btrfs_root *root,
10826                              struct cache_tree *extent_cache)
10827 {
10828         struct extent_record *rec;
10829         struct cache_extent *cache;
10830         int ret = 0;
10831         int had_dups = 0;
10832         int err = 0;
10833
10834         if (repair) {
10835                 /*
10836                  * if we're doing a repair, we have to make sure
10837                  * we don't allocate from the problem extents.
10838                  * In the worst case, this will be all the
10839                  * extents in the FS
10840                  */
10841                 cache = search_cache_extent(extent_cache, 0);
10842                 while(cache) {
10843                         rec = container_of(cache, struct extent_record, cache);
10844                         set_extent_dirty(root->fs_info->excluded_extents,
10845                                          rec->start,
10846                                          rec->start + rec->max_size - 1);
10847                         cache = next_cache_extent(cache);
10848                 }
10849
10850                 /* pin down all the corrupted blocks too */
10851                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10852                 while(cache) {
10853                         set_extent_dirty(root->fs_info->excluded_extents,
10854                                          cache->start,
10855                                          cache->start + cache->size - 1);
10856                         cache = next_cache_extent(cache);
10857                 }
10858                 prune_corrupt_blocks(root->fs_info);
10859                 reset_cached_block_groups(root->fs_info);
10860         }
10861
10862         reset_cached_block_groups(root->fs_info);
10863
10864         /*
10865          * We need to delete any duplicate entries we find first otherwise we
10866          * could mess up the extent tree when we have backrefs that actually
10867          * belong to a different extent item and not the weird duplicate one.
10868          */
10869         while (repair && !list_empty(&duplicate_extents)) {
10870                 rec = to_extent_record(duplicate_extents.next);
10871                 list_del_init(&rec->list);
10872
10873                 /* Sometimes we can find a backref before we find an actual
10874                  * extent, so we need to process it a little bit to see if there
10875                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10876                  * if this is a backref screwup.  If we need to delete stuff
10877                  * process_duplicates() will return 0, otherwise it will return
10878                  * 1 and we
10879                  */
10880                 if (process_duplicates(extent_cache, rec))
10881                         continue;
10882                 ret = delete_duplicate_records(root, rec);
10883                 if (ret < 0)
10884                         return ret;
10885                 /*
10886                  * delete_duplicate_records will return the number of entries
10887                  * deleted, so if it's greater than 0 then we know we actually
10888                  * did something and we need to remove.
10889                  */
10890                 if (ret)
10891                         had_dups = 1;
10892         }
10893
10894         if (had_dups)
10895                 return -EAGAIN;
10896
10897         while(1) {
10898                 int cur_err = 0;
10899                 int fix = 0;
10900
10901                 cache = search_cache_extent(extent_cache, 0);
10902                 if (!cache)
10903                         break;
10904                 rec = container_of(cache, struct extent_record, cache);
10905                 if (rec->num_duplicates) {
10906                         fprintf(stderr, "extent item %llu has multiple extent "
10907                                 "items\n", (unsigned long long)rec->start);
10908                         cur_err = 1;
10909                 }
10910
10911                 if (rec->refs != rec->extent_item_refs) {
10912                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10913                                 (unsigned long long)rec->start,
10914                                 (unsigned long long)rec->nr);
10915                         fprintf(stderr, "extent item %llu, found %llu\n",
10916                                 (unsigned long long)rec->extent_item_refs,
10917                                 (unsigned long long)rec->refs);
10918                         ret = record_orphan_data_extents(root->fs_info, rec);
10919                         if (ret < 0)
10920                                 goto repair_abort;
10921                         fix = ret;
10922                         cur_err = 1;
10923                 }
10924                 if (all_backpointers_checked(rec, 1)) {
10925                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10926                                 (unsigned long long)rec->start,
10927                                 (unsigned long long)rec->nr);
10928                         fix = 1;
10929                         cur_err = 1;
10930                 }
10931                 if (!rec->owner_ref_checked) {
10932                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10933                                 (unsigned long long)rec->start,
10934                                 (unsigned long long)rec->nr);
10935                         fix = 1;
10936                         cur_err = 1;
10937                 }
10938
10939                 if (repair && fix) {
10940                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10941                         if (ret)
10942                                 goto repair_abort;
10943                 }
10944
10945
10946                 if (rec->bad_full_backref) {
10947                         fprintf(stderr, "bad full backref, on [%llu]\n",
10948                                 (unsigned long long)rec->start);
10949                         if (repair) {
10950                                 ret = fixup_extent_flags(root->fs_info, rec);
10951                                 if (ret)
10952                                         goto repair_abort;
10953                                 fix = 1;
10954                         }
10955                         cur_err = 1;
10956                 }
10957                 /*
10958                  * Although it's not a extent ref's problem, we reuse this
10959                  * routine for error reporting.
10960                  * No repair function yet.
10961                  */
10962                 if (rec->crossing_stripes) {
10963                         fprintf(stderr,
10964                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10965                                 rec->start, rec->start + rec->max_size);
10966                         cur_err = 1;
10967                 }
10968
10969                 if (rec->wrong_chunk_type) {
10970                         fprintf(stderr,
10971                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10972                                 rec->start, rec->start + rec->max_size);
10973                         cur_err = 1;
10974                 }
10975
10976                 err = cur_err;
10977                 remove_cache_extent(extent_cache, cache);
10978                 free_all_extent_backrefs(rec);
10979                 if (!init_extent_tree && repair && (!cur_err || fix))
10980                         clear_extent_dirty(root->fs_info->excluded_extents,
10981                                            rec->start,
10982                                            rec->start + rec->max_size - 1);
10983                 free(rec);
10984         }
10985 repair_abort:
10986         if (repair) {
10987                 if (ret && ret != -EAGAIN) {
10988                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10989                         exit(1);
10990                 } else if (!ret) {
10991                         struct btrfs_trans_handle *trans;
10992
10993                         root = root->fs_info->extent_root;
10994                         trans = btrfs_start_transaction(root, 1);
10995                         if (IS_ERR(trans)) {
10996                                 ret = PTR_ERR(trans);
10997                                 goto repair_abort;
10998                         }
10999
11000                         ret = btrfs_fix_block_accounting(trans, root);
11001                         if (ret)
11002                                 goto repair_abort;
11003                         ret = btrfs_commit_transaction(trans, root);
11004                         if (ret)
11005                                 goto repair_abort;
11006                 }
11007                 return ret;
11008         }
11009
11010         if (err)
11011                 err = -EIO;
11012         return err;
11013 }
11014
11015 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
11016 {
11017         u64 stripe_size;
11018
11019         if (type & BTRFS_BLOCK_GROUP_RAID0) {
11020                 stripe_size = length;
11021                 stripe_size /= num_stripes;
11022         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
11023                 stripe_size = length * 2;
11024                 stripe_size /= num_stripes;
11025         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
11026                 stripe_size = length;
11027                 stripe_size /= (num_stripes - 1);
11028         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
11029                 stripe_size = length;
11030                 stripe_size /= (num_stripes - 2);
11031         } else {
11032                 stripe_size = length;
11033         }
11034         return stripe_size;
11035 }
11036
11037 /*
11038  * Check the chunk with its block group/dev list ref:
11039  * Return 0 if all refs seems valid.
11040  * Return 1 if part of refs seems valid, need later check for rebuild ref
11041  * like missing block group and needs to search extent tree to rebuild them.
11042  * Return -1 if essential refs are missing and unable to rebuild.
11043  */
11044 static int check_chunk_refs(struct chunk_record *chunk_rec,
11045                             struct block_group_tree *block_group_cache,
11046                             struct device_extent_tree *dev_extent_cache,
11047                             int silent)
11048 {
11049         struct cache_extent *block_group_item;
11050         struct block_group_record *block_group_rec;
11051         struct cache_extent *dev_extent_item;
11052         struct device_extent_record *dev_extent_rec;
11053         u64 devid;
11054         u64 offset;
11055         u64 length;
11056         int metadump_v2 = 0;
11057         int i;
11058         int ret = 0;
11059
11060         block_group_item = lookup_cache_extent(&block_group_cache->tree,
11061                                                chunk_rec->offset,
11062                                                chunk_rec->length);
11063         if (block_group_item) {
11064                 block_group_rec = container_of(block_group_item,
11065                                                struct block_group_record,
11066                                                cache);
11067                 if (chunk_rec->length != block_group_rec->offset ||
11068                     chunk_rec->offset != block_group_rec->objectid ||
11069                     (!metadump_v2 &&
11070                      chunk_rec->type_flags != block_group_rec->flags)) {
11071                         if (!silent)
11072                                 fprintf(stderr,
11073                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
11074                                         chunk_rec->objectid,
11075                                         chunk_rec->type,
11076                                         chunk_rec->offset,
11077                                         chunk_rec->length,
11078                                         chunk_rec->offset,
11079                                         chunk_rec->type_flags,
11080                                         block_group_rec->objectid,
11081                                         block_group_rec->type,
11082                                         block_group_rec->offset,
11083                                         block_group_rec->offset,
11084                                         block_group_rec->objectid,
11085                                         block_group_rec->flags);
11086                         ret = -1;
11087                 } else {
11088                         list_del_init(&block_group_rec->list);
11089                         chunk_rec->bg_rec = block_group_rec;
11090                 }
11091         } else {
11092                 if (!silent)
11093                         fprintf(stderr,
11094                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
11095                                 chunk_rec->objectid,
11096                                 chunk_rec->type,
11097                                 chunk_rec->offset,
11098                                 chunk_rec->length,
11099                                 chunk_rec->offset,
11100                                 chunk_rec->type_flags);
11101                 ret = 1;
11102         }
11103
11104         if (metadump_v2)
11105                 return ret;
11106
11107         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
11108                                     chunk_rec->num_stripes);
11109         for (i = 0; i < chunk_rec->num_stripes; ++i) {
11110                 devid = chunk_rec->stripes[i].devid;
11111                 offset = chunk_rec->stripes[i].offset;
11112                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
11113                                                        devid, offset, length);
11114                 if (dev_extent_item) {
11115                         dev_extent_rec = container_of(dev_extent_item,
11116                                                 struct device_extent_record,
11117                                                 cache);
11118                         if (dev_extent_rec->objectid != devid ||
11119                             dev_extent_rec->offset != offset ||
11120                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
11121                             dev_extent_rec->length != length) {
11122                                 if (!silent)
11123                                         fprintf(stderr,
11124                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
11125                                                 chunk_rec->objectid,
11126                                                 chunk_rec->type,
11127                                                 chunk_rec->offset,
11128                                                 chunk_rec->stripes[i].devid,
11129                                                 chunk_rec->stripes[i].offset,
11130                                                 dev_extent_rec->objectid,
11131                                                 dev_extent_rec->offset,
11132                                                 dev_extent_rec->length);
11133                                 ret = -1;
11134                         } else {
11135                                 list_move(&dev_extent_rec->chunk_list,
11136                                           &chunk_rec->dextents);
11137                         }
11138                 } else {
11139                         if (!silent)
11140                                 fprintf(stderr,
11141                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
11142                                         chunk_rec->objectid,
11143                                         chunk_rec->type,
11144                                         chunk_rec->offset,
11145                                         chunk_rec->stripes[i].devid,
11146                                         chunk_rec->stripes[i].offset);
11147                         ret = -1;
11148                 }
11149         }
11150         return ret;
11151 }
11152
11153 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
11154 int check_chunks(struct cache_tree *chunk_cache,
11155                  struct block_group_tree *block_group_cache,
11156                  struct device_extent_tree *dev_extent_cache,
11157                  struct list_head *good, struct list_head *bad,
11158                  struct list_head *rebuild, int silent)
11159 {
11160         struct cache_extent *chunk_item;
11161         struct chunk_record *chunk_rec;
11162         struct block_group_record *bg_rec;
11163         struct device_extent_record *dext_rec;
11164         int err;
11165         int ret = 0;
11166
11167         chunk_item = first_cache_extent(chunk_cache);
11168         while (chunk_item) {
11169                 chunk_rec = container_of(chunk_item, struct chunk_record,
11170                                          cache);
11171                 err = check_chunk_refs(chunk_rec, block_group_cache,
11172                                        dev_extent_cache, silent);
11173                 if (err < 0)
11174                         ret = err;
11175                 if (err == 0 && good)
11176                         list_add_tail(&chunk_rec->list, good);
11177                 if (err > 0 && rebuild)
11178                         list_add_tail(&chunk_rec->list, rebuild);
11179                 if (err < 0 && bad)
11180                         list_add_tail(&chunk_rec->list, bad);
11181                 chunk_item = next_cache_extent(chunk_item);
11182         }
11183
11184         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
11185                 if (!silent)
11186                         fprintf(stderr,
11187                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
11188                                 bg_rec->objectid,
11189                                 bg_rec->offset,
11190                                 bg_rec->flags);
11191                 if (!ret)
11192                         ret = 1;
11193         }
11194
11195         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
11196                             chunk_list) {
11197                 if (!silent)
11198                         fprintf(stderr,
11199                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
11200                                 dext_rec->objectid,
11201                                 dext_rec->offset,
11202                                 dext_rec->length);
11203                 if (!ret)
11204                         ret = 1;
11205         }
11206         return ret;
11207 }
11208
11209
11210 static int check_device_used(struct device_record *dev_rec,
11211                              struct device_extent_tree *dext_cache)
11212 {
11213         struct cache_extent *cache;
11214         struct device_extent_record *dev_extent_rec;
11215         u64 total_byte = 0;
11216
11217         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
11218         while (cache) {
11219                 dev_extent_rec = container_of(cache,
11220                                               struct device_extent_record,
11221                                               cache);
11222                 if (dev_extent_rec->objectid != dev_rec->devid)
11223                         break;
11224
11225                 list_del_init(&dev_extent_rec->device_list);
11226                 total_byte += dev_extent_rec->length;
11227                 cache = next_cache_extent(cache);
11228         }
11229
11230         if (total_byte != dev_rec->byte_used) {
11231                 fprintf(stderr,
11232                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
11233                         total_byte, dev_rec->byte_used, dev_rec->objectid,
11234                         dev_rec->type, dev_rec->offset);
11235                 return -1;
11236         } else {
11237                 return 0;
11238         }
11239 }
11240
11241 /* check btrfs_dev_item -> btrfs_dev_extent */
11242 static int check_devices(struct rb_root *dev_cache,
11243                          struct device_extent_tree *dev_extent_cache)
11244 {
11245         struct rb_node *dev_node;
11246         struct device_record *dev_rec;
11247         struct device_extent_record *dext_rec;
11248         int err;
11249         int ret = 0;
11250
11251         dev_node = rb_first(dev_cache);
11252         while (dev_node) {
11253                 dev_rec = container_of(dev_node, struct device_record, node);
11254                 err = check_device_used(dev_rec, dev_extent_cache);
11255                 if (err)
11256                         ret = err;
11257
11258                 dev_node = rb_next(dev_node);
11259         }
11260         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11261                             device_list) {
11262                 fprintf(stderr,
11263                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11264                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
11265                 if (!ret)
11266                         ret = 1;
11267         }
11268         return ret;
11269 }
11270
11271 static int add_root_item_to_list(struct list_head *head,
11272                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11273                                   u8 level, u8 drop_level,
11274                                   struct btrfs_key *drop_key)
11275 {
11276
11277         struct root_item_record *ri_rec;
11278         ri_rec = malloc(sizeof(*ri_rec));
11279         if (!ri_rec)
11280                 return -ENOMEM;
11281         ri_rec->bytenr = bytenr;
11282         ri_rec->objectid = objectid;
11283         ri_rec->level = level;
11284         ri_rec->drop_level = drop_level;
11285         ri_rec->last_snapshot = last_snapshot;
11286         if (drop_key)
11287                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11288         list_add_tail(&ri_rec->list, head);
11289
11290         return 0;
11291 }
11292
11293 static void free_root_item_list(struct list_head *list)
11294 {
11295         struct root_item_record *ri_rec;
11296
11297         while (!list_empty(list)) {
11298                 ri_rec = list_first_entry(list, struct root_item_record,
11299                                           list);
11300                 list_del_init(&ri_rec->list);
11301                 free(ri_rec);
11302         }
11303 }
11304
11305 static int deal_root_from_list(struct list_head *list,
11306                                struct btrfs_root *root,
11307                                struct block_info *bits,
11308                                int bits_nr,
11309                                struct cache_tree *pending,
11310                                struct cache_tree *seen,
11311                                struct cache_tree *reada,
11312                                struct cache_tree *nodes,
11313                                struct cache_tree *extent_cache,
11314                                struct cache_tree *chunk_cache,
11315                                struct rb_root *dev_cache,
11316                                struct block_group_tree *block_group_cache,
11317                                struct device_extent_tree *dev_extent_cache)
11318 {
11319         int ret = 0;
11320         u64 last;
11321
11322         while (!list_empty(list)) {
11323                 struct root_item_record *rec;
11324                 struct extent_buffer *buf;
11325                 rec = list_entry(list->next,
11326                                  struct root_item_record, list);
11327                 last = 0;
11328                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11329                 if (!extent_buffer_uptodate(buf)) {
11330                         free_extent_buffer(buf);
11331                         ret = -EIO;
11332                         break;
11333                 }
11334                 ret = add_root_to_pending(buf, extent_cache, pending,
11335                                     seen, nodes, rec->objectid);
11336                 if (ret < 0)
11337                         break;
11338                 /*
11339                  * To rebuild extent tree, we need deal with snapshot
11340                  * one by one, otherwise we deal with node firstly which
11341                  * can maximize readahead.
11342                  */
11343                 while (1) {
11344                         ret = run_next_block(root, bits, bits_nr, &last,
11345                                              pending, seen, reada, nodes,
11346                                              extent_cache, chunk_cache,
11347                                              dev_cache, block_group_cache,
11348                                              dev_extent_cache, rec);
11349                         if (ret != 0)
11350                                 break;
11351                 }
11352                 free_extent_buffer(buf);
11353                 list_del(&rec->list);
11354                 free(rec);
11355                 if (ret < 0)
11356                         break;
11357         }
11358         while (ret >= 0) {
11359                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11360                                      reada, nodes, extent_cache, chunk_cache,
11361                                      dev_cache, block_group_cache,
11362                                      dev_extent_cache, NULL);
11363                 if (ret != 0) {
11364                         if (ret > 0)
11365                                 ret = 0;
11366                         break;
11367                 }
11368         }
11369         return ret;
11370 }
11371
11372 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11373 {
11374         struct rb_root dev_cache;
11375         struct cache_tree chunk_cache;
11376         struct block_group_tree block_group_cache;
11377         struct device_extent_tree dev_extent_cache;
11378         struct cache_tree extent_cache;
11379         struct cache_tree seen;
11380         struct cache_tree pending;
11381         struct cache_tree reada;
11382         struct cache_tree nodes;
11383         struct extent_io_tree excluded_extents;
11384         struct cache_tree corrupt_blocks;
11385         struct btrfs_path path;
11386         struct btrfs_key key;
11387         struct btrfs_key found_key;
11388         int ret, err = 0;
11389         struct block_info *bits;
11390         int bits_nr;
11391         struct extent_buffer *leaf;
11392         int slot;
11393         struct btrfs_root_item ri;
11394         struct list_head dropping_trees;
11395         struct list_head normal_trees;
11396         struct btrfs_root *root1;
11397         struct btrfs_root *root;
11398         u64 objectid;
11399         u8 level;
11400
11401         root = fs_info->fs_root;
11402         dev_cache = RB_ROOT;
11403         cache_tree_init(&chunk_cache);
11404         block_group_tree_init(&block_group_cache);
11405         device_extent_tree_init(&dev_extent_cache);
11406
11407         cache_tree_init(&extent_cache);
11408         cache_tree_init(&seen);
11409         cache_tree_init(&pending);
11410         cache_tree_init(&nodes);
11411         cache_tree_init(&reada);
11412         cache_tree_init(&corrupt_blocks);
11413         extent_io_tree_init(&excluded_extents);
11414         INIT_LIST_HEAD(&dropping_trees);
11415         INIT_LIST_HEAD(&normal_trees);
11416
11417         if (repair) {
11418                 fs_info->excluded_extents = &excluded_extents;
11419                 fs_info->fsck_extent_cache = &extent_cache;
11420                 fs_info->free_extent_hook = free_extent_hook;
11421                 fs_info->corrupt_blocks = &corrupt_blocks;
11422         }
11423
11424         bits_nr = 1024;
11425         bits = malloc(bits_nr * sizeof(struct block_info));
11426         if (!bits) {
11427                 perror("malloc");
11428                 exit(1);
11429         }
11430
11431         if (ctx.progress_enabled) {
11432                 ctx.tp = TASK_EXTENTS;
11433                 task_start(ctx.info);
11434         }
11435
11436 again:
11437         root1 = fs_info->tree_root;
11438         level = btrfs_header_level(root1->node);
11439         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11440                                     root1->node->start, 0, level, 0, NULL);
11441         if (ret < 0)
11442                 goto out;
11443         root1 = fs_info->chunk_root;
11444         level = btrfs_header_level(root1->node);
11445         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11446                                     root1->node->start, 0, level, 0, NULL);
11447         if (ret < 0)
11448                 goto out;
11449         btrfs_init_path(&path);
11450         key.offset = 0;
11451         key.objectid = 0;
11452         key.type = BTRFS_ROOT_ITEM_KEY;
11453         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11454         if (ret < 0)
11455                 goto out;
11456         while(1) {
11457                 leaf = path.nodes[0];
11458                 slot = path.slots[0];
11459                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11460                         ret = btrfs_next_leaf(root, &path);
11461                         if (ret != 0)
11462                                 break;
11463                         leaf = path.nodes[0];
11464                         slot = path.slots[0];
11465                 }
11466                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11467                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11468                         unsigned long offset;
11469                         u64 last_snapshot;
11470
11471                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11472                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11473                         last_snapshot = btrfs_root_last_snapshot(&ri);
11474                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11475                                 level = btrfs_root_level(&ri);
11476                                 ret = add_root_item_to_list(&normal_trees,
11477                                                 found_key.objectid,
11478                                                 btrfs_root_bytenr(&ri),
11479                                                 last_snapshot, level,
11480                                                 0, NULL);
11481                                 if (ret < 0)
11482                                         goto out;
11483                         } else {
11484                                 level = btrfs_root_level(&ri);
11485                                 objectid = found_key.objectid;
11486                                 btrfs_disk_key_to_cpu(&found_key,
11487                                                       &ri.drop_progress);
11488                                 ret = add_root_item_to_list(&dropping_trees,
11489                                                 objectid,
11490                                                 btrfs_root_bytenr(&ri),
11491                                                 last_snapshot, level,
11492                                                 ri.drop_level, &found_key);
11493                                 if (ret < 0)
11494                                         goto out;
11495                         }
11496                 }
11497                 path.slots[0]++;
11498         }
11499         btrfs_release_path(&path);
11500
11501         /*
11502          * check_block can return -EAGAIN if it fixes something, please keep
11503          * this in mind when dealing with return values from these functions, if
11504          * we get -EAGAIN we want to fall through and restart the loop.
11505          */
11506         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11507                                   &seen, &reada, &nodes, &extent_cache,
11508                                   &chunk_cache, &dev_cache, &block_group_cache,
11509                                   &dev_extent_cache);
11510         if (ret < 0) {
11511                 if (ret == -EAGAIN)
11512                         goto loop;
11513                 goto out;
11514         }
11515         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11516                                   &pending, &seen, &reada, &nodes,
11517                                   &extent_cache, &chunk_cache, &dev_cache,
11518                                   &block_group_cache, &dev_extent_cache);
11519         if (ret < 0) {
11520                 if (ret == -EAGAIN)
11521                         goto loop;
11522                 goto out;
11523         }
11524
11525         ret = check_chunks(&chunk_cache, &block_group_cache,
11526                            &dev_extent_cache, NULL, NULL, NULL, 0);
11527         if (ret) {
11528                 if (ret == -EAGAIN)
11529                         goto loop;
11530                 err = ret;
11531         }
11532
11533         ret = check_extent_refs(root, &extent_cache);
11534         if (ret < 0) {
11535                 if (ret == -EAGAIN)
11536                         goto loop;
11537                 goto out;
11538         }
11539
11540         ret = check_devices(&dev_cache, &dev_extent_cache);
11541         if (ret && err)
11542                 ret = err;
11543
11544 out:
11545         task_stop(ctx.info);
11546         if (repair) {
11547                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11548                 extent_io_tree_cleanup(&excluded_extents);
11549                 fs_info->fsck_extent_cache = NULL;
11550                 fs_info->free_extent_hook = NULL;
11551                 fs_info->corrupt_blocks = NULL;
11552                 fs_info->excluded_extents = NULL;
11553         }
11554         free(bits);
11555         free_chunk_cache_tree(&chunk_cache);
11556         free_device_cache_tree(&dev_cache);
11557         free_block_group_tree(&block_group_cache);
11558         free_device_extent_tree(&dev_extent_cache);
11559         free_extent_cache_tree(&seen);
11560         free_extent_cache_tree(&pending);
11561         free_extent_cache_tree(&reada);
11562         free_extent_cache_tree(&nodes);
11563         free_root_item_list(&normal_trees);
11564         free_root_item_list(&dropping_trees);
11565         return ret;
11566 loop:
11567         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11568         free_extent_cache_tree(&seen);
11569         free_extent_cache_tree(&pending);
11570         free_extent_cache_tree(&reada);
11571         free_extent_cache_tree(&nodes);
11572         free_chunk_cache_tree(&chunk_cache);
11573         free_block_group_tree(&block_group_cache);
11574         free_device_cache_tree(&dev_cache);
11575         free_device_extent_tree(&dev_extent_cache);
11576         free_extent_record_cache(&extent_cache);
11577         free_root_item_list(&normal_trees);
11578         free_root_item_list(&dropping_trees);
11579         extent_io_tree_cleanup(&excluded_extents);
11580         goto again;
11581 }
11582
11583 static int check_extent_inline_ref(struct extent_buffer *eb,
11584                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11585 {
11586         int ret;
11587         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11588
11589         switch (type) {
11590         case BTRFS_TREE_BLOCK_REF_KEY:
11591         case BTRFS_EXTENT_DATA_REF_KEY:
11592         case BTRFS_SHARED_BLOCK_REF_KEY:
11593         case BTRFS_SHARED_DATA_REF_KEY:
11594                 ret = 0;
11595                 break;
11596         default:
11597                 error("extent[%llu %u %llu] has unknown ref type: %d",
11598                       key->objectid, key->type, key->offset, type);
11599                 ret = UNKNOWN_TYPE;
11600                 break;
11601         }
11602
11603         return ret;
11604 }
11605
11606 /*
11607  * Check backrefs of a tree block given by @bytenr or @eb.
11608  *
11609  * @root:       the root containing the @bytenr or @eb
11610  * @eb:         tree block extent buffer, can be NULL
11611  * @bytenr:     bytenr of the tree block to search
11612  * @level:      tree level of the tree block
11613  * @owner:      owner of the tree block
11614  *
11615  * Return >0 for any error found and output error message
11616  * Return 0 for no error found
11617  */
11618 static int check_tree_block_ref(struct btrfs_root *root,
11619                                 struct extent_buffer *eb, u64 bytenr,
11620                                 int level, u64 owner, struct node_refs *nrefs)
11621 {
11622         struct btrfs_key key;
11623         struct btrfs_root *extent_root = root->fs_info->extent_root;
11624         struct btrfs_path path;
11625         struct btrfs_extent_item *ei;
11626         struct btrfs_extent_inline_ref *iref;
11627         struct extent_buffer *leaf;
11628         unsigned long end;
11629         unsigned long ptr;
11630         int slot;
11631         int skinny_level;
11632         int root_level = btrfs_header_level(root->node);
11633         int type;
11634         u32 nodesize = root->fs_info->nodesize;
11635         u32 item_size;
11636         u64 offset;
11637         int tree_reloc_root = 0;
11638         int found_ref = 0;
11639         int err = 0;
11640         int ret;
11641         int strict = 1;
11642         int parent = 0;
11643
11644         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
11645             btrfs_header_bytenr(root->node) == bytenr)
11646                 tree_reloc_root = 1;
11647         btrfs_init_path(&path);
11648         key.objectid = bytenr;
11649         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11650                 key.type = BTRFS_METADATA_ITEM_KEY;
11651         else
11652                 key.type = BTRFS_EXTENT_ITEM_KEY;
11653         key.offset = (u64)-1;
11654
11655         /* Search for the backref in extent tree */
11656         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11657         if (ret < 0) {
11658                 err |= BACKREF_MISSING;
11659                 goto out;
11660         }
11661         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11662         if (ret) {
11663                 err |= BACKREF_MISSING;
11664                 goto out;
11665         }
11666
11667         leaf = path.nodes[0];
11668         slot = path.slots[0];
11669         btrfs_item_key_to_cpu(leaf, &key, slot);
11670
11671         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11672
11673         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11674                 skinny_level = (int)key.offset;
11675                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11676         } else {
11677                 struct btrfs_tree_block_info *info;
11678
11679                 info = (struct btrfs_tree_block_info *)(ei + 1);
11680                 skinny_level = btrfs_tree_block_level(leaf, info);
11681                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11682         }
11683
11684
11685         if (eb) {
11686                 u64 header_gen;
11687                 u64 extent_gen;
11688
11689                 /*
11690                  * Due to the feature of shared tree blocks, if the upper node
11691                  * is a fs root or shared node, the extent of checked node may
11692                  * not be updated until the next CoW.
11693                  */
11694                 if (nrefs)
11695                         strict = should_check_extent_strictly(root, nrefs,
11696                                         level);
11697                 if (!(btrfs_extent_flags(leaf, ei) &
11698                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11699                         error(
11700                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11701                                 key.objectid, nodesize,
11702                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11703                         err = BACKREF_MISMATCH;
11704                 }
11705                 header_gen = btrfs_header_generation(eb);
11706                 extent_gen = btrfs_extent_generation(leaf, ei);
11707                 if (header_gen != extent_gen) {
11708                         error(
11709         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11710                                 key.objectid, nodesize, header_gen,
11711                                 extent_gen);
11712                         err = BACKREF_MISMATCH;
11713                 }
11714                 if (level != skinny_level) {
11715                         error(
11716                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11717                                 key.objectid, nodesize, level, skinny_level);
11718                         err = BACKREF_MISMATCH;
11719                 }
11720                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11721                         error(
11722                         "extent[%llu %u] is referred by other roots than %llu",
11723                                 key.objectid, nodesize, root->objectid);
11724                         err = BACKREF_MISMATCH;
11725                 }
11726         }
11727
11728         /*
11729          * Iterate the extent/metadata item to find the exact backref
11730          */
11731         item_size = btrfs_item_size_nr(leaf, slot);
11732         ptr = (unsigned long)iref;
11733         end = (unsigned long)ei + item_size;
11734
11735         while (ptr < end) {
11736                 iref = (struct btrfs_extent_inline_ref *)ptr;
11737                 type = btrfs_extent_inline_ref_type(leaf, iref);
11738                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11739
11740                 ret = check_extent_inline_ref(leaf, &key, iref);
11741                 if (ret) {
11742                         err |= ret;
11743                         break;
11744                 }
11745                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11746                         if (offset == root->objectid)
11747                                 found_ref = 1;
11748                         if (!strict && owner == offset)
11749                                 found_ref = 1;
11750                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11751                         /*
11752                          * Backref of tree reloc root points to itself, no need
11753                          * to check backref any more.
11754                          */
11755                         if (tree_reloc_root) {
11756                                 found_ref = 1;
11757                         } else {
11758                                 /*
11759                                  * Check if the backref points to valid
11760                                  * referencer
11761                                  */
11762                                 found_ref = !check_tree_block_ref( root, NULL,
11763                                                 offset, level + 1, owner,
11764                                                 NULL);
11765                         }
11766                 }
11767
11768                 if (found_ref)
11769                         break;
11770                 ptr += btrfs_extent_inline_ref_size(type);
11771         }
11772
11773         /*
11774          * Inlined extent item doesn't have what we need, check
11775          * TREE_BLOCK_REF_KEY
11776          */
11777         if (!found_ref) {
11778                 btrfs_release_path(&path);
11779                 key.objectid = bytenr;
11780                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11781                 key.offset = root->objectid;
11782
11783                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11784                 if (!ret)
11785                         found_ref = 1;
11786         }
11787         if (!found_ref)
11788                 err |= BACKREF_MISSING;
11789 out:
11790         btrfs_release_path(&path);
11791         if (nrefs && strict &&
11792             level < root_level && nrefs->full_backref[level + 1])
11793                 parent = nrefs->bytenr[level + 1];
11794         if (eb && (err & BACKREF_MISSING))
11795                 error(
11796         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11797                       bytenr, nodesize, owner, level,
11798                       parent ? "parent" : "root",
11799                       parent ? parent : root->objectid);
11800         return err;
11801 }
11802
11803 /*
11804  * If @err contains BACKREF_MISSING then add extent of the
11805  * file_extent_data_item.
11806  *
11807  * Returns error bits after reapir.
11808  */
11809 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11810                                    struct btrfs_root *root,
11811                                    struct btrfs_path *pathp,
11812                                    struct node_refs *nrefs,
11813                                    int err)
11814 {
11815         struct btrfs_file_extent_item *fi;
11816         struct btrfs_key fi_key;
11817         struct btrfs_key key;
11818         struct btrfs_extent_item *ei;
11819         struct btrfs_path path;
11820         struct btrfs_root *extent_root = root->fs_info->extent_root;
11821         struct extent_buffer *eb;
11822         u64 size;
11823         u64 disk_bytenr;
11824         u64 num_bytes;
11825         u64 parent;
11826         u64 offset;
11827         u64 extent_offset;
11828         u64 file_offset;
11829         int generation;
11830         int slot;
11831         int ret = 0;
11832
11833         eb = pathp->nodes[0];
11834         slot = pathp->slots[0];
11835         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11836         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11837
11838         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11839             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11840                 return err;
11841
11842         file_offset = fi_key.offset;
11843         generation = btrfs_file_extent_generation(eb, fi);
11844         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11845         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11846         extent_offset = btrfs_file_extent_offset(eb, fi);
11847         offset = file_offset - extent_offset;
11848
11849         /* now repair only adds backref */
11850         if ((err & BACKREF_MISSING) == 0)
11851                 return err;
11852
11853         /* search extent item */
11854         key.objectid = disk_bytenr;
11855         key.type = BTRFS_EXTENT_ITEM_KEY;
11856         key.offset = num_bytes;
11857
11858         btrfs_init_path(&path);
11859         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11860         if (ret < 0) {
11861                 ret = -EIO;
11862                 goto out;
11863         }
11864
11865         /* insert an extent item */
11866         if (ret > 0) {
11867                 key.objectid = disk_bytenr;
11868                 key.type = BTRFS_EXTENT_ITEM_KEY;
11869                 key.offset = num_bytes;
11870                 size = sizeof(*ei);
11871
11872                 btrfs_release_path(&path);
11873                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11874                                               size);
11875                 if (ret)
11876                         goto out;
11877                 eb = path.nodes[0];
11878                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11879
11880                 btrfs_set_extent_refs(eb, ei, 0);
11881                 btrfs_set_extent_generation(eb, ei, generation);
11882                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11883
11884                 btrfs_mark_buffer_dirty(eb);
11885                 ret = btrfs_update_block_group(trans, extent_root, disk_bytenr,
11886                                                num_bytes, 1, 0);
11887                 btrfs_release_path(&path);
11888         }
11889
11890         if (nrefs->full_backref[0])
11891                 parent = btrfs_header_bytenr(eb);
11892         else
11893                 parent = 0;
11894
11895         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11896                                    root->objectid,
11897                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11898                                    offset);
11899         if (ret) {
11900                 error(
11901                 "failed to increase extent data backref[%llu %llu] root %llu",
11902                       disk_bytenr, num_bytes, root->objectid);
11903                 goto out;
11904         } else {
11905                 printf("Add one extent data backref [%llu %llu]\n",
11906                        disk_bytenr, num_bytes);
11907         }
11908
11909         err &= ~BACKREF_MISSING;
11910 out:
11911         if (ret)
11912                 error("can't repair root %llu extent data item[%llu %llu]",
11913                       root->objectid, disk_bytenr, num_bytes);
11914         return err;
11915 }
11916
11917 /*
11918  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11919  *
11920  * Return >0 any error found and output error message
11921  * Return 0 for no error found
11922  */
11923 static int check_extent_data_item(struct btrfs_root *root,
11924                                   struct btrfs_path *pathp,
11925                                   struct node_refs *nrefs,  int account_bytes)
11926 {
11927         struct btrfs_file_extent_item *fi;
11928         struct extent_buffer *eb = pathp->nodes[0];
11929         struct btrfs_path path;
11930         struct btrfs_root *extent_root = root->fs_info->extent_root;
11931         struct btrfs_key fi_key;
11932         struct btrfs_key dbref_key;
11933         struct extent_buffer *leaf;
11934         struct btrfs_extent_item *ei;
11935         struct btrfs_extent_inline_ref *iref;
11936         struct btrfs_extent_data_ref *dref;
11937         u64 owner;
11938         u64 disk_bytenr;
11939         u64 disk_num_bytes;
11940         u64 extent_num_bytes;
11941         u64 extent_flags;
11942         u32 item_size;
11943         unsigned long end;
11944         unsigned long ptr;
11945         int type;
11946         u64 ref_root;
11947         int found_dbackref = 0;
11948         int slot = pathp->slots[0];
11949         int err = 0;
11950         int ret;
11951         int strict;
11952
11953         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11954         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11955
11956         /* Nothing to check for hole and inline data extents */
11957         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11958             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11959                 return 0;
11960
11961         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11962         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11963         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11964
11965         /* Check unaligned disk_num_bytes and num_bytes */
11966         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11967                 error(
11968 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11969                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11970                         root->fs_info->sectorsize);
11971                 err |= BYTES_UNALIGNED;
11972         } else if (account_bytes) {
11973                 data_bytes_allocated += disk_num_bytes;
11974         }
11975         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11976                 error(
11977 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11978                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11979                         root->fs_info->sectorsize);
11980                 err |= BYTES_UNALIGNED;
11981         } else if (account_bytes) {
11982                 data_bytes_referenced += extent_num_bytes;
11983         }
11984         owner = btrfs_header_owner(eb);
11985
11986         /* Check the extent item of the file extent in extent tree */
11987         btrfs_init_path(&path);
11988         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11989         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11990         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11991
11992         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11993         if (ret)
11994                 goto out;
11995
11996         leaf = path.nodes[0];
11997         slot = path.slots[0];
11998         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11999
12000         extent_flags = btrfs_extent_flags(leaf, ei);
12001
12002         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
12003                 error(
12004                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
12005                     disk_bytenr, disk_num_bytes,
12006                     BTRFS_EXTENT_FLAG_DATA);
12007                 err |= BACKREF_MISMATCH;
12008         }
12009
12010         /* Check data backref inside that extent item */
12011         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
12012         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12013         ptr = (unsigned long)iref;
12014         end = (unsigned long)ei + item_size;
12015         strict = should_check_extent_strictly(root, nrefs, -1);
12016
12017         while (ptr < end) {
12018                 iref = (struct btrfs_extent_inline_ref *)ptr;
12019                 type = btrfs_extent_inline_ref_type(leaf, iref);
12020                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12021
12022                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
12023                 if (ret) {
12024                         err |= ret;
12025                         break;
12026                 }
12027                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
12028                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
12029                         if (ref_root == root->objectid)
12030                                 found_dbackref = 1;
12031                         else if (!strict && owner == ref_root)
12032                                 found_dbackref = 1;
12033                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
12034                         found_dbackref = !check_tree_block_ref(root, NULL,
12035                                 btrfs_extent_inline_ref_offset(leaf, iref),
12036                                 0, owner, NULL);
12037                 }
12038
12039                 if (found_dbackref)
12040                         break;
12041                 ptr += btrfs_extent_inline_ref_size(type);
12042         }
12043
12044         if (!found_dbackref) {
12045                 btrfs_release_path(&path);
12046
12047                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
12048                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
12049                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
12050                 dbref_key.offset = hash_extent_data_ref(root->objectid,
12051                                 fi_key.objectid, fi_key.offset);
12052
12053                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12054                                         &dbref_key, &path, 0, 0);
12055                 if (!ret) {
12056                         found_dbackref = 1;
12057                         goto out;
12058                 }
12059
12060                 btrfs_release_path(&path);
12061
12062                 /*
12063                  * Neither inlined nor EXTENT_DATA_REF found, try
12064                  * SHARED_DATA_REF as last chance.
12065                  */
12066                 dbref_key.objectid = disk_bytenr;
12067                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
12068                 dbref_key.offset = eb->start;
12069
12070                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
12071                                         &dbref_key, &path, 0, 0);
12072                 if (!ret) {
12073                         found_dbackref = 1;
12074                         goto out;
12075                 }
12076         }
12077
12078 out:
12079         if (!found_dbackref)
12080                 err |= BACKREF_MISSING;
12081         btrfs_release_path(&path);
12082         if (err & BACKREF_MISSING) {
12083                 error("data extent[%llu %llu] backref lost",
12084                       disk_bytenr, disk_num_bytes);
12085         }
12086         return err;
12087 }
12088
12089 /*
12090  * Get real tree block level for the case like shared block
12091  * Return >= 0 as tree level
12092  * Return <0 for error
12093  */
12094 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
12095 {
12096         struct extent_buffer *eb;
12097         struct btrfs_path path;
12098         struct btrfs_key key;
12099         struct btrfs_extent_item *ei;
12100         u64 flags;
12101         u64 transid;
12102         u8 backref_level;
12103         u8 header_level;
12104         int ret;
12105
12106         /* Search extent tree for extent generation and level */
12107         key.objectid = bytenr;
12108         key.type = BTRFS_METADATA_ITEM_KEY;
12109         key.offset = (u64)-1;
12110
12111         btrfs_init_path(&path);
12112         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
12113         if (ret < 0)
12114                 goto release_out;
12115         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
12116         if (ret < 0)
12117                 goto release_out;
12118         if (ret > 0) {
12119                 ret = -ENOENT;
12120                 goto release_out;
12121         }
12122
12123         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12124         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
12125                             struct btrfs_extent_item);
12126         flags = btrfs_extent_flags(path.nodes[0], ei);
12127         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
12128                 ret = -ENOENT;
12129                 goto release_out;
12130         }
12131
12132         /* Get transid for later read_tree_block() check */
12133         transid = btrfs_extent_generation(path.nodes[0], ei);
12134
12135         /* Get backref level as one source */
12136         if (key.type == BTRFS_METADATA_ITEM_KEY) {
12137                 backref_level = key.offset;
12138         } else {
12139                 struct btrfs_tree_block_info *info;
12140
12141                 info = (struct btrfs_tree_block_info *)(ei + 1);
12142                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
12143         }
12144         btrfs_release_path(&path);
12145
12146         /* Get level from tree block as an alternative source */
12147         eb = read_tree_block(fs_info, bytenr, transid);
12148         if (!extent_buffer_uptodate(eb)) {
12149                 free_extent_buffer(eb);
12150                 return -EIO;
12151         }
12152         header_level = btrfs_header_level(eb);
12153         free_extent_buffer(eb);
12154
12155         if (header_level != backref_level)
12156                 return -EIO;
12157         return header_level;
12158
12159 release_out:
12160         btrfs_release_path(&path);
12161         return ret;
12162 }
12163
12164 /*
12165  * Check if a tree block backref is valid (points to a valid tree block)
12166  * if level == -1, level will be resolved
12167  * Return >0 for any error found and print error message
12168  */
12169 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12170                                     u64 bytenr, int level)
12171 {
12172         struct btrfs_root *root;
12173         struct btrfs_key key;
12174         struct btrfs_path path;
12175         struct extent_buffer *eb;
12176         struct extent_buffer *node;
12177         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12178         int err = 0;
12179         int ret;
12180
12181         /* Query level for level == -1 special case */
12182         if (level == -1)
12183                 level = query_tree_block_level(fs_info, bytenr);
12184         if (level < 0) {
12185                 err |= REFERENCER_MISSING;
12186                 goto out;
12187         }
12188
12189         key.objectid = root_id;
12190         key.type = BTRFS_ROOT_ITEM_KEY;
12191         key.offset = (u64)-1;
12192
12193         root = btrfs_read_fs_root(fs_info, &key);
12194         if (IS_ERR(root)) {
12195                 err |= REFERENCER_MISSING;
12196                 goto out;
12197         }
12198
12199         /* Read out the tree block to get item/node key */
12200         eb = read_tree_block(fs_info, bytenr, 0);
12201         if (!extent_buffer_uptodate(eb)) {
12202                 err |= REFERENCER_MISSING;
12203                 free_extent_buffer(eb);
12204                 goto out;
12205         }
12206
12207         /* Empty tree, no need to check key */
12208         if (!btrfs_header_nritems(eb) && !level) {
12209                 free_extent_buffer(eb);
12210                 goto out;
12211         }
12212
12213         if (level)
12214                 btrfs_node_key_to_cpu(eb, &key, 0);
12215         else
12216                 btrfs_item_key_to_cpu(eb, &key, 0);
12217
12218         free_extent_buffer(eb);
12219
12220         btrfs_init_path(&path);
12221         path.lowest_level = level;
12222         /* Search with the first key, to ensure we can reach it */
12223         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12224         if (ret < 0) {
12225                 err |= REFERENCER_MISSING;
12226                 goto release_out;
12227         }
12228
12229         node = path.nodes[level];
12230         if (btrfs_header_bytenr(node) != bytenr) {
12231                 error(
12232         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12233                         bytenr, nodesize, bytenr,
12234                         btrfs_header_bytenr(node));
12235                 err |= REFERENCER_MISMATCH;
12236         }
12237         if (btrfs_header_level(node) != level) {
12238                 error(
12239         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12240                         bytenr, nodesize, level,
12241                         btrfs_header_level(node));
12242                 err |= REFERENCER_MISMATCH;
12243         }
12244
12245 release_out:
12246         btrfs_release_path(&path);
12247 out:
12248         if (err & REFERENCER_MISSING) {
12249                 if (level < 0)
12250                         error("extent [%llu %d] lost referencer (owner: %llu)",
12251                                 bytenr, nodesize, root_id);
12252                 else
12253                         error(
12254                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12255                                 bytenr, nodesize, root_id, level);
12256         }
12257
12258         return err;
12259 }
12260
12261 /*
12262  * Check if tree block @eb is tree reloc root.
12263  * Return 0 if it's not or any problem happens
12264  * Return 1 if it's a tree reloc root
12265  */
12266 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12267                                  struct extent_buffer *eb)
12268 {
12269         struct btrfs_root *tree_reloc_root;
12270         struct btrfs_key key;
12271         u64 bytenr = btrfs_header_bytenr(eb);
12272         u64 owner = btrfs_header_owner(eb);
12273         int ret = 0;
12274
12275         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12276         key.offset = owner;
12277         key.type = BTRFS_ROOT_ITEM_KEY;
12278
12279         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12280         if (IS_ERR(tree_reloc_root))
12281                 return 0;
12282
12283         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12284                 ret = 1;
12285         btrfs_free_fs_root(tree_reloc_root);
12286         return ret;
12287 }
12288
12289 /*
12290  * Check referencer for shared block backref
12291  * If level == -1, this function will resolve the level.
12292  */
12293 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12294                                      u64 parent, u64 bytenr, int level)
12295 {
12296         struct extent_buffer *eb;
12297         u32 nr;
12298         int found_parent = 0;
12299         int i;
12300
12301         eb = read_tree_block(fs_info, parent, 0);
12302         if (!extent_buffer_uptodate(eb))
12303                 goto out;
12304
12305         if (level == -1)
12306                 level = query_tree_block_level(fs_info, bytenr);
12307         if (level < 0)
12308                 goto out;
12309
12310         /* It's possible it's a tree reloc root */
12311         if (parent == bytenr) {
12312                 if (is_tree_reloc_root(fs_info, eb))
12313                         found_parent = 1;
12314                 goto out;
12315         }
12316
12317         if (level + 1 != btrfs_header_level(eb))
12318                 goto out;
12319
12320         nr = btrfs_header_nritems(eb);
12321         for (i = 0; i < nr; i++) {
12322                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12323                         found_parent = 1;
12324                         break;
12325                 }
12326         }
12327 out:
12328         free_extent_buffer(eb);
12329         if (!found_parent) {
12330                 error(
12331         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12332                         bytenr, fs_info->nodesize, parent, level);
12333                 return REFERENCER_MISSING;
12334         }
12335         return 0;
12336 }
12337
12338 /*
12339  * Check referencer for normal (inlined) data ref
12340  * If len == 0, it will be resolved by searching in extent tree
12341  */
12342 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12343                                      u64 root_id, u64 objectid, u64 offset,
12344                                      u64 bytenr, u64 len, u32 count)
12345 {
12346         struct btrfs_root *root;
12347         struct btrfs_root *extent_root = fs_info->extent_root;
12348         struct btrfs_key key;
12349         struct btrfs_path path;
12350         struct extent_buffer *leaf;
12351         struct btrfs_file_extent_item *fi;
12352         u32 found_count = 0;
12353         int slot;
12354         int ret = 0;
12355
12356         if (!len) {
12357                 key.objectid = bytenr;
12358                 key.type = BTRFS_EXTENT_ITEM_KEY;
12359                 key.offset = (u64)-1;
12360
12361                 btrfs_init_path(&path);
12362                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12363                 if (ret < 0)
12364                         goto out;
12365                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12366                 if (ret)
12367                         goto out;
12368                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12369                 if (key.objectid != bytenr ||
12370                     key.type != BTRFS_EXTENT_ITEM_KEY)
12371                         goto out;
12372                 len = key.offset;
12373                 btrfs_release_path(&path);
12374         }
12375         key.objectid = root_id;
12376         key.type = BTRFS_ROOT_ITEM_KEY;
12377         key.offset = (u64)-1;
12378         btrfs_init_path(&path);
12379
12380         root = btrfs_read_fs_root(fs_info, &key);
12381         if (IS_ERR(root))
12382                 goto out;
12383
12384         key.objectid = objectid;
12385         key.type = BTRFS_EXTENT_DATA_KEY;
12386         /*
12387          * It can be nasty as data backref offset is
12388          * file offset - file extent offset, which is smaller or
12389          * equal to original backref offset.  The only special case is
12390          * overflow.  So we need to special check and do further search.
12391          */
12392         key.offset = offset & (1ULL << 63) ? 0 : offset;
12393
12394         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12395         if (ret < 0)
12396                 goto out;
12397
12398         /*
12399          * Search afterwards to get correct one
12400          * NOTE: As we must do a comprehensive check on the data backref to
12401          * make sure the dref count also matches, we must iterate all file
12402          * extents for that inode.
12403          */
12404         while (1) {
12405                 leaf = path.nodes[0];
12406                 slot = path.slots[0];
12407
12408                 if (slot >= btrfs_header_nritems(leaf))
12409                         goto next;
12410                 btrfs_item_key_to_cpu(leaf, &key, slot);
12411                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12412                         break;
12413                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12414                 /*
12415                  * Except normal disk bytenr and disk num bytes, we still
12416                  * need to do extra check on dbackref offset as
12417                  * dbackref offset = file_offset - file_extent_offset
12418                  */
12419                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12420                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12421                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12422                     offset)
12423                         found_count++;
12424
12425 next:
12426                 ret = btrfs_next_item(root, &path);
12427                 if (ret)
12428                         break;
12429         }
12430 out:
12431         btrfs_release_path(&path);
12432         if (found_count != count) {
12433                 error(
12434 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12435                         bytenr, len, root_id, objectid, offset, count, found_count);
12436                 return REFERENCER_MISSING;
12437         }
12438         return 0;
12439 }
12440
12441 /*
12442  * Check if the referencer of a shared data backref exists
12443  */
12444 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12445                                      u64 parent, u64 bytenr)
12446 {
12447         struct extent_buffer *eb;
12448         struct btrfs_key key;
12449         struct btrfs_file_extent_item *fi;
12450         u32 nr;
12451         int found_parent = 0;
12452         int i;
12453
12454         eb = read_tree_block(fs_info, parent, 0);
12455         if (!extent_buffer_uptodate(eb))
12456                 goto out;
12457
12458         nr = btrfs_header_nritems(eb);
12459         for (i = 0; i < nr; i++) {
12460                 btrfs_item_key_to_cpu(eb, &key, i);
12461                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12462                         continue;
12463
12464                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12465                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12466                         continue;
12467
12468                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12469                         found_parent = 1;
12470                         break;
12471                 }
12472         }
12473
12474 out:
12475         free_extent_buffer(eb);
12476         if (!found_parent) {
12477                 error("shared extent %llu referencer lost (parent: %llu)",
12478                         bytenr, parent);
12479                 return REFERENCER_MISSING;
12480         }
12481         return 0;
12482 }
12483
12484 /*
12485  * Only delete backref if REFERENCER_MISSING now
12486  *
12487  * Returns <0   the extent was deleted
12488  * Returns >0   the backref was deleted but extent still exists, returned value
12489  *               means error after repair
12490  * Returns  0   nothing happened
12491  */
12492 static int repair_extent_item(struct btrfs_trans_handle *trans,
12493                       struct btrfs_root *root, struct btrfs_path *path,
12494                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12495                       u64 owner, u64 offset, int err)
12496 {
12497         struct btrfs_key old_key;
12498         int freed = 0;
12499         int ret;
12500
12501         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12502
12503         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12504                 /* delete the backref */
12505                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12506                           num_bytes, parent, root_objectid, owner, offset);
12507                 if (!ret) {
12508                         freed = 1;
12509                         err &= ~REFERENCER_MISSING;
12510                         printf("Delete backref in extent [%llu %llu]\n",
12511                                bytenr, num_bytes);
12512                 } else {
12513                         error("fail to delete backref in extent [%llu %llu]",
12514                                bytenr, num_bytes);
12515                 }
12516         }
12517
12518         /* btrfs_free_extent may delete the extent */
12519         btrfs_release_path(path);
12520         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12521
12522         if (ret)
12523                 ret = -ENOENT;
12524         else if (freed)
12525                 ret = err;
12526         return ret;
12527 }
12528
12529 /*
12530  * This function will check a given extent item, including its backref and
12531  * itself (like crossing stripe boundary and type)
12532  *
12533  * Since we don't use extent_record anymore, introduce new error bit
12534  */
12535 static int check_extent_item(struct btrfs_trans_handle *trans,
12536                              struct btrfs_fs_info *fs_info,
12537                              struct btrfs_path *path)
12538 {
12539         struct btrfs_extent_item *ei;
12540         struct btrfs_extent_inline_ref *iref;
12541         struct btrfs_extent_data_ref *dref;
12542         struct extent_buffer *eb = path->nodes[0];
12543         unsigned long end;
12544         unsigned long ptr;
12545         int slot = path->slots[0];
12546         int type;
12547         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12548         u32 item_size = btrfs_item_size_nr(eb, slot);
12549         u64 flags;
12550         u64 offset;
12551         u64 parent;
12552         u64 num_bytes;
12553         u64 root_objectid;
12554         u64 owner;
12555         u64 owner_offset;
12556         int metadata = 0;
12557         int level;
12558         struct btrfs_key key;
12559         int ret;
12560         int err = 0;
12561
12562         btrfs_item_key_to_cpu(eb, &key, slot);
12563         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12564                 bytes_used += key.offset;
12565                 num_bytes = key.offset;
12566         } else {
12567                 bytes_used += nodesize;
12568                 num_bytes = nodesize;
12569         }
12570
12571         if (item_size < sizeof(*ei)) {
12572                 /*
12573                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12574                  * old thing when on disk format is still un-determined.
12575                  * No need to care about it anymore
12576                  */
12577                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12578                 return -ENOTTY;
12579         }
12580
12581         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12582         flags = btrfs_extent_flags(eb, ei);
12583
12584         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12585                 metadata = 1;
12586         if (metadata && check_crossing_stripes(global_info, key.objectid,
12587                                                eb->len)) {
12588                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12589                       key.objectid, key.objectid + nodesize);
12590                 err |= CROSSING_STRIPE_BOUNDARY;
12591         }
12592
12593         ptr = (unsigned long)(ei + 1);
12594
12595         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12596                 /* Old EXTENT_ITEM metadata */
12597                 struct btrfs_tree_block_info *info;
12598
12599                 info = (struct btrfs_tree_block_info *)ptr;
12600                 level = btrfs_tree_block_level(eb, info);
12601                 ptr += sizeof(struct btrfs_tree_block_info);
12602         } else {
12603                 /* New METADATA_ITEM */
12604                 level = key.offset;
12605         }
12606         end = (unsigned long)ei + item_size;
12607
12608 next:
12609         /* Reached extent item end normally */
12610         if (ptr == end)
12611                 goto out;
12612
12613         /* Beyond extent item end, wrong item size */
12614         if (ptr > end) {
12615                 err |= ITEM_SIZE_MISMATCH;
12616                 error("extent item at bytenr %llu slot %d has wrong size",
12617                         eb->start, slot);
12618                 goto out;
12619         }
12620
12621         parent = 0;
12622         root_objectid = 0;
12623         owner = 0;
12624         owner_offset = 0;
12625         /* Now check every backref in this extent item */
12626         iref = (struct btrfs_extent_inline_ref *)ptr;
12627         type = btrfs_extent_inline_ref_type(eb, iref);
12628         offset = btrfs_extent_inline_ref_offset(eb, iref);
12629         switch (type) {
12630         case BTRFS_TREE_BLOCK_REF_KEY:
12631                 root_objectid = offset;
12632                 owner = level;
12633                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12634                                                level);
12635                 err |= ret;
12636                 break;
12637         case BTRFS_SHARED_BLOCK_REF_KEY:
12638                 parent = offset;
12639                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12640                                                  level);
12641                 err |= ret;
12642                 break;
12643         case BTRFS_EXTENT_DATA_REF_KEY:
12644                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12645                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12646                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12647                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12648                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12649                                         owner_offset, key.objectid, key.offset,
12650                                         btrfs_extent_data_ref_count(eb, dref));
12651                 err |= ret;
12652                 break;
12653         case BTRFS_SHARED_DATA_REF_KEY:
12654                 parent = offset;
12655                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12656                 err |= ret;
12657                 break;
12658         default:
12659                 error("extent[%llu %d %llu] has unknown ref type: %d",
12660                         key.objectid, key.type, key.offset, type);
12661                 ret = UNKNOWN_TYPE;
12662                 err |= ret;
12663                 goto out;
12664         }
12665
12666         if (err && repair) {
12667                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12668                          key.objectid, num_bytes, parent, root_objectid,
12669                          owner, owner_offset, ret);
12670                 if (ret < 0)
12671                         goto out;
12672                 if (ret) {
12673                         goto next;
12674                         err = ret;
12675                 }
12676         }
12677
12678         ptr += btrfs_extent_inline_ref_size(type);
12679         goto next;
12680
12681 out:
12682         return err;
12683 }
12684
12685 /*
12686  * Check if a dev extent item is referred correctly by its chunk
12687  */
12688 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12689                                  struct extent_buffer *eb, int slot)
12690 {
12691         struct btrfs_root *chunk_root = fs_info->chunk_root;
12692         struct btrfs_dev_extent *ptr;
12693         struct btrfs_path path;
12694         struct btrfs_key chunk_key;
12695         struct btrfs_key devext_key;
12696         struct btrfs_chunk *chunk;
12697         struct extent_buffer *l;
12698         int num_stripes;
12699         u64 length;
12700         int i;
12701         int found_chunk = 0;
12702         int ret;
12703
12704         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12705         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12706         length = btrfs_dev_extent_length(eb, ptr);
12707
12708         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12709         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12710         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12711
12712         btrfs_init_path(&path);
12713         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12714         if (ret)
12715                 goto out;
12716
12717         l = path.nodes[0];
12718         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12719         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12720                                       chunk_key.offset);
12721         if (ret < 0)
12722                 goto out;
12723
12724         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12725                 goto out;
12726
12727         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12728         for (i = 0; i < num_stripes; i++) {
12729                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12730                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12731
12732                 if (devid == devext_key.objectid &&
12733                     offset == devext_key.offset) {
12734                         found_chunk = 1;
12735                         break;
12736                 }
12737         }
12738 out:
12739         btrfs_release_path(&path);
12740         if (!found_chunk) {
12741                 error(
12742                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12743                         devext_key.objectid, devext_key.offset, length);
12744                 return REFERENCER_MISSING;
12745         }
12746         return 0;
12747 }
12748
12749 /*
12750  * Check if the used space is correct with the dev item
12751  */
12752 static int check_dev_item(struct btrfs_fs_info *fs_info,
12753                           struct extent_buffer *eb, int slot)
12754 {
12755         struct btrfs_root *dev_root = fs_info->dev_root;
12756         struct btrfs_dev_item *dev_item;
12757         struct btrfs_path path;
12758         struct btrfs_key key;
12759         struct btrfs_dev_extent *ptr;
12760         u64 dev_id;
12761         u64 used;
12762         u64 total = 0;
12763         int ret;
12764
12765         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12766         dev_id = btrfs_device_id(eb, dev_item);
12767         used = btrfs_device_bytes_used(eb, dev_item);
12768
12769         key.objectid = dev_id;
12770         key.type = BTRFS_DEV_EXTENT_KEY;
12771         key.offset = 0;
12772
12773         btrfs_init_path(&path);
12774         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12775         if (ret < 0) {
12776                 btrfs_item_key_to_cpu(eb, &key, slot);
12777                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12778                         key.objectid, key.type, key.offset);
12779                 btrfs_release_path(&path);
12780                 return REFERENCER_MISSING;
12781         }
12782
12783         /* Iterate dev_extents to calculate the used space of a device */
12784         while (1) {
12785                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12786                         goto next;
12787
12788                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12789                 if (key.objectid > dev_id)
12790                         break;
12791                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12792                         goto next;
12793
12794                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12795                                      struct btrfs_dev_extent);
12796                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12797 next:
12798                 ret = btrfs_next_item(dev_root, &path);
12799                 if (ret)
12800                         break;
12801         }
12802         btrfs_release_path(&path);
12803
12804         if (used != total) {
12805                 btrfs_item_key_to_cpu(eb, &key, slot);
12806                 error(
12807 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12808                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12809                         BTRFS_DEV_EXTENT_KEY, dev_id);
12810                 return ACCOUNTING_MISMATCH;
12811         }
12812         return 0;
12813 }
12814
12815 /*
12816  * Check a block group item with its referencer (chunk) and its used space
12817  * with extent/metadata item
12818  */
12819 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12820                                   struct extent_buffer *eb, int slot)
12821 {
12822         struct btrfs_root *extent_root = fs_info->extent_root;
12823         struct btrfs_root *chunk_root = fs_info->chunk_root;
12824         struct btrfs_block_group_item *bi;
12825         struct btrfs_block_group_item bg_item;
12826         struct btrfs_path path;
12827         struct btrfs_key bg_key;
12828         struct btrfs_key chunk_key;
12829         struct btrfs_key extent_key;
12830         struct btrfs_chunk *chunk;
12831         struct extent_buffer *leaf;
12832         struct btrfs_extent_item *ei;
12833         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12834         u64 flags;
12835         u64 bg_flags;
12836         u64 used;
12837         u64 total = 0;
12838         int ret;
12839         int err = 0;
12840
12841         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12842         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12843         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12844         used = btrfs_block_group_used(&bg_item);
12845         bg_flags = btrfs_block_group_flags(&bg_item);
12846
12847         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12848         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12849         chunk_key.offset = bg_key.objectid;
12850
12851         btrfs_init_path(&path);
12852         /* Search for the referencer chunk */
12853         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12854         if (ret) {
12855                 error(
12856                 "block group[%llu %llu] did not find the related chunk item",
12857                         bg_key.objectid, bg_key.offset);
12858                 err |= REFERENCER_MISSING;
12859         } else {
12860                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12861                                         struct btrfs_chunk);
12862                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12863                                                 bg_key.offset) {
12864                         error(
12865         "block group[%llu %llu] related chunk item length does not match",
12866                                 bg_key.objectid, bg_key.offset);
12867                         err |= REFERENCER_MISMATCH;
12868                 }
12869         }
12870         btrfs_release_path(&path);
12871
12872         /* Search from the block group bytenr */
12873         extent_key.objectid = bg_key.objectid;
12874         extent_key.type = 0;
12875         extent_key.offset = 0;
12876
12877         btrfs_init_path(&path);
12878         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12879         if (ret < 0)
12880                 goto out;
12881
12882         /* Iterate extent tree to account used space */
12883         while (1) {
12884                 leaf = path.nodes[0];
12885
12886                 /* Search slot can point to the last item beyond leaf nritems */
12887                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12888                         goto next;
12889
12890                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12891                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12892                         break;
12893
12894                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12895                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12896                         goto next;
12897                 if (extent_key.objectid < bg_key.objectid)
12898                         goto next;
12899
12900                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12901                         total += nodesize;
12902                 else
12903                         total += extent_key.offset;
12904
12905                 ei = btrfs_item_ptr(leaf, path.slots[0],
12906                                     struct btrfs_extent_item);
12907                 flags = btrfs_extent_flags(leaf, ei);
12908                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12909                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12910                                 error(
12911                         "bad extent[%llu, %llu) type mismatch with chunk",
12912                                         extent_key.objectid,
12913                                         extent_key.objectid + extent_key.offset);
12914                                 err |= CHUNK_TYPE_MISMATCH;
12915                         }
12916                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12917                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12918                                     BTRFS_BLOCK_GROUP_METADATA))) {
12919                                 error(
12920                         "bad extent[%llu, %llu) type mismatch with chunk",
12921                                         extent_key.objectid,
12922                                         extent_key.objectid + nodesize);
12923                                 err |= CHUNK_TYPE_MISMATCH;
12924                         }
12925                 }
12926 next:
12927                 ret = btrfs_next_item(extent_root, &path);
12928                 if (ret)
12929                         break;
12930         }
12931
12932 out:
12933         btrfs_release_path(&path);
12934
12935         if (total != used) {
12936                 error(
12937                 "block group[%llu %llu] used %llu but extent items used %llu",
12938                         bg_key.objectid, bg_key.offset, used, total);
12939                 err |= BG_ACCOUNTING_ERROR;
12940         }
12941         return err;
12942 }
12943
12944 /*
12945  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12946  * FIXME: We still need to repair error of dev_item.
12947  *
12948  * Returns error after repair.
12949  */
12950 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12951                              struct btrfs_root *chunk_root,
12952                              struct btrfs_path *path, int err)
12953 {
12954         struct btrfs_chunk *chunk;
12955         struct btrfs_key chunk_key;
12956         struct extent_buffer *eb = path->nodes[0];
12957         u64 length;
12958         int slot = path->slots[0];
12959         u64 type;
12960         int ret = 0;
12961
12962         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12963         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12964                 return err;
12965         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12966         type = btrfs_chunk_type(path->nodes[0], chunk);
12967         length = btrfs_chunk_length(eb, chunk);
12968
12969         if (err & REFERENCER_MISSING) {
12970                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12971                      type, chunk_key.objectid, chunk_key.offset, length);
12972                 if (ret) {
12973                         error("fail to add block group item[%llu %llu]",
12974                               chunk_key.offset, length);
12975                         goto out;
12976                 } else {
12977                         err &= ~REFERENCER_MISSING;
12978                         printf("Added block group item[%llu %llu]\n",
12979                                chunk_key.offset, length);
12980                 }
12981         }
12982
12983 out:
12984         return err;
12985 }
12986
12987 /*
12988  * Check a chunk item.
12989  * Including checking all referred dev_extents and block group
12990  */
12991 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12992                             struct extent_buffer *eb, int slot)
12993 {
12994         struct btrfs_root *extent_root = fs_info->extent_root;
12995         struct btrfs_root *dev_root = fs_info->dev_root;
12996         struct btrfs_path path;
12997         struct btrfs_key chunk_key;
12998         struct btrfs_key bg_key;
12999         struct btrfs_key devext_key;
13000         struct btrfs_chunk *chunk;
13001         struct extent_buffer *leaf;
13002         struct btrfs_block_group_item *bi;
13003         struct btrfs_block_group_item bg_item;
13004         struct btrfs_dev_extent *ptr;
13005         u64 length;
13006         u64 chunk_end;
13007         u64 stripe_len;
13008         u64 type;
13009         int num_stripes;
13010         u64 offset;
13011         u64 objectid;
13012         int i;
13013         int ret;
13014         int err = 0;
13015
13016         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
13017         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
13018         length = btrfs_chunk_length(eb, chunk);
13019         chunk_end = chunk_key.offset + length;
13020         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
13021                                       chunk_key.offset);
13022         if (ret < 0) {
13023                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
13024                         chunk_end);
13025                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
13026                 goto out;
13027         }
13028         type = btrfs_chunk_type(eb, chunk);
13029
13030         bg_key.objectid = chunk_key.offset;
13031         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
13032         bg_key.offset = length;
13033
13034         btrfs_init_path(&path);
13035         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
13036         if (ret) {
13037                 error(
13038                 "chunk[%llu %llu) did not find the related block group item",
13039                         chunk_key.offset, chunk_end);
13040                 err |= REFERENCER_MISSING;
13041         } else{
13042                 leaf = path.nodes[0];
13043                 bi = btrfs_item_ptr(leaf, path.slots[0],
13044                                     struct btrfs_block_group_item);
13045                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
13046                                    sizeof(bg_item));
13047                 if (btrfs_block_group_flags(&bg_item) != type) {
13048                         error(
13049 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
13050                                 chunk_key.offset, chunk_end, type,
13051                                 btrfs_block_group_flags(&bg_item));
13052                         err |= REFERENCER_MISSING;
13053                 }
13054         }
13055
13056         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
13057         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
13058         for (i = 0; i < num_stripes; i++) {
13059                 btrfs_release_path(&path);
13060                 btrfs_init_path(&path);
13061                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
13062                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
13063                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
13064
13065                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
13066                                         0, 0);
13067                 if (ret)
13068                         goto not_match_dev;
13069
13070                 leaf = path.nodes[0];
13071                 ptr = btrfs_item_ptr(leaf, path.slots[0],
13072                                      struct btrfs_dev_extent);
13073                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
13074                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
13075                 if (objectid != chunk_key.objectid ||
13076                     offset != chunk_key.offset ||
13077                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
13078                         goto not_match_dev;
13079                 continue;
13080 not_match_dev:
13081                 err |= BACKREF_MISSING;
13082                 error(
13083                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
13084                         chunk_key.objectid, chunk_end, i);
13085                 continue;
13086         }
13087         btrfs_release_path(&path);
13088 out:
13089         return err;
13090 }
13091
13092 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
13093                                    struct btrfs_root *root,
13094                                    struct btrfs_path *path)
13095 {
13096         struct btrfs_key key;
13097         int ret = 0;
13098
13099         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
13100         btrfs_release_path(path);
13101         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
13102         if (ret) {
13103                 ret = -ENOENT;
13104                 goto out;
13105         }
13106
13107         ret = btrfs_del_item(trans, root, path);
13108         if (ret)
13109                 goto out;
13110
13111         if (path->slots[0] == 0)
13112                 btrfs_prev_leaf(root, path);
13113         else
13114                 path->slots[0]--;
13115 out:
13116         if (ret)
13117                 error("failed to delete root %llu item[%llu, %u, %llu]",
13118                       root->objectid, key.objectid, key.type, key.offset);
13119         else
13120                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
13121                        root->objectid, key.objectid, key.type, key.offset);
13122         return ret;
13123 }
13124
13125 /*
13126  * Main entry function to check known items and update related accounting info
13127  */
13128 static int check_leaf_items(struct btrfs_trans_handle *trans,
13129                             struct btrfs_root *root, struct btrfs_path *path,
13130                             struct node_refs *nrefs, int account_bytes)
13131 {
13132         struct btrfs_fs_info *fs_info = root->fs_info;
13133         struct btrfs_key key;
13134         struct extent_buffer *eb;
13135         int slot;
13136         int type;
13137         struct btrfs_extent_data_ref *dref;
13138         int ret = 0;
13139         int err = 0;
13140
13141 again:
13142         eb = path->nodes[0];
13143         slot = path->slots[0];
13144         if (slot >= btrfs_header_nritems(eb)) {
13145                 if (slot == 0) {
13146                         error("empty leaf [%llu %u] root %llu", eb->start,
13147                                 root->fs_info->nodesize, root->objectid);
13148                         err |= EIO;
13149                 }
13150                 goto out;
13151         }
13152
13153         btrfs_item_key_to_cpu(eb, &key, slot);
13154         type = key.type;
13155
13156         switch (type) {
13157         case BTRFS_EXTENT_DATA_KEY:
13158                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13159                 if (repair && ret)
13160                         ret = repair_extent_data_item(trans, root, path, nrefs,
13161                                                       ret);
13162                 err |= ret;
13163                 break;
13164         case BTRFS_BLOCK_GROUP_ITEM_KEY:
13165                 ret = check_block_group_item(fs_info, eb, slot);
13166                 if (repair &&
13167                     ret & REFERENCER_MISSING)
13168                         ret = delete_extent_tree_item(trans, root, path);
13169                 err |= ret;
13170                 break;
13171         case BTRFS_DEV_ITEM_KEY:
13172                 ret = check_dev_item(fs_info, eb, slot);
13173                 err |= ret;
13174                 break;
13175         case BTRFS_CHUNK_ITEM_KEY:
13176                 ret = check_chunk_item(fs_info, eb, slot);
13177                 if (repair && ret)
13178                         ret = repair_chunk_item(trans, root, path, ret);
13179                 err |= ret;
13180                 break;
13181         case BTRFS_DEV_EXTENT_KEY:
13182                 ret = check_dev_extent_item(fs_info, eb, slot);
13183                 err |= ret;
13184                 break;
13185         case BTRFS_EXTENT_ITEM_KEY:
13186         case BTRFS_METADATA_ITEM_KEY:
13187                 ret = check_extent_item(trans, fs_info, path);
13188                 err |= ret;
13189                 break;
13190         case BTRFS_EXTENT_CSUM_KEY:
13191                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
13192                 err |= ret;
13193                 break;
13194         case BTRFS_TREE_BLOCK_REF_KEY:
13195                 ret = check_tree_block_backref(fs_info, key.offset,
13196                                                key.objectid, -1);
13197                 if (repair &&
13198                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13199                         ret = delete_extent_tree_item(trans, root, path);
13200                 err |= ret;
13201                 break;
13202         case BTRFS_EXTENT_DATA_REF_KEY:
13203                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13204                 ret = check_extent_data_backref(fs_info,
13205                                 btrfs_extent_data_ref_root(eb, dref),
13206                                 btrfs_extent_data_ref_objectid(eb, dref),
13207                                 btrfs_extent_data_ref_offset(eb, dref),
13208                                 key.objectid, 0,
13209                                 btrfs_extent_data_ref_count(eb, dref));
13210                 if (repair &&
13211                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13212                         ret = delete_extent_tree_item(trans, root, path);
13213                 err |= ret;
13214                 break;
13215         case BTRFS_SHARED_BLOCK_REF_KEY:
13216                 ret = check_shared_block_backref(fs_info, key.offset,
13217                                                  key.objectid, -1);
13218                 if (repair &&
13219                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13220                         ret = delete_extent_tree_item(trans, root, path);
13221                 err |= ret;
13222                 break;
13223         case BTRFS_SHARED_DATA_REF_KEY:
13224                 ret = check_shared_data_backref(fs_info, key.offset,
13225                                                 key.objectid);
13226                 if (repair &&
13227                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13228                         ret = delete_extent_tree_item(trans, root, path);
13229                 err |= ret;
13230                 break;
13231         default:
13232                 break;
13233         }
13234
13235         ++path->slots[0];
13236         goto again;
13237 out:
13238         return err;
13239 }
13240
13241 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info);
13242
13243 /*
13244  * Low memory usage version check_chunks_and_extents.
13245  */
13246 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13247 {
13248         struct btrfs_trans_handle *trans = NULL;
13249         struct btrfs_path path;
13250         struct btrfs_key old_key;
13251         struct btrfs_key key;
13252         struct btrfs_root *root1;
13253         struct btrfs_root *root;
13254         struct btrfs_root *cur_root;
13255         int err = 0;
13256         int ret;
13257
13258         root = fs_info->fs_root;
13259
13260         if (repair) {
13261                 /* pin every tree block to avoid extent overwrite */
13262                 ret = pin_metadata_blocks(fs_info);
13263                 if (ret) {
13264                         error("failed to pin metadata blocks");
13265                         return ret;
13266                 }
13267                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13268                 if (IS_ERR(trans)) {
13269                         error("failed to start transaction before check");
13270                         return PTR_ERR(trans);
13271                 }
13272         }
13273
13274         root1 = root->fs_info->chunk_root;
13275         ret = check_btrfs_root(trans, root1, 0, 1);
13276         err |= ret;
13277
13278         root1 = root->fs_info->tree_root;
13279         ret = check_btrfs_root(trans, root1, 0, 1);
13280         err |= ret;
13281
13282         btrfs_init_path(&path);
13283         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13284         key.offset = 0;
13285         key.type = BTRFS_ROOT_ITEM_KEY;
13286
13287         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13288         if (ret) {
13289                 error("cannot find extent tree in tree_root");
13290                 goto out;
13291         }
13292
13293         while (1) {
13294                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13295                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13296                         goto next;
13297                 old_key = key;
13298                 key.offset = (u64)-1;
13299
13300                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13301                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13302                                         &key);
13303                 else
13304                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13305                 if (IS_ERR(cur_root) || !cur_root) {
13306                         error("failed to read tree: %lld", key.objectid);
13307                         goto next;
13308                 }
13309
13310                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13311                 err |= ret;
13312
13313                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13314                         btrfs_free_fs_root(cur_root);
13315
13316                 btrfs_release_path(&path);
13317                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13318                                         &old_key, &path, 0, 0);
13319                 if (ret)
13320                         goto out;
13321 next:
13322                 ret = btrfs_next_item(root1, &path);
13323                 if (ret)
13324                         goto out;
13325         }
13326 out:
13327
13328         /* if repair, update block accounting */
13329         if (repair) {
13330                 ret = btrfs_fix_block_accounting(trans, root);
13331                 if (ret)
13332                         err |= ret;
13333                 else
13334                         err &= ~BG_ACCOUNTING_ERROR;
13335         }
13336
13337         if (trans)
13338                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13339
13340         btrfs_release_path(&path);
13341
13342         return err;
13343 }
13344
13345 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13346 {
13347         int ret;
13348
13349         if (!ctx.progress_enabled)
13350                 fprintf(stderr, "checking extents\n");
13351         if (check_mode == CHECK_MODE_LOWMEM)
13352                 ret = check_chunks_and_extents_v2(fs_info);
13353         else
13354                 ret = check_chunks_and_extents(fs_info);
13355
13356         return ret;
13357 }
13358
13359 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13360                            struct btrfs_root *root, int overwrite)
13361 {
13362         struct extent_buffer *c;
13363         struct extent_buffer *old = root->node;
13364         int level;
13365         int ret;
13366         struct btrfs_disk_key disk_key = {0,0,0};
13367
13368         level = 0;
13369
13370         if (overwrite) {
13371                 c = old;
13372                 extent_buffer_get(c);
13373                 goto init;
13374         }
13375         c = btrfs_alloc_free_block(trans, root,
13376                                    root->fs_info->nodesize,
13377                                    root->root_key.objectid,
13378                                    &disk_key, level, 0, 0);
13379         if (IS_ERR(c)) {
13380                 c = old;
13381                 extent_buffer_get(c);
13382                 overwrite = 1;
13383         }
13384 init:
13385         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13386         btrfs_set_header_level(c, level);
13387         btrfs_set_header_bytenr(c, c->start);
13388         btrfs_set_header_generation(c, trans->transid);
13389         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13390         btrfs_set_header_owner(c, root->root_key.objectid);
13391
13392         write_extent_buffer(c, root->fs_info->fsid,
13393                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13394
13395         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13396                             btrfs_header_chunk_tree_uuid(c),
13397                             BTRFS_UUID_SIZE);
13398
13399         btrfs_mark_buffer_dirty(c);
13400         /*
13401          * this case can happen in the following case:
13402          *
13403          * 1.overwrite previous root.
13404          *
13405          * 2.reinit reloc data root, this is because we skip pin
13406          * down reloc data tree before which means we can allocate
13407          * same block bytenr here.
13408          */
13409         if (old->start == c->start) {
13410                 btrfs_set_root_generation(&root->root_item,
13411                                           trans->transid);
13412                 root->root_item.level = btrfs_header_level(root->node);
13413                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13414                                         &root->root_key, &root->root_item);
13415                 if (ret) {
13416                         free_extent_buffer(c);
13417                         return ret;
13418                 }
13419         }
13420         free_extent_buffer(old);
13421         root->node = c;
13422         add_root_to_dirty_list(root);
13423         return 0;
13424 }
13425
13426 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13427                                 struct extent_buffer *eb, int tree_root)
13428 {
13429         struct extent_buffer *tmp;
13430         struct btrfs_root_item *ri;
13431         struct btrfs_key key;
13432         u64 bytenr;
13433         int level = btrfs_header_level(eb);
13434         int nritems;
13435         int ret;
13436         int i;
13437
13438         /*
13439          * If we have pinned this block before, don't pin it again.
13440          * This can not only avoid forever loop with broken filesystem
13441          * but also give us some speedups.
13442          */
13443         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13444                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13445                 return 0;
13446
13447         btrfs_pin_extent(fs_info, eb->start, eb->len);
13448
13449         nritems = btrfs_header_nritems(eb);
13450         for (i = 0; i < nritems; i++) {
13451                 if (level == 0) {
13452                         btrfs_item_key_to_cpu(eb, &key, i);
13453                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13454                                 continue;
13455                         /* Skip the extent root and reloc roots */
13456                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13457                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13458                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13459                                 continue;
13460                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13461                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13462
13463                         /*
13464                          * If at any point we start needing the real root we
13465                          * will have to build a stump root for the root we are
13466                          * in, but for now this doesn't actually use the root so
13467                          * just pass in extent_root.
13468                          */
13469                         tmp = read_tree_block(fs_info, bytenr, 0);
13470                         if (!extent_buffer_uptodate(tmp)) {
13471                                 fprintf(stderr, "Error reading root block\n");
13472                                 return -EIO;
13473                         }
13474                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13475                         free_extent_buffer(tmp);
13476                         if (ret)
13477                                 return ret;
13478                 } else {
13479                         bytenr = btrfs_node_blockptr(eb, i);
13480
13481                         /* If we aren't the tree root don't read the block */
13482                         if (level == 1 && !tree_root) {
13483                                 btrfs_pin_extent(fs_info, bytenr,
13484                                                 fs_info->nodesize);
13485                                 continue;
13486                         }
13487
13488                         tmp = read_tree_block(fs_info, bytenr, 0);
13489                         if (!extent_buffer_uptodate(tmp)) {
13490                                 fprintf(stderr, "Error reading tree block\n");
13491                                 return -EIO;
13492                         }
13493                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13494                         free_extent_buffer(tmp);
13495                         if (ret)
13496                                 return ret;
13497                 }
13498         }
13499
13500         return 0;
13501 }
13502
13503 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13504 {
13505         int ret;
13506
13507         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13508         if (ret)
13509                 return ret;
13510
13511         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13512 }
13513
13514 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13515 {
13516         struct btrfs_block_group_cache *cache;
13517         struct btrfs_path path;
13518         struct extent_buffer *leaf;
13519         struct btrfs_chunk *chunk;
13520         struct btrfs_key key;
13521         int ret;
13522         u64 start;
13523
13524         btrfs_init_path(&path);
13525         key.objectid = 0;
13526         key.type = BTRFS_CHUNK_ITEM_KEY;
13527         key.offset = 0;
13528         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13529         if (ret < 0) {
13530                 btrfs_release_path(&path);
13531                 return ret;
13532         }
13533
13534         /*
13535          * We do this in case the block groups were screwed up and had alloc
13536          * bits that aren't actually set on the chunks.  This happens with
13537          * restored images every time and could happen in real life I guess.
13538          */
13539         fs_info->avail_data_alloc_bits = 0;
13540         fs_info->avail_metadata_alloc_bits = 0;
13541         fs_info->avail_system_alloc_bits = 0;
13542
13543         /* First we need to create the in-memory block groups */
13544         while (1) {
13545                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13546                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13547                         if (ret < 0) {
13548                                 btrfs_release_path(&path);
13549                                 return ret;
13550                         }
13551                         if (ret) {
13552                                 ret = 0;
13553                                 break;
13554                         }
13555                 }
13556                 leaf = path.nodes[0];
13557                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13558                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13559                         path.slots[0]++;
13560                         continue;
13561                 }
13562
13563                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13564                 btrfs_add_block_group(fs_info, 0,
13565                                       btrfs_chunk_type(leaf, chunk),
13566                                       key.objectid, key.offset,
13567                                       btrfs_chunk_length(leaf, chunk));
13568                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13569                                  key.offset + btrfs_chunk_length(leaf, chunk));
13570                 path.slots[0]++;
13571         }
13572         start = 0;
13573         while (1) {
13574                 cache = btrfs_lookup_first_block_group(fs_info, start);
13575                 if (!cache)
13576                         break;
13577                 cache->cached = 1;
13578                 start = cache->key.objectid + cache->key.offset;
13579         }
13580
13581         btrfs_release_path(&path);
13582         return 0;
13583 }
13584
13585 static int reset_balance(struct btrfs_trans_handle *trans,
13586                          struct btrfs_fs_info *fs_info)
13587 {
13588         struct btrfs_root *root = fs_info->tree_root;
13589         struct btrfs_path path;
13590         struct extent_buffer *leaf;
13591         struct btrfs_key key;
13592         int del_slot, del_nr = 0;
13593         int ret;
13594         int found = 0;
13595
13596         btrfs_init_path(&path);
13597         key.objectid = BTRFS_BALANCE_OBJECTID;
13598         key.type = BTRFS_BALANCE_ITEM_KEY;
13599         key.offset = 0;
13600         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13601         if (ret) {
13602                 if (ret > 0)
13603                         ret = 0;
13604                 if (!ret)
13605                         goto reinit_data_reloc;
13606                 else
13607                         goto out;
13608         }
13609
13610         ret = btrfs_del_item(trans, root, &path);
13611         if (ret)
13612                 goto out;
13613         btrfs_release_path(&path);
13614
13615         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13616         key.type = BTRFS_ROOT_ITEM_KEY;
13617         key.offset = 0;
13618         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13619         if (ret < 0)
13620                 goto out;
13621         while (1) {
13622                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13623                         if (!found)
13624                                 break;
13625
13626                         if (del_nr) {
13627                                 ret = btrfs_del_items(trans, root, &path,
13628                                                       del_slot, del_nr);
13629                                 del_nr = 0;
13630                                 if (ret)
13631                                         goto out;
13632                         }
13633                         key.offset++;
13634                         btrfs_release_path(&path);
13635
13636                         found = 0;
13637                         ret = btrfs_search_slot(trans, root, &key, &path,
13638                                                 -1, 1);
13639                         if (ret < 0)
13640                                 goto out;
13641                         continue;
13642                 }
13643                 found = 1;
13644                 leaf = path.nodes[0];
13645                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13646                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13647                         break;
13648                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13649                         path.slots[0]++;
13650                         continue;
13651                 }
13652                 if (!del_nr) {
13653                         del_slot = path.slots[0];
13654                         del_nr = 1;
13655                 } else {
13656                         del_nr++;
13657                 }
13658                 path.slots[0]++;
13659         }
13660
13661         if (del_nr) {
13662                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13663                 if (ret)
13664                         goto out;
13665         }
13666         btrfs_release_path(&path);
13667
13668 reinit_data_reloc:
13669         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13670         key.type = BTRFS_ROOT_ITEM_KEY;
13671         key.offset = (u64)-1;
13672         root = btrfs_read_fs_root(fs_info, &key);
13673         if (IS_ERR(root)) {
13674                 fprintf(stderr, "Error reading data reloc tree\n");
13675                 ret = PTR_ERR(root);
13676                 goto out;
13677         }
13678         record_root_in_trans(trans, root);
13679         ret = btrfs_fsck_reinit_root(trans, root, 0);
13680         if (ret)
13681                 goto out;
13682         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13683 out:
13684         btrfs_release_path(&path);
13685         return ret;
13686 }
13687
13688 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13689                               struct btrfs_fs_info *fs_info)
13690 {
13691         u64 start = 0;
13692         int ret;
13693
13694         /*
13695          * The only reason we don't do this is because right now we're just
13696          * walking the trees we find and pinning down their bytes, we don't look
13697          * at any of the leaves.  In order to do mixed groups we'd have to check
13698          * the leaves of any fs roots and pin down the bytes for any file
13699          * extents we find.  Not hard but why do it if we don't have to?
13700          */
13701         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13702                 fprintf(stderr, "We don't support re-initing the extent tree "
13703                         "for mixed block groups yet, please notify a btrfs "
13704                         "developer you want to do this so they can add this "
13705                         "functionality.\n");
13706                 return -EINVAL;
13707         }
13708
13709         /*
13710          * first we need to walk all of the trees except the extent tree and pin
13711          * down the bytes that are in use so we don't overwrite any existing
13712          * metadata.
13713          */
13714         ret = pin_metadata_blocks(fs_info);
13715         if (ret) {
13716                 fprintf(stderr, "error pinning down used bytes\n");
13717                 return ret;
13718         }
13719
13720         /*
13721          * Need to drop all the block groups since we're going to recreate all
13722          * of them again.
13723          */
13724         btrfs_free_block_groups(fs_info);
13725         ret = reset_block_groups(fs_info);
13726         if (ret) {
13727                 fprintf(stderr, "error resetting the block groups\n");
13728                 return ret;
13729         }
13730
13731         /* Ok we can allocate now, reinit the extent root */
13732         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13733         if (ret) {
13734                 fprintf(stderr, "extent root initialization failed\n");
13735                 /*
13736                  * When the transaction code is updated we should end the
13737                  * transaction, but for now progs only knows about commit so
13738                  * just return an error.
13739                  */
13740                 return ret;
13741         }
13742
13743         /*
13744          * Now we have all the in-memory block groups setup so we can make
13745          * allocations properly, and the metadata we care about is safe since we
13746          * pinned all of it above.
13747          */
13748         while (1) {
13749                 struct btrfs_block_group_cache *cache;
13750
13751                 cache = btrfs_lookup_first_block_group(fs_info, start);
13752                 if (!cache)
13753                         break;
13754                 start = cache->key.objectid + cache->key.offset;
13755                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13756                                         &cache->key, &cache->item,
13757                                         sizeof(cache->item));
13758                 if (ret) {
13759                         fprintf(stderr, "Error adding block group\n");
13760                         return ret;
13761                 }
13762                 btrfs_extent_post_op(trans, fs_info->extent_root);
13763         }
13764
13765         ret = reset_balance(trans, fs_info);
13766         if (ret)
13767                 fprintf(stderr, "error resetting the pending balance\n");
13768
13769         return ret;
13770 }
13771
13772 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13773 {
13774         struct btrfs_path path;
13775         struct btrfs_trans_handle *trans;
13776         struct btrfs_key key;
13777         int ret;
13778
13779         printf("Recowing metadata block %llu\n", eb->start);
13780         key.objectid = btrfs_header_owner(eb);
13781         key.type = BTRFS_ROOT_ITEM_KEY;
13782         key.offset = (u64)-1;
13783
13784         root = btrfs_read_fs_root(root->fs_info, &key);
13785         if (IS_ERR(root)) {
13786                 fprintf(stderr, "Couldn't find owner root %llu\n",
13787                         key.objectid);
13788                 return PTR_ERR(root);
13789         }
13790
13791         trans = btrfs_start_transaction(root, 1);
13792         if (IS_ERR(trans))
13793                 return PTR_ERR(trans);
13794
13795         btrfs_init_path(&path);
13796         path.lowest_level = btrfs_header_level(eb);
13797         if (path.lowest_level)
13798                 btrfs_node_key_to_cpu(eb, &key, 0);
13799         else
13800                 btrfs_item_key_to_cpu(eb, &key, 0);
13801
13802         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13803         btrfs_commit_transaction(trans, root);
13804         btrfs_release_path(&path);
13805         return ret;
13806 }
13807
13808 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13809 {
13810         struct btrfs_path path;
13811         struct btrfs_trans_handle *trans;
13812         struct btrfs_key key;
13813         int ret;
13814
13815         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13816                bad->key.type, bad->key.offset);
13817         key.objectid = bad->root_id;
13818         key.type = BTRFS_ROOT_ITEM_KEY;
13819         key.offset = (u64)-1;
13820
13821         root = btrfs_read_fs_root(root->fs_info, &key);
13822         if (IS_ERR(root)) {
13823                 fprintf(stderr, "Couldn't find owner root %llu\n",
13824                         key.objectid);
13825                 return PTR_ERR(root);
13826         }
13827
13828         trans = btrfs_start_transaction(root, 1);
13829         if (IS_ERR(trans))
13830                 return PTR_ERR(trans);
13831
13832         btrfs_init_path(&path);
13833         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13834         if (ret) {
13835                 if (ret > 0)
13836                         ret = 0;
13837                 goto out;
13838         }
13839         ret = btrfs_del_item(trans, root, &path);
13840 out:
13841         btrfs_commit_transaction(trans, root);
13842         btrfs_release_path(&path);
13843         return ret;
13844 }
13845
13846 static int zero_log_tree(struct btrfs_root *root)
13847 {
13848         struct btrfs_trans_handle *trans;
13849         int ret;
13850
13851         trans = btrfs_start_transaction(root, 1);
13852         if (IS_ERR(trans)) {
13853                 ret = PTR_ERR(trans);
13854                 return ret;
13855         }
13856         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13857         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13858         ret = btrfs_commit_transaction(trans, root);
13859         return ret;
13860 }
13861
13862 static int populate_csum(struct btrfs_trans_handle *trans,
13863                          struct btrfs_root *csum_root, char *buf, u64 start,
13864                          u64 len)
13865 {
13866         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13867         u64 offset = 0;
13868         u64 sectorsize;
13869         int ret = 0;
13870
13871         while (offset < len) {
13872                 sectorsize = fs_info->sectorsize;
13873                 ret = read_extent_data(fs_info, buf, start + offset,
13874                                        &sectorsize, 0);
13875                 if (ret)
13876                         break;
13877                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13878                                             start + offset, buf, sectorsize);
13879                 if (ret)
13880                         break;
13881                 offset += sectorsize;
13882         }
13883         return ret;
13884 }
13885
13886 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13887                                       struct btrfs_root *csum_root,
13888                                       struct btrfs_root *cur_root)
13889 {
13890         struct btrfs_path path;
13891         struct btrfs_key key;
13892         struct extent_buffer *node;
13893         struct btrfs_file_extent_item *fi;
13894         char *buf = NULL;
13895         u64 start = 0;
13896         u64 len = 0;
13897         int slot = 0;
13898         int ret = 0;
13899
13900         buf = malloc(cur_root->fs_info->sectorsize);
13901         if (!buf)
13902                 return -ENOMEM;
13903
13904         btrfs_init_path(&path);
13905         key.objectid = 0;
13906         key.offset = 0;
13907         key.type = 0;
13908         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13909         if (ret < 0)
13910                 goto out;
13911         /* Iterate all regular file extents and fill its csum */
13912         while (1) {
13913                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13914
13915                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13916                         goto next;
13917                 node = path.nodes[0];
13918                 slot = path.slots[0];
13919                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13920                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13921                         goto next;
13922                 start = btrfs_file_extent_disk_bytenr(node, fi);
13923                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13924
13925                 ret = populate_csum(trans, csum_root, buf, start, len);
13926                 if (ret == -EEXIST)
13927                         ret = 0;
13928                 if (ret < 0)
13929                         goto out;
13930 next:
13931                 /*
13932                  * TODO: if next leaf is corrupted, jump to nearest next valid
13933                  * leaf.
13934                  */
13935                 ret = btrfs_next_item(cur_root, &path);
13936                 if (ret < 0)
13937                         goto out;
13938                 if (ret > 0) {
13939                         ret = 0;
13940                         goto out;
13941                 }
13942         }
13943
13944 out:
13945         btrfs_release_path(&path);
13946         free(buf);
13947         return ret;
13948 }
13949
13950 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13951                                   struct btrfs_root *csum_root)
13952 {
13953         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13954         struct btrfs_path path;
13955         struct btrfs_root *tree_root = fs_info->tree_root;
13956         struct btrfs_root *cur_root;
13957         struct extent_buffer *node;
13958         struct btrfs_key key;
13959         int slot = 0;
13960         int ret = 0;
13961
13962         btrfs_init_path(&path);
13963         key.objectid = BTRFS_FS_TREE_OBJECTID;
13964         key.offset = 0;
13965         key.type = BTRFS_ROOT_ITEM_KEY;
13966         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13967         if (ret < 0)
13968                 goto out;
13969         if (ret > 0) {
13970                 ret = -ENOENT;
13971                 goto out;
13972         }
13973
13974         while (1) {
13975                 node = path.nodes[0];
13976                 slot = path.slots[0];
13977                 btrfs_item_key_to_cpu(node, &key, slot);
13978                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13979                         goto out;
13980                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13981                         goto next;
13982                 if (!is_fstree(key.objectid))
13983                         goto next;
13984                 key.offset = (u64)-1;
13985
13986                 cur_root = btrfs_read_fs_root(fs_info, &key);
13987                 if (IS_ERR(cur_root) || !cur_root) {
13988                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13989                                 key.objectid);
13990                         goto out;
13991                 }
13992                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13993                                 cur_root);
13994                 if (ret < 0)
13995                         goto out;
13996 next:
13997                 ret = btrfs_next_item(tree_root, &path);
13998                 if (ret > 0) {
13999                         ret = 0;
14000                         goto out;
14001                 }
14002                 if (ret < 0)
14003                         goto out;
14004         }
14005
14006 out:
14007         btrfs_release_path(&path);
14008         return ret;
14009 }
14010
14011 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
14012                                       struct btrfs_root *csum_root)
14013 {
14014         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
14015         struct btrfs_path path;
14016         struct btrfs_extent_item *ei;
14017         struct extent_buffer *leaf;
14018         char *buf;
14019         struct btrfs_key key;
14020         int ret;
14021
14022         btrfs_init_path(&path);
14023         key.objectid = 0;
14024         key.type = BTRFS_EXTENT_ITEM_KEY;
14025         key.offset = 0;
14026         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
14027         if (ret < 0) {
14028                 btrfs_release_path(&path);
14029                 return ret;
14030         }
14031
14032         buf = malloc(csum_root->fs_info->sectorsize);
14033         if (!buf) {
14034                 btrfs_release_path(&path);
14035                 return -ENOMEM;
14036         }
14037
14038         while (1) {
14039                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
14040                         ret = btrfs_next_leaf(extent_root, &path);
14041                         if (ret < 0)
14042                                 break;
14043                         if (ret) {
14044                                 ret = 0;
14045                                 break;
14046                         }
14047                 }
14048                 leaf = path.nodes[0];
14049
14050                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
14051                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
14052                         path.slots[0]++;
14053                         continue;
14054                 }
14055
14056                 ei = btrfs_item_ptr(leaf, path.slots[0],
14057                                     struct btrfs_extent_item);
14058                 if (!(btrfs_extent_flags(leaf, ei) &
14059                       BTRFS_EXTENT_FLAG_DATA)) {
14060                         path.slots[0]++;
14061                         continue;
14062                 }
14063
14064                 ret = populate_csum(trans, csum_root, buf, key.objectid,
14065                                     key.offset);
14066                 if (ret)
14067                         break;
14068                 path.slots[0]++;
14069         }
14070
14071         btrfs_release_path(&path);
14072         free(buf);
14073         return ret;
14074 }
14075
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * An extent tree init wipes all extent info, so in that case the extent tree
 * can't be used as the source and the fs/subvolume trees are walked instead.
 * A non-zero @search_fs_tree selects the fs/subvolume walk, zero selects the
 * extent tree walk.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
14092
14093 static void free_roots_info_cache(void)
14094 {
14095         if (!roots_info_cache)
14096                 return;
14097
14098         while (!cache_tree_empty(roots_info_cache)) {
14099                 struct cache_extent *entry;
14100                 struct root_item_info *rii;
14101
14102                 entry = first_cache_extent(roots_info_cache);
14103                 if (!entry)
14104                         break;
14105                 remove_cache_extent(roots_info_cache, entry);
14106                 rii = container_of(entry, struct root_item_info, cache_extent);
14107                 free(rii);
14108         }
14109
14110         free(roots_info_cache);
14111         roots_info_cache = NULL;
14112 }
14113
14114 static int build_roots_info_cache(struct btrfs_fs_info *info)
14115 {
14116         int ret = 0;
14117         struct btrfs_key key;
14118         struct extent_buffer *leaf;
14119         struct btrfs_path path;
14120
14121         if (!roots_info_cache) {
14122                 roots_info_cache = malloc(sizeof(*roots_info_cache));
14123                 if (!roots_info_cache)
14124                         return -ENOMEM;
14125                 cache_tree_init(roots_info_cache);
14126         }
14127
14128         btrfs_init_path(&path);
14129         key.objectid = 0;
14130         key.type = BTRFS_EXTENT_ITEM_KEY;
14131         key.offset = 0;
14132         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
14133         if (ret < 0)
14134                 goto out;
14135         leaf = path.nodes[0];
14136
14137         while (1) {
14138                 struct btrfs_key found_key;
14139                 struct btrfs_extent_item *ei;
14140                 struct btrfs_extent_inline_ref *iref;
14141                 int slot = path.slots[0];
14142                 int type;
14143                 u64 flags;
14144                 u64 root_id;
14145                 u8 level;
14146                 struct cache_extent *entry;
14147                 struct root_item_info *rii;
14148
14149                 if (slot >= btrfs_header_nritems(leaf)) {
14150                         ret = btrfs_next_leaf(info->extent_root, &path);
14151                         if (ret < 0) {
14152                                 break;
14153                         } else if (ret) {
14154                                 ret = 0;
14155                                 break;
14156                         }
14157                         leaf = path.nodes[0];
14158                         slot = path.slots[0];
14159                 }
14160
14161                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14162
14163                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14164                     found_key.type != BTRFS_METADATA_ITEM_KEY)
14165                         goto next;
14166
14167                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14168                 flags = btrfs_extent_flags(leaf, ei);
14169
14170                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14171                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
14172                         goto next;
14173
14174                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14175                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14176                         level = found_key.offset;
14177                 } else {
14178                         struct btrfs_tree_block_info *binfo;
14179
14180                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
14181                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14182                         level = btrfs_tree_block_level(leaf, binfo);
14183                 }
14184
14185                 /*
14186                  * For a root extent, it must be of the following type and the
14187                  * first (and only one) iref in the item.
14188                  */
14189                 type = btrfs_extent_inline_ref_type(leaf, iref);
14190                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14191                         goto next;
14192
14193                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14194                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14195                 if (!entry) {
14196                         rii = malloc(sizeof(struct root_item_info));
14197                         if (!rii) {
14198                                 ret = -ENOMEM;
14199                                 goto out;
14200                         }
14201                         rii->cache_extent.start = root_id;
14202                         rii->cache_extent.size = 1;
14203                         rii->level = (u8)-1;
14204                         entry = &rii->cache_extent;
14205                         ret = insert_cache_extent(roots_info_cache, entry);
14206                         ASSERT(ret == 0);
14207                 } else {
14208                         rii = container_of(entry, struct root_item_info,
14209                                            cache_extent);
14210                 }
14211
14212                 ASSERT(rii->cache_extent.start == root_id);
14213                 ASSERT(rii->cache_extent.size == 1);
14214
14215                 if (level > rii->level || rii->level == (u8)-1) {
14216                         rii->level = level;
14217                         rii->bytenr = found_key.objectid;
14218                         rii->gen = btrfs_extent_generation(leaf, ei);
14219                         rii->node_count = 1;
14220                 } else if (level == rii->level) {
14221                         rii->node_count++;
14222                 }
14223 next:
14224                 path.slots[0]++;
14225         }
14226
14227 out:
14228         btrfs_release_path(&path);
14229
14230         return ret;
14231 }
14232
14233 static int maybe_repair_root_item(struct btrfs_path *path,
14234                                   const struct btrfs_key *root_key,
14235                                   const int read_only_mode)
14236 {
14237         const u64 root_id = root_key->objectid;
14238         struct cache_extent *entry;
14239         struct root_item_info *rii;
14240         struct btrfs_root_item ri;
14241         unsigned long offset;
14242
14243         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14244         if (!entry) {
14245                 fprintf(stderr,
14246                         "Error: could not find extent items for root %llu\n",
14247                         root_key->objectid);
14248                 return -ENOENT;
14249         }
14250
14251         rii = container_of(entry, struct root_item_info, cache_extent);
14252         ASSERT(rii->cache_extent.start == root_id);
14253         ASSERT(rii->cache_extent.size == 1);
14254
14255         if (rii->node_count != 1) {
14256                 fprintf(stderr,
14257                         "Error: could not find btree root extent for root %llu\n",
14258                         root_id);
14259                 return -ENOENT;
14260         }
14261
14262         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14263         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14264
14265         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14266             btrfs_root_level(&ri) != rii->level ||
14267             btrfs_root_generation(&ri) != rii->gen) {
14268
14269                 /*
14270                  * If we're in repair mode but our caller told us to not update
14271                  * the root item, i.e. just check if it needs to be updated, don't
14272                  * print this message, since the caller will call us again shortly
14273                  * for the same root item without read only mode (the caller will
14274                  * open a transaction first).
14275                  */
14276                 if (!(read_only_mode && repair))
14277                         fprintf(stderr,
14278                                 "%sroot item for root %llu,"
14279                                 " current bytenr %llu, current gen %llu, current level %u,"
14280                                 " new bytenr %llu, new gen %llu, new level %u\n",
14281                                 (read_only_mode ? "" : "fixing "),
14282                                 root_id,
14283                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14284                                 btrfs_root_level(&ri),
14285                                 rii->bytenr, rii->gen, rii->level);
14286
14287                 if (btrfs_root_generation(&ri) > rii->gen) {
14288                         fprintf(stderr,
14289                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14290                                 root_id, btrfs_root_generation(&ri), rii->gen);
14291                         return -EINVAL;
14292                 }
14293
14294                 if (!read_only_mode) {
14295                         btrfs_set_root_bytenr(&ri, rii->bytenr);
14296                         btrfs_set_root_level(&ri, rii->level);
14297                         btrfs_set_root_generation(&ri, rii->gen);
14298                         write_extent_buffer(path->nodes[0], &ri,
14299                                             offset, sizeof(ri));
14300                 }
14301
14302                 return 1;
14303         }
14304
14305         return 0;
14306 }
14307
14308 /*
14309  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14310  * caused read-only snapshots to be corrupted if they were created at a moment
14311  * when the source subvolume/snapshot had orphan items. The issue was that the
14312  * on-disk root items became incorrect, referring to the pre orphan cleanup root
14313  * node instead of the post orphan cleanup root node.
14314  * So this function, and its callees, just detects and fixes those cases. Even
14315  * though the regression was for read-only snapshots, this function applies to
14316  * any snapshot/subvolume root.
14317  * This must be run before any other repair code - not doing it so, makes other
14318  * repair code delete or modify backrefs in the extent tree for example, which
14319  * will result in an inconsistent fs after repairing the root items.
14320  */
14321 static int repair_root_items(struct btrfs_fs_info *info)
14322 {
14323         struct btrfs_path path;
14324         struct btrfs_key key;
14325         struct extent_buffer *leaf;
14326         struct btrfs_trans_handle *trans = NULL;
14327         int ret = 0;
14328         int bad_roots = 0;
14329         int need_trans = 0;
14330
14331         btrfs_init_path(&path);
14332
14333         ret = build_roots_info_cache(info);
14334         if (ret)
14335                 goto out;
14336
14337         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14338         key.type = BTRFS_ROOT_ITEM_KEY;
14339         key.offset = 0;
14340
14341 again:
14342         /*
14343          * Avoid opening and committing transactions if a leaf doesn't have
14344          * any root items that need to be fixed, so that we avoid rotating
14345          * backup roots unnecessarily.
14346          */
14347         if (need_trans) {
14348                 trans = btrfs_start_transaction(info->tree_root, 1);
14349                 if (IS_ERR(trans)) {
14350                         ret = PTR_ERR(trans);
14351                         goto out;
14352                 }
14353         }
14354
14355         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14356                                 0, trans ? 1 : 0);
14357         if (ret < 0)
14358                 goto out;
14359         leaf = path.nodes[0];
14360
14361         while (1) {
14362                 struct btrfs_key found_key;
14363
14364                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14365                         int no_more_keys = find_next_key(&path, &key);
14366
14367                         btrfs_release_path(&path);
14368                         if (trans) {
14369                                 ret = btrfs_commit_transaction(trans,
14370                                                                info->tree_root);
14371                                 trans = NULL;
14372                                 if (ret < 0)
14373                                         goto out;
14374                         }
14375                         need_trans = 0;
14376                         if (no_more_keys)
14377                                 break;
14378                         goto again;
14379                 }
14380
14381                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14382
14383                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14384                         goto next;
14385                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14386                         goto next;
14387
14388                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14389                 if (ret < 0)
14390                         goto out;
14391                 if (ret) {
14392                         if (!trans && repair) {
14393                                 need_trans = 1;
14394                                 key = found_key;
14395                                 btrfs_release_path(&path);
14396                                 goto again;
14397                         }
14398                         bad_roots++;
14399                 }
14400 next:
14401                 path.slots[0]++;
14402         }
14403         ret = 0;
14404 out:
14405         free_roots_info_cache();
14406         btrfs_release_path(&path);
14407         if (trans)
14408                 btrfs_commit_transaction(trans, info->tree_root);
14409         if (ret < 0)
14410                 return ret;
14411
14412         return bad_roots;
14413 }
14414
14415 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14416 {
14417         struct btrfs_trans_handle *trans;
14418         struct btrfs_block_group_cache *bg_cache;
14419         u64 current = 0;
14420         int ret = 0;
14421
14422         /* Clear all free space cache inodes and its extent data */
14423         while (1) {
14424                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14425                 if (!bg_cache)
14426                         break;
14427                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14428                 if (ret < 0)
14429                         return ret;
14430                 current = bg_cache->key.objectid + bg_cache->key.offset;
14431         }
14432
14433         /* Don't forget to set cache_generation to -1 */
14434         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14435         if (IS_ERR(trans)) {
14436                 error("failed to update super block cache generation");
14437                 return PTR_ERR(trans);
14438         }
14439         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14440         btrfs_commit_transaction(trans, fs_info->tree_root);
14441
14442         return ret;
14443 }
14444
14445 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14446                 int clear_version)
14447 {
14448         int ret = 0;
14449
14450         if (clear_version == 1) {
14451                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14452                         error(
14453                 "free space cache v2 detected, use --clear-space-cache v2");
14454                         ret = 1;
14455                         goto close_out;
14456                 }
14457                 printf("Clearing free space cache\n");
14458                 ret = clear_free_space_cache(fs_info);
14459                 if (ret) {
14460                         error("failed to clear free space cache");
14461                         ret = 1;
14462                 } else {
14463                         printf("Free space cache cleared\n");
14464                 }
14465         } else if (clear_version == 2) {
14466                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14467                         printf("no free space cache v2 to clear\n");
14468                         ret = 0;
14469                         goto close_out;
14470                 }
14471                 printf("Clear free space cache v2\n");
14472                 ret = btrfs_clear_free_space_tree(fs_info);
14473                 if (ret) {
14474                         error("failed to clear free space cache v2: %d", ret);
14475                         ret = 1;
14476                 } else {
14477                         printf("free space cache v2 cleared\n");
14478                 }
14479         }
14480 close_out:
14481         return ret;
14482 }
14483
/*
 * Help text for "btrfs check", passed to usage() by cmd_check().
 *
 * NULL-terminated array of lines: the synopsis, a short description, the long
 * description, and after the empty string one entry per option line.
 * NOTE(review): the exact meaning of each position is defined by usage(),
 * which is not visible here - confirm before reordering entries.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
14515
14516 int cmd_check(int argc, char **argv)
14517 {
14518         struct cache_tree root_cache;
14519         struct btrfs_root *root;
14520         struct btrfs_fs_info *info;
14521         u64 bytenr = 0;
14522         u64 subvolid = 0;
14523         u64 tree_root_bytenr = 0;
14524         u64 chunk_root_bytenr = 0;
14525         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14526         int ret = 0;
14527         int err = 0;
14528         u64 num;
14529         int init_csum_tree = 0;
14530         int readonly = 0;
14531         int clear_space_cache = 0;
14532         int qgroup_report = 0;
14533         int qgroups_repaired = 0;
14534         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14535         int force = 0;
14536
14537         while(1) {
14538                 int c;
14539                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14540                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14541                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14542                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14543                         GETOPT_VAL_FORCE };
14544                 static const struct option long_options[] = {
14545                         { "super", required_argument, NULL, 's' },
14546                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14547                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14548                         { "init-csum-tree", no_argument, NULL,
14549                                 GETOPT_VAL_INIT_CSUM },
14550                         { "init-extent-tree", no_argument, NULL,
14551                                 GETOPT_VAL_INIT_EXTENT },
14552                         { "check-data-csum", no_argument, NULL,
14553                                 GETOPT_VAL_CHECK_CSUM },
14554                         { "backup", no_argument, NULL, 'b' },
14555                         { "subvol-extents", required_argument, NULL, 'E' },
14556                         { "qgroup-report", no_argument, NULL, 'Q' },
14557                         { "tree-root", required_argument, NULL, 'r' },
14558                         { "chunk-root", required_argument, NULL,
14559                                 GETOPT_VAL_CHUNK_TREE },
14560                         { "progress", no_argument, NULL, 'p' },
14561                         { "mode", required_argument, NULL,
14562                                 GETOPT_VAL_MODE },
14563                         { "clear-space-cache", required_argument, NULL,
14564                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14565                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14566                         { NULL, 0, NULL, 0}
14567                 };
14568
14569                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14570                 if (c < 0)
14571                         break;
14572                 switch(c) {
14573                         case 'a': /* ignored */ break;
14574                         case 'b':
14575                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14576                                 break;
14577                         case 's':
14578                                 num = arg_strtou64(optarg);
14579                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14580                                         error(
14581                                         "super mirror should be less than %d",
14582                                                 BTRFS_SUPER_MIRROR_MAX);
14583                                         exit(1);
14584                                 }
14585                                 bytenr = btrfs_sb_offset(((int)num));
14586                                 printf("using SB copy %llu, bytenr %llu\n", num,
14587                                        (unsigned long long)bytenr);
14588                                 break;
14589                         case 'Q':
14590                                 qgroup_report = 1;
14591                                 break;
14592                         case 'E':
14593                                 subvolid = arg_strtou64(optarg);
14594                                 break;
14595                         case 'r':
14596                                 tree_root_bytenr = arg_strtou64(optarg);
14597                                 break;
14598                         case GETOPT_VAL_CHUNK_TREE:
14599                                 chunk_root_bytenr = arg_strtou64(optarg);
14600                                 break;
14601                         case 'p':
14602                                 ctx.progress_enabled = true;
14603                                 break;
14604                         case '?':
14605                         case 'h':
14606                                 usage(cmd_check_usage);
14607                         case GETOPT_VAL_REPAIR:
14608                                 printf("enabling repair mode\n");
14609                                 repair = 1;
14610                                 ctree_flags |= OPEN_CTREE_WRITES;
14611                                 break;
14612                         case GETOPT_VAL_READONLY:
14613                                 readonly = 1;
14614                                 break;
14615                         case GETOPT_VAL_INIT_CSUM:
14616                                 printf("Creating a new CRC tree\n");
14617                                 init_csum_tree = 1;
14618                                 repair = 1;
14619                                 ctree_flags |= OPEN_CTREE_WRITES;
14620                                 break;
14621                         case GETOPT_VAL_INIT_EXTENT:
14622                                 init_extent_tree = 1;
14623                                 ctree_flags |= (OPEN_CTREE_WRITES |
14624                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14625                                 repair = 1;
14626                                 break;
14627                         case GETOPT_VAL_CHECK_CSUM:
14628                                 check_data_csum = 1;
14629                                 break;
14630                         case GETOPT_VAL_MODE:
14631                                 check_mode = parse_check_mode(optarg);
14632                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14633                                         error("unknown mode: %s", optarg);
14634                                         exit(1);
14635                                 }
14636                                 break;
14637                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14638                                 if (strcmp(optarg, "v1") == 0) {
14639                                         clear_space_cache = 1;
14640                                 } else if (strcmp(optarg, "v2") == 0) {
14641                                         clear_space_cache = 2;
14642                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14643                                 } else {
14644                                         error(
14645                 "invalid argument to --clear-space-cache, must be v1 or v2");
14646                                         exit(1);
14647                                 }
14648                                 ctree_flags |= OPEN_CTREE_WRITES;
14649                                 break;
14650                         case GETOPT_VAL_FORCE:
14651                                 force = 1;
14652                                 break;
14653                 }
14654         }
14655
14656         if (check_argc_exact(argc - optind, 1))
14657                 usage(cmd_check_usage);
14658
14659         if (ctx.progress_enabled) {
14660                 ctx.tp = TASK_NOTHING;
14661                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14662         }
14663
14664         /* This check is the only reason for --readonly to exist */
14665         if (readonly && repair) {
14666                 error("repair options are not compatible with --readonly");
14667                 exit(1);
14668         }
14669
14670         /*
14671          * experimental and dangerous
14672          */
14673         if (repair && check_mode == CHECK_MODE_LOWMEM)
14674                 warning("low-memory mode repair support is only partial");
14675
14676         radix_tree_init();
14677         cache_tree_init(&root_cache);
14678
14679         ret = check_mounted(argv[optind]);
14680         if (!force) {
14681                 if (ret < 0) {
14682                         error("could not check mount status: %s",
14683                                         strerror(-ret));
14684                         err |= !!ret;
14685                         goto err_out;
14686                 } else if (ret) {
14687                         error(
14688 "%s is currently mounted, use --force if you really intend to check the filesystem",
14689                                 argv[optind]);
14690                         ret = -EBUSY;
14691                         err |= !!ret;
14692                         goto err_out;
14693                 }
14694         } else {
14695                 if (repair) {
14696                         error("repair and --force is not yet supported");
14697                         ret = 1;
14698                         err |= !!ret;
14699                         goto err_out;
14700                 }
14701                 if (ret < 0) {
14702                         warning(
14703 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14704                                 argv[optind]);
14705                 } else if (ret) {
14706                         warning(
14707                         "filesystem mounted, continuing because of --force");
14708                 }
14709                 /* A block device is mounted in exclusive mode by kernel */
14710                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14711         }
14712
14713         /* only allow partial opening under repair mode */
14714         if (repair)
14715                 ctree_flags |= OPEN_CTREE_PARTIAL;
14716
14717         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14718                                   chunk_root_bytenr, ctree_flags);
14719         if (!info) {
14720                 error("cannot open file system");
14721                 ret = -EIO;
14722                 err |= !!ret;
14723                 goto err_out;
14724         }
14725
14726         global_info = info;
14727         root = info->fs_root;
14728         uuid_unparse(info->super_copy->fsid, uuidbuf);
14729
14730         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14731
14732         /*
14733          * Check the bare minimum before starting anything else that could rely
14734          * on it, namely the tree roots, any local consistency checks
14735          */
14736         if (!extent_buffer_uptodate(info->tree_root->node) ||
14737             !extent_buffer_uptodate(info->dev_root->node) ||
14738             !extent_buffer_uptodate(info->chunk_root->node)) {
14739                 error("critical roots corrupted, unable to check the filesystem");
14740                 err |= !!ret;
14741                 ret = -EIO;
14742                 goto close_out;
14743         }
14744
14745         if (clear_space_cache) {
14746                 ret = do_clear_free_space_cache(info, clear_space_cache);
14747                 err |= !!ret;
14748                 goto close_out;
14749         }
14750
14751         /*
14752          * repair mode will force us to commit transaction which
14753          * will make us fail to load log tree when mounting.
14754          */
14755         if (repair && btrfs_super_log_root(info->super_copy)) {
14756                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14757                 if (!ret) {
14758                         ret = 1;
14759                         err |= !!ret;
14760                         goto close_out;
14761                 }
14762                 ret = zero_log_tree(root);
14763                 err |= !!ret;
14764                 if (ret) {
14765                         error("failed to zero log tree: %d", ret);
14766                         goto close_out;
14767                 }
14768         }
14769
14770         if (qgroup_report) {
14771                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14772                        uuidbuf);
14773                 ret = qgroup_verify_all(info);
14774                 err |= !!ret;
14775                 if (ret == 0)
14776                         report_qgroups(1);
14777                 goto close_out;
14778         }
14779         if (subvolid) {
14780                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14781                        subvolid, argv[optind], uuidbuf);
14782                 ret = print_extent_state(info, subvolid);
14783                 err |= !!ret;
14784                 goto close_out;
14785         }
14786
14787         if (init_extent_tree || init_csum_tree) {
14788                 struct btrfs_trans_handle *trans;
14789
14790                 trans = btrfs_start_transaction(info->extent_root, 0);
14791                 if (IS_ERR(trans)) {
14792                         error("error starting transaction");
14793                         ret = PTR_ERR(trans);
14794                         err |= !!ret;
14795                         goto close_out;
14796                 }
14797
14798                 if (init_extent_tree) {
14799                         printf("Creating a new extent tree\n");
14800                         ret = reinit_extent_tree(trans, info);
14801                         err |= !!ret;
14802                         if (ret)
14803                                 goto close_out;
14804                 }
14805
14806                 if (init_csum_tree) {
14807                         printf("Reinitialize checksum tree\n");
14808                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14809                         if (ret) {
14810                                 error("checksum tree initialization failed: %d",
14811                                                 ret);
14812                                 ret = -EIO;
14813                                 err |= !!ret;
14814                                 goto close_out;
14815                         }
14816
14817                         ret = fill_csum_tree(trans, info->csum_root,
14818                                              init_extent_tree);
14819                         err |= !!ret;
14820                         if (ret) {
14821                                 error("checksum tree refilling failed: %d", ret);
14822                                 return -EIO;
14823                         }
14824                 }
14825                 /*
14826                  * Ok now we commit and run the normal fsck, which will add
14827                  * extent entries for all of the items it finds.
14828                  */
14829                 ret = btrfs_commit_transaction(trans, info->extent_root);
14830                 err |= !!ret;
14831                 if (ret)
14832                         goto close_out;
14833         }
14834         if (!extent_buffer_uptodate(info->extent_root->node)) {
14835                 error("critical: extent_root, unable to check the filesystem");
14836                 ret = -EIO;
14837                 err |= !!ret;
14838                 goto close_out;
14839         }
14840         if (!extent_buffer_uptodate(info->csum_root->node)) {
14841                 error("critical: csum_root, unable to check the filesystem");
14842                 ret = -EIO;
14843                 err |= !!ret;
14844                 goto close_out;
14845         }
14846
14847         if (!init_extent_tree) {
14848                 ret = repair_root_items(info);
14849                 if (ret < 0) {
14850                         err = !!ret;
14851                         error("failed to repair root items: %s", strerror(-ret));
14852                         goto close_out;
14853                 }
14854                 if (repair) {
14855                         fprintf(stderr, "Fixed %d roots.\n", ret);
14856                         ret = 0;
14857                 } else if (ret > 0) {
14858                         fprintf(stderr,
14859                                 "Found %d roots with an outdated root item.\n",
14860                                 ret);
14861                         fprintf(stderr,
14862         "Please run a filesystem check with the option --repair to fix them.\n");
14863                         ret = 1;
14864                         err |= ret;
14865                         goto close_out;
14866                 }
14867         }
14868
14869         ret = do_check_chunks_and_extents(info);
14870         err |= !!ret;
14871         if (ret)
14872                 error(
14873                 "errors found in extent allocation tree or chunk allocation");
14874
14875         if (!ctx.progress_enabled) {
14876                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14877                         fprintf(stderr, "checking free space tree\n");
14878                 else
14879                         fprintf(stderr, "checking free space cache\n");
14880         }
14881         ret = check_space_cache(root);
14882         err |= !!ret;
14883         if (ret) {
14884                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14885                         error("errors found in free space tree");
14886                 else
14887                         error("errors found in free space cache");
14888                 goto out;
14889         }
14890
14891         /*
14892          * We used to have to have these hole extents in between our real
14893          * extents so if we don't have this flag set we need to make sure there
14894          * are no gaps in the file extents for inodes, otherwise we can just
14895          * ignore it when this happens.
14896          */
14897         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14898         ret = do_check_fs_roots(info, &root_cache);
14899         err |= !!ret;
14900         if (ret) {
14901                 error("errors found in fs roots");
14902                 goto out;
14903         }
14904
14905         fprintf(stderr, "checking csums\n");
14906         ret = check_csums(root);
14907         err |= !!ret;
14908         if (ret) {
14909                 error("errors found in csum tree");
14910                 goto out;
14911         }
14912
14913         fprintf(stderr, "checking root refs\n");
14914         /* For low memory mode, check_fs_roots_v2 handles root refs */
14915         if (check_mode != CHECK_MODE_LOWMEM) {
14916                 ret = check_root_refs(root, &root_cache);
14917                 err |= !!ret;
14918                 if (ret) {
14919                         error("errors found in root refs");
14920                         goto out;
14921                 }
14922         }
14923
14924         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14925                 struct extent_buffer *eb;
14926
14927                 eb = list_first_entry(&root->fs_info->recow_ebs,
14928                                       struct extent_buffer, recow);
14929                 list_del_init(&eb->recow);
14930                 ret = recow_extent_buffer(root, eb);
14931                 err |= !!ret;
14932                 if (ret) {
14933                         error("fails to fix transid errors");
14934                         break;
14935                 }
14936         }
14937
14938         while (!list_empty(&delete_items)) {
14939                 struct bad_item *bad;
14940
14941                 bad = list_first_entry(&delete_items, struct bad_item, list);
14942                 list_del_init(&bad->list);
14943                 if (repair) {
14944                         ret = delete_bad_item(root, bad);
14945                         err |= !!ret;
14946                 }
14947                 free(bad);
14948         }
14949
14950         if (info->quota_enabled) {
14951                 fprintf(stderr, "checking quota groups\n");
14952                 ret = qgroup_verify_all(info);
14953                 err |= !!ret;
14954                 if (ret) {
14955                         error("failed to check quota groups");
14956                         goto out;
14957                 }
14958                 report_qgroups(0);
14959                 ret = repair_qgroups(info, &qgroups_repaired);
14960                 err |= !!ret;
14961                 if (err) {
14962                         error("failed to repair quota groups");
14963                         goto out;
14964                 }
14965                 ret = 0;
14966         }
14967
14968         if (!list_empty(&root->fs_info->recow_ebs)) {
14969                 error("transid errors in file system");
14970                 ret = 1;
14971                 err |= !!ret;
14972         }
14973 out:
14974         printf("found %llu bytes used, ",
14975                (unsigned long long)bytes_used);
14976         if (err)
14977                 printf("error(s) found\n");
14978         else
14979                 printf("no error found\n");
14980         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14981         printf("total tree bytes: %llu\n",
14982                (unsigned long long)total_btree_bytes);
14983         printf("total fs tree bytes: %llu\n",
14984                (unsigned long long)total_fs_tree_bytes);
14985         printf("total extent tree bytes: %llu\n",
14986                (unsigned long long)total_extent_tree_bytes);
14987         printf("btree space waste bytes: %llu\n",
14988                (unsigned long long)btree_space_waste);
14989         printf("file data blocks allocated: %llu\n referenced %llu\n",
14990                 (unsigned long long)data_bytes_allocated,
14991                 (unsigned long long)data_bytes_referenced);
14992
14993         free_qgroup_counts();
14994         free_root_recs_tree(&root_cache);
14995 close_out:
14996         close_ctree(root);
14997 err_out:
14998         if (ctx.progress_enabled)
14999                 task_deinit(ctx.info);
15000
15001         return err;
15002 }